Published on

The 17th session in EGLUG's system programming course. I only attended the session; the slides were not written by me.

Published in: Technology, Spiritual
1 Like
  • Be the first to comment

No Downloads
Total views
On SlideShare
From Embeds
Number of Embeds
Embeds 0
No embeds

No notes for slide


  1. 1. C/C++ Linux System Programming <ul><ul><li>Session 17 </li></ul></ul><ul><ul><li>User-space System Programming </li></ul></ul><ul><ul><li> – session 7 </li></ul></ul>
  2. 2. Outline <ul><li>Device File I/O ops </li></ul><ul><li>Networking Concepts </li></ul><ul><li>Socket Concepts and Ops </li></ul><ul><li>Sockets for IPC </li></ul>
  3. 3. DEVICES <ul><li>Major and minor numbers </li></ul><ul><ul><li>int mknod(const char *pathname, mode_t mode, dev_t dev); </li></ul></ul><ul><li>UDEV </li></ul><ul><ul><li>FS </li></ul></ul><ul><ul><li>Events and rules </li></ul></ul>
  4. 4. I/O Multiplexing <ul><li>int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); </li></ul><ul><ul><li>void FD_CLR(int fd, fd_set *set); </li></ul></ul><ul><ul><li>int FD_ISSET(int fd, fd_set *set); </li></ul></ul><ul><ul><li>void FD_SET(int fd, fd_set *set); </li></ul></ul><ul><ul><li>void FD_ZERO(fd_set *set); </li></ul></ul><ul><li>int pselect(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); </li></ul><ul><li>int poll(struct pollfd *fds, nfds_t nfds, int timeout); </li></ul><ul><li>int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t *sigmask); </li></ul><ul><ul><li>POLLIN/POLLOUT/POLLPRI/POLLERR </li></ul></ul>
  5. 5. Epoll <ul><li>Decouple interest set registration from poll </li></ul><ul><ul><li>+: O(1) on the wait </li></ul></ul><ul><ul><li>+: Edge trigger </li></ul></ul><ul><ul><li>- : system call for adding onto the set </li></ul></ul><ul><li>int epoll_create(int size); //desc, need close </li></ul><ul><li>int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); </li></ul><ul><li>int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout); </li></ul>typedef union epoll_data { void *ptr; int fd; uint32_t u32; uint64_t u64; } epoll_data_t; struct epoll_event { uint32_t events; /* Epoll events */ epoll_data_t data; /* User data variable */ };
  6. 6. IOCTL <ul><li>Device / special file control </li></ul><ul><li>int ioctl(int d, int request, ...); </li></ul><ul><li>Request is specific to device being controlled, and may have a payload (ioctl_list) </li></ul>
  7. 7. Filesystem events <ul><li>int inotify_init(void); // desc, need close </li></ul><ul><li>int inotify_add_watch(int fd, const char *pathname, uint32_t mask); // watch desc </li></ul><ul><li>int inotify_rm_watch(int fd, uint32_t wd); </li></ul><ul><li>FIONREAD ioctl </li></ul><ul><li>fcntl: F_NOTIFY </li></ul>struct inotify_event { int wd; /* watch descriptor */ uint32_t mask; /* mask of events */ uint32_t cookie; /* unique cookie */ uint32_t len; /* size of 'name' field */ char name[]; /* null-terminated name */ };
  8. 8. int inotifyd_main(int argc UNUSED_PARAM, char **argv) { unsigned mask = IN_ALL_EVENTS; // assume we want all events struct pollfd pfd; char **watched = ++argv; // watched name list const char *args[] = { *argv, NULL, NULL, NULL, NULL }; // open inotify pfd.fd = inotify_init(); if (pfd.fd < 0) bb_perror_msg_and_die("no kernel support"); // setup watched while (*++argv) { char *path = *argv; char *masks = strchr(path, ':'); int wd; // watch descriptor // if mask is specified -> if (masks) { *masks = '\0'; // split path and mask // convert mask names to mask bitset mask = 0; while (*++masks) { int i = strchr(mask_names, *masks) - mask_names; if (i >= 0) { mask |= (1 << i); } } } // add watch wd = inotify_add_watch(pfd.fd, path, mask); if (wd < 0) { bb_perror_msg_and_die("add watch (%s) failed", path); } } static const char mask_names[] ALIGN1 = "a" // 0x00000001 File was accessed "c" // 0x00000002 File was modified "e" // 0x00000004 Metadata changed "w" // 0x00000008 Writable file was closed "0" // 0x00000010 Unwritable file closed "r" // 0x00000020 File was opened "m" // 0x00000040 File was moved from X "y" // 0x00000080 File was moved to Y "n" // 0x00000100 Subfile was created "d" // 0x00000200 Subfile was deleted "D" // 0x00000400 Self was deleted "M" // 0x00000800 Self was moved ; pfd.events = POLLIN; while (!signalled && poll(&pfd, 1, -1) > 0) { ssize_t len; void *buf; struct inotify_event *ie; // read out all pending events xioctl(pfd.fd, FIONREAD, &len); #define eventbuf bb_common_bufsiz1 ie = buf = (len <= sizeof(eventbuf)) ? eventbuf : xmalloc(len); len = full_read(pfd.fd, buf, len); // process events. N.B. 
events may vary in length while (len > 0) { int i; char events[12]; char *s = events; unsigned m = ie->mask; for (i = 0; i < 12; ++i, m >>= 1) { if (m & 1) { *s++ = mask_names[i]; } } *s = '\0'; args[1] = events; args[2] = watched[ie->wd]; args[3] = ie->len ? ie->name : NULL; xspawn((char **)args); // next event i = sizeof(struct inotify_event) + ie->len; len -= i; ie = (void*)((char*)ie + i); } if (eventbuf != buf) free(buf); } return EXIT_SUCCESS; }
  9. 9. Asynchronous I/O <ul><li>Only on O_DIRECT </li></ul>struct aiocb { int aio_fildes; /* file descriptor */ int aio_lio_opcode; /* operation to perform */ int aio_reqprio; /* request priority offset */ volatile void *aio_buf; /* pointer to buffer */ size_t aio_nbytes; /* length of operation */ struct sigevent aio_sigevent; /* signal number and value */ /* internal, private members follow... */ }; int aio_read (struct aiocb *aiocbp); int aio_write (struct aiocb *aiocbp); int aio_error (const struct aiocb *aiocbp); ssize_t aio_return (struct aiocb *aiocbp); int aio_cancel (int fd, struct aiocb *aiocbp); int aio_fsync (int op, struct aiocb *aiocbp); int aio_suspend (const struct aiocb * const cblist[], int n, const struct timespec *timeout);
  10. 10. Network Architecture Application – telnet/ftp/http...etc Presentation -- intended for e.g. encryption Session -- e.g. iSCSI Transport – PORTS Network – IP, ATM Link -- Physical – Ethernet, wifi... <ul><li>OSI </li></ul><ul><li>Packets and Data Encapsulation </li></ul><ul><li>Protocols can be stacked on top of that </li></ul><ul><ul><li>e.g. CIM over HTTP </li></ul></ul>------------------------------------------------------------- | Eth | IP | TCP | App | DDDDAAAATTTTAAAA | -------------------------------------------------------------
  11. 11. Focus <ul><li>Link is handled by HW and drivers </li></ul><ul><li>Network: IP, handled by kernel, affects addressing and byte ordering </li></ul><ul><li>Transport layer </li></ul><ul><ul><li>TCP – Reliable, sequenced, Connection-oriented </li></ul></ul><ul><ul><li>UDP – Unreliable, unsequenced, connectionless </li></ul></ul><ul><ul><li>Handled by kernel which provides us an interface </li></ul></ul><ul><li>Application is what you are writing </li></ul>
  12. 12. Network Layer Concerns <ul><li>Byte ordering </li></ul><ul><ul><li>Network byte order vs Host byte order </li></ul></ul><ul><li>Addressing </li></ul><ul><ul><li>IPV4: 4 octets xx.xx.xx.xx (32 bits) </li></ul></ul><ul><ul><li>IPV6: eight 16-bit groups, each written as up to 4 hex digits, separated by : (128 bits) </li></ul></ul><ul><ul><ul><li>Ipv4 compatibility </li></ul></ul></ul><ul><ul><ul><li>Scopes </li></ul></ul></ul><ul><ul><li>Subnets </li></ul></ul><ul><ul><li>Unicasting/Broadcasting (v4) /Multicasting (v6) /Anycasting (v6) </li></ul></ul><ul><ul><li>Ports </li></ul></ul><ul><ul><li>Loopback </li></ul></ul>
  13. 13. Network Byte Order <ul><li>uint32_t htonl(uint32_t hostlong); </li></ul><ul><li>uint16_t htons(uint16_t hostshort); </li></ul><ul><li>uint32_t ntohl(uint32_t netlong); </li></ul><ul><li>uint16_t ntohs(uint16_t netshort); </li></ul><ul><li>What about everything else? </li></ul><ul><ul><li>Agreement: the higher level protocol </li></ul></ul><ul><ul><li>Abstraction layers for cross-platform calls (e.g. RPC, RMI): (un)marshalling </li></ul></ul>
  14. 14. IP Address Casting struct sockaddr { sa_family_t sa_family; char sa_data[14]; }; struct sockaddr_in { sa_family_t sin_family; /* AF_INET */ uint16_t sin_port; /* port */ struct in_addr sin_addr; }; struct in_addr { uint32_t s_addr; }; struct sockaddr_in6 { uint16_t sin6_family; /* AF_INET6 */ uint16_t sin6_port; /* port */ uint32_t sin6_flowinfo; struct in6_addr sin6_addr; uint32_t sin6_scope_id; }; struct in6_addr { unsigned char s6_addr[16]; }; IPV4 IPV6
  15. 15. Name Service <ul><li>what hosts (sometimes, what service) </li></ul><ul><li>DNS/BIND, NIS/YP, LDAP </li></ul><ul><li>DNS: domain name (fully qualified) </li></ul><ul><ul><li>The Resolver </li></ul></ul><ul><ul><li>named </li></ul></ul><ul><ul><li>/etc/hosts </li></ul></ul><ul><ul><li>Order: /etc/host.conf </li></ul></ul>
  16. 16. Name / Address Info <ul><li>address ==> name </li></ul><ul><li>Name ==> address(es) </li></ul><ul><li>String ==> Address </li></ul><ul><li>Address ==> String </li></ul><ul><li>My host Info </li></ul>int getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags); int getaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res); void freeaddrinfo(struct addrinfo *res); const char *gai_strerror(int errcode); struct addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; size_t ai_addrlen; struct sockaddr *ai_addr; char *ai_canonname; struct addrinfo *ai_next; }; int inet_pton(int af, const char *src, void *dst); const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt); NI_NOFQDN NI_NUMERICHOST NI_NAMEREQD NI_NUMERICSERV NI_DGRAM int gethostname(char *name, size_t len);
  17. 17. Legacy Name/Address Info <ul><li>struct hostent *gethostbyname(const char *name); </li></ul><ul><li>struct hostent *gethostbyaddr(const void *addr, socklen_t len, int type); </li></ul><ul><li>void herror(const char *s); </li></ul><ul><li>const char *hstrerror(int err); </li></ul><ul><li>Require a deep copy </li></ul><ul><li>GNU extensions: re-entrancy (_r), POSIX extension: gethostent(void) </li></ul><ul><li>IPV4 only: inet_ntoa/aton and family </li></ul>struct hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; };
  18. 18. Sockets <ul><li>Model </li></ul><ul><ul><li>Virtual hookup (like the phone) </li></ul></ul><ul><ul><li>A special “descriptor” (hooks VFS to transport layer) </li></ul></ul><ul><li>Creation </li></ul><ul><ul><li>int socket(int domain, int type, int protocol); </li></ul></ul><ul><li>Domains: PF_{INET, INET6, UNIX, NETLINK ....} </li></ul><ul><li>Types: SOCK_{STREAM, DGRAM, RAW, ...} </li></ul><ul><li>Protocols and getprotoent() </li></ul><ul><li>Address / Socket binding </li></ul><ul><ul><li>int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); </li></ul></ul><ul><ul><li>INADDR_ANY (IPv4), in6addr_any / IN6ADDR_ANY_INIT (IPv6) </li></ul></ul>
  19. 19. Reliable Sockets <ul><li>Connect to server address </li></ul><ul><ul><li>int connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen); </li></ul></ul><ul><li>Listening to incoming connections </li></ul><ul><ul><li>int listen(int sockfd, int backlog); </li></ul></ul><ul><li>Accepting a new connection </li></ul><ul><ul><li>int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); </li></ul></ul><ul><ul><li>Gets a new “child” socket descriptor </li></ul></ul>Stevens et al
  20. 20. Socket States Stevens et al
  21. 21. Socket Options <ul><li>int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen); </li></ul><ul><li>int setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen); </li></ul><ul><li>Some important options: </li></ul><ul><ul><li>SO_KEEPALIVE </li></ul></ul><ul><ul><li>SO_RCVBUF / SO_SNDBUF </li></ul></ul><ul><ul><li>SO_LINGER </li></ul></ul><ul><ul><li>SO_REUSEADDR </li></ul></ul>
  22. 22. Unreliable Communication <ul><li>ssize_t sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen); </li></ul><ul><li>ssize_t recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen); </li></ul><ul><li>To add reliability: </li></ul><ul><ul><li>Connection (You can still connect, no handshake) </li></ul></ul><ul><ul><li>Sequence </li></ul></ul><ul><ul><li>Replies + timeouts + retransmission </li></ul></ul>
  23. 23. I/O <ul><li>Like File I/O: </li></ul><ul><ul><li>read/write/readv/writev/poll/select/ fcntl-SIGIO... </li></ul></ul><ul><li>ssize_t send(int s, const void *buf, size_t len, int flags); </li></ul><ul><li>ssize_t recv(int s, void *buf, size_t len, int flags); </li></ul><ul><li>Flags only matter on connections </li></ul><ul><ul><li>MSG_{CONFIRM, DONTROUTE, DONTWAIT, EOR, MORE, NOSIGNAL, OOB, WAITALL, PEEK} </li></ul></ul>
  24. 24. Message-Based Transfers <ul><li>ssize_t recvmsg(int s, struct msghdr *msg, int flags); </li></ul><ul><li>ssize_t sendmsg(int s, const struct msghdr *msg, int flags); </li></ul><ul><li>Raw sockets </li></ul><ul><li>Ancillary data </li></ul>struct msghdr { void *msg_name; socklen_t msg_namelen; struct iovec *msg_iov; size_t msg_iovlen; void *msg_control; socklen_t msg_controllen; int msg_flags; }; struct cmsghdr { socklen_t cmsg_len; int cmsg_level; int cmsg_type; /* unsigned char cmsg_data[]; */ }; struct cmsghdr *CMSG_FIRSTHDR(struct msghdr *msgh); struct cmsghdr *CMSG_NXTHDR(struct msghdr *msgh, struct cmsghdr *cmsg); size_t CMSG_ALIGN(size_t length); size_t CMSG_SPACE(size_t length); size_t CMSG_LEN(size_t length); unsigned char *CMSG_DATA(struct cmsghdr *cmsg);
  25. 25. Design Decisions <ul><li>UDP, TCP, Raw </li></ul><ul><li>On connection server </li></ul><ul><ul><li>Iterative vs Concurrent </li></ul></ul><ul><ul><li>Thread vs Process </li></ul></ul><ul><ul><li>Pre vs Post </li></ul></ul>
  26. 26. Some examples <ul><li>TCP sshd </li></ul><ul><li>Raw ping </li></ul><ul><li>UDP snmp </li></ul>
  27. 27. UNIX Domain Sockets <ul><li>IPC </li></ul><ul><li>Ancillary data: </li></ul><ul><ul><li>SOL_SOCKET level </li></ul></ul><ul><ul><li>SCM_RIGHTS </li></ul></ul><ul><li>int socketpair(int d, int type, int protocol, int sv[2]); </li></ul><ul><li>udevmonitor example </li></ul><ul><li>Ioctls: FIONREAD, TIOCOUTQ </li></ul>struct sockaddr_un { sa_family_t sun_family; char sun_path[UNIX_PATH_MAX]; };