SlideShare a Scribd company logo
1 of 30
Download to read offline
The TCP/IP stack in the FreeBSD
kernel: an overview of the
implementation
--------------------
svg version by killasmurf86
killasmurf86@gmail.com
http://killasmurf86.lv
Kevin Lo
msi
The FreeBSD project
Examples of operating systems use
FreeBSD-based network stack
DragonflyBSD,a fork of FreeBSD
OS X from Apple
Osv from Cloudius Systems
RTEMS
Peeking at the Linux kernel changelogs
Increase the initial cwnd to 10 (RFC 6928)
: 2.6.39
Proportional Rate Reduction for TCP
(RFC 6937) : 3.2
Early Retransmit for TCP (RFC 5827)
: 3.5
TCP Fast Open : 3.6, 3.7
Memory buffers (mbufs)
struct mbuf: the most important data structure in
the FreeBSD networking subsystem,
which is defined in <sys/mbuf.h>
Every packet sent/received is handled using the
mbuf structure
Mbufs are fixed size data buffers (256 bytes each)
mbuf structure
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
m_hdr structure
/*
* Header present at the beginning of every mbuf.
* Size ILP32: 24
* LP64: 32
*/
struct m_hdr {
struct mbuf *mh_next; /* next buffer in chain */
struct mbuf *mh_nextpkt; /* next chain in queue/record */
caddr_t mh_data; /* location of data */
int32_t mh_len; /* amount of data in this mbuf */
uint32_t mh_type:8, /* type of data in this mbuf */
mh_flags:24; /* flags; see below */
#if !defined(__LP64__)
uint32_t mh_pad; /* pad for 64bit alignment */
#endif
};
On 64-bit platforms, this results into 3 * 8 bytes + 2 * 4 bytes = 32 bytes
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
Simple mbuf
Simple mbuf
m_next
m_nextpkt
m_data
m_len
m_type
m_flags 0
m_dat
Pointer to the next mbuf
Pointer to the next mbuf chain
Pointer to data attached to this
mbuf
Length of the data in this mbuf
Type of the data in this mbuf
Type of mbuf
MLEN (224 bytes)
m_hdr
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
Packet header mbuf
Packet header mbuf
m_next
m_nextpkt
m_data
m_len
m_type
m_flags M_PKTHDR
m_pkthdr.rcvif
m_pkthdr.len
m_pkthdr.csum_flags
m_pkthdr.csum_data
...
m_pktdat
Pointer to the received interface
Total length of mbuf chain
Used for hardware checksum offloading
Checksum of the data portion of the
packet
MHLEN (168 bytes)
pkthdr
A typical UDP packet
m_next
m_nextpkt
m_data
m_len
m_type
m_flags
m_pkthdr.rcvif
m_pkthdr.len
m_pkthdr.csum_flags
m_pkthdr.csum_data
...
28 bytes for IPv4 + UDP
header
m_next
m_nextpkt
m_data
m_len
m_type
m_flags
150 bytes of data
NULL
NULL
150
MT_DATA
0M_PKTHDR
NULL
MT_DATA
28
178
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
Mbuf page cluster
Mbuf page cluster
m_next
m_nextpkt
m_data
m_len
m_type
m_flags M_EXT
m_ext.ref_cnt
m_ext.ext_buf
m_ext.ext_size
m_ext.ext_type
m_ext.ext_free
...
not used
Pointer to the reference counter
Pointer to the external buffer
Size of the buffer
Type of external storage
Pointer to the function is used to
release the buffer
m_ext
MCLBYTES
(2048 bytes)
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
Package header + page cluster
Packet header + page cluster
m_next
m_nextpkt
m_data
m_len
m_type
m_flags M_PKTHDR | M_EXT
m_pkthdr.rcvif
m_pkthdr.len
m_pkthdr.csum_flags
m_pkthdr.csum_data
...
m_ext.ref_cnt
m_ext.ext_buf
m_ext.ext_size
m_ext.ext_type
m_ext.ext_free
...
not used
pkthdr
m_ext
MCLBYTES
(2048 bytes)
Mbuf utility routines
MGET() / m_get(): allocate an mbuf
MGETHDR() / m_gethdr(): allocate an mbuf with a packet
header
MCLGET() / m_clget(): add an external cluster to an mbuf
m_free(): free a single mbuf
m_freem(): free a chain of mbufs
man mbuf
Protocol data structures
The protocol layer uses three main types
of structures:
● domain structure
● protocol switch structure (protosw &
ip6protosw)
● protocol control block (PCB)
Communication domains
Group of related protocols
Each has address family constant
domain structure
Defined in <sys/domain.h>
struct domain {
int dom_family; /* AF_xxx */
char *dom_name;
...
struct protosw *dom_protosw,*dom_protoswNPROTOSW;
struct domain *dom_next;
...
void *(*dom_ifattach)(struct ifnet *);
void (*dom_ifdetach)(struct ifnet *, void *);
/* af-dependent data on ifnet */
};
Supported address families
AF_LOCAL / AF_UNIX Local communication
AF_INET Internet version 4
AF_INET6 Internet version 6
AF_ROUTE Link layer interface
PF_KEY Internal key-management
AF_NATM Asynchronous transfer
mode
AF_NETGRAPH Netgraph sockets
AF_BLUETOOTH Bluetooth protocols
AF_INET_SDP OFED socket direct
protocol
RFC 2367 section 1.3
The PF_KEY protocol family (PF_KEY) symbol is defined in
<sys/socket.h> in the same manner that other protocol
families are defined. PF_KEY does not use any socket
addresses.
Applications using PF_KEY MUST NOT depend on the
availability of a symbol named AF_KEY, but kernel
implementations are encouraged to define that symbol for
completeness.
int s;
s = socket(AF_KEY, SOCK_RAW,PF_KEY_V2);
inetdomainstruct domain inetdomain = {
.dom_family = AF_INET,
.dom_name = "internet",
.dom_protosw = inetsw,
.dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])],
#ifdef RADIX_MPATH
.dom_rtattach = rn4_mpath_inithead,
#else
.dom_rtattach = in_inithead,
#endif
#ifdef VIMAGE
.dom_rtdetach = in_detachhead,
#endif
.dom_rtoffset = 32,
.dom_maxrtkey = sizeof(struct sockaddr_in),
.dom_ifattach = in_domifattach,
.dom_ifdetach = in_domifdetach
};
VNET_DOMAIN_SET(inet);
Domains list
domain{} domain{} domain{}
domain{} domain{} domain{}
domain{}
domains:
localdomain: natmdomain: inet6domain:
sdpdomain: inetdomain: ngdomain:
domain{} domain{}
routedomain: ng_btsocket_domain: keydomain:
protosw structure
Defined in <sys/protosw.h>
/* USE THESE FOR YOUR PROTOTYPES ! */
typedef void pr_input_t (struct mbuf *, int);
typedef int pr_input6_t (struct mbuf **, int*, int); /* XXX FIX THIS */
typedef int pr_output_t (struct mbuf *, struct socket *);
typedef void pr_ctlinput_t (int, struct sockaddr *, void *);
typedef int pr_ctloutput_t (struct socket *, struct sockopt *);
typedef void pr_init_t (void);
typedef void pr_destroy_t (void);
typedef void pr_fasttimo_t (void);
typedef void pr_slowtimo_t (void);
typedef void pr_drain_t (void);
protosw structure (cont.)
pr_type
pr_domain
pr_protocol
pr_flags
...
pr_input
pr_output
pr_ctlinput
pr_ctloutput
...
pr_usrreqs
...
protocol identifiers
protocol – protocol
interface
socket – protocol
interface
Protocol type
Pointer to the associated domain{}
Protocol number
Protocol flags
Input to protocol
Output to protocol
Control input
Control output
User – protocol hook
inetsw[] switch table
struct protosw inetsw[] = {
{
.pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_UDP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = udp_input,
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = udp_ctloutput,
.pr_init = udp_init,
#ifdef VIMAGE
.pr_destroy = udp_destroy,
#endif
.pr_usrreqs = &udp_usrreqs
},
socket{}
so_count
so_type
so_options
so_linger
so_state
so_qstate
so_pcb
so_proto
...
so_rcv
so_snd
...
protosw{}
pr_type
pr_domain
pr_protocol
pr_flags
...
pr_input
pr_output
pr_ctlinput
pr_ctloutput
...
pr_usrreqs
...
pr_usrreqs{}
pru_aboart
pru_accept
pru_attach
pru_bind
pru_connect
...
pru_detach
pru_disconnect
pru_listen
pru_rcvd
pru_send
...
protocol
layer info
socket
buffers
Protocol control block (pcb)
Hold protocol information
Stored as a doubly linked list
Internet protocol control block (inpcb)
Foreign and local IP addresses
Foreign and local port numbers
Back pointer to socket
Per-protocol pcb
TCP control block (tcpcb)
Protocol state information
socket{}
so_count
so_type
so_options
so_linger
so_state
so_qstate
so_pcb
so_proto
...
so_rcv
so_snd
...
inpcb{}
inp_ppcb
inp_socket
...
inp_fport
inp_lport
inp_faddr
inp_laddr
inp_ip_tos
inp_optons
...
inp_lock
tcpcb{}
t_inpcb
t_state
t_flags
...
rcv_wnd
snd_wnd
snd_cwnd
...
t_maxseg
...
Bibliography

More Related Content

What's hot

Xdp and ebpf_maps
Xdp and ebpf_mapsXdp and ebpf_maps
Xdp and ebpf_mapslcplcp1
 
Trace kernel code tips
Trace kernel code tipsTrace kernel code tips
Trace kernel code tipsViller Hsiao
 
OSSNA 2017 Performance Analysis Superpowers with Linux BPF
OSSNA 2017 Performance Analysis Superpowers with Linux BPFOSSNA 2017 Performance Analysis Superpowers with Linux BPF
OSSNA 2017 Performance Analysis Superpowers with Linux BPFBrendan Gregg
 
Linux 4.x Tracing: Performance Analysis with bcc/BPF
Linux 4.x Tracing: Performance Analysis with bcc/BPFLinux 4.x Tracing: Performance Analysis with bcc/BPF
Linux 4.x Tracing: Performance Analysis with bcc/BPFBrendan Gregg
 
Linux Kernel vs DPDK: HTTP Performance Showdown
Linux Kernel vs DPDK: HTTP Performance ShowdownLinux Kernel vs DPDK: HTTP Performance Showdown
Linux Kernel vs DPDK: HTTP Performance ShowdownScyllaDB
 
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...NECST Lab @ Politecnico di Milano
 
IntelON 2021 Processor Benchmarking
IntelON 2021 Processor BenchmarkingIntelON 2021 Processor Benchmarking
IntelON 2021 Processor BenchmarkingBrendan Gregg
 
Fun with Network Interfaces
Fun with Network InterfacesFun with Network Interfaces
Fun with Network InterfacesKernel TLV
 
Digital Forensics and Incident Response in The Cloud Part 3
Digital Forensics and Incident Response in The Cloud Part 3Digital Forensics and Incident Response in The Cloud Part 3
Digital Forensics and Incident Response in The Cloud Part 3Velocidex Enterprises
 
DPDK: Multi Architecture High Performance Packet Processing
DPDK: Multi Architecture High Performance Packet ProcessingDPDK: Multi Architecture High Performance Packet Processing
DPDK: Multi Architecture High Performance Packet ProcessingMichelle Holley
 
Secure container: Kata container and gVisor
Secure container: Kata container and gVisorSecure container: Kata container and gVisor
Secure container: Kata container and gVisorChing-Hsuan Yen
 
How Linux Processes Your Network Packet - Elazar Leibovich
How Linux Processes Your Network Packet - Elazar LeibovichHow Linux Processes Your Network Packet - Elazar Leibovich
How Linux Processes Your Network Packet - Elazar LeibovichDevOpsDays Tel Aviv
 
BPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLabBPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLabTaeung Song
 
Interrupt Affinityについて
Interrupt AffinityについてInterrupt Affinityについて
Interrupt AffinityについてTakuya ASADA
 
Performance Wins with BPF: Getting Started
Performance Wins with BPF: Getting StartedPerformance Wins with BPF: Getting Started
Performance Wins with BPF: Getting StartedBrendan Gregg
 

What's hot (20)

Dpdk performance
Dpdk performanceDpdk performance
Dpdk performance
 
Xdp and ebpf_maps
Xdp and ebpf_mapsXdp and ebpf_maps
Xdp and ebpf_maps
 
Trace kernel code tips
Trace kernel code tipsTrace kernel code tips
Trace kernel code tips
 
Dpdk applications
Dpdk applicationsDpdk applications
Dpdk applications
 
OSSNA 2017 Performance Analysis Superpowers with Linux BPF
OSSNA 2017 Performance Analysis Superpowers with Linux BPFOSSNA 2017 Performance Analysis Superpowers with Linux BPF
OSSNA 2017 Performance Analysis Superpowers with Linux BPF
 
Linux 4.x Tracing: Performance Analysis with bcc/BPF
Linux 4.x Tracing: Performance Analysis with bcc/BPFLinux 4.x Tracing: Performance Analysis with bcc/BPF
Linux 4.x Tracing: Performance Analysis with bcc/BPF
 
Linux Kernel vs DPDK: HTTP Performance Showdown
Linux Kernel vs DPDK: HTTP Performance ShowdownLinux Kernel vs DPDK: HTTP Performance Showdown
Linux Kernel vs DPDK: HTTP Performance Showdown
 
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...
DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based i...
 
IntelON 2021 Processor Benchmarking
IntelON 2021 Processor BenchmarkingIntelON 2021 Processor Benchmarking
IntelON 2021 Processor Benchmarking
 
Linux Internals - Interview essentials 4.0
Linux Internals - Interview essentials 4.0Linux Internals - Interview essentials 4.0
Linux Internals - Interview essentials 4.0
 
Fun with Network Interfaces
Fun with Network InterfacesFun with Network Interfaces
Fun with Network Interfaces
 
Digital Forensics and Incident Response in The Cloud Part 3
Digital Forensics and Incident Response in The Cloud Part 3Digital Forensics and Incident Response in The Cloud Part 3
Digital Forensics and Incident Response in The Cloud Part 3
 
OVS v OVS-DPDK
OVS v OVS-DPDKOVS v OVS-DPDK
OVS v OVS-DPDK
 
DPDK: Multi Architecture High Performance Packet Processing
DPDK: Multi Architecture High Performance Packet ProcessingDPDK: Multi Architecture High Performance Packet Processing
DPDK: Multi Architecture High Performance Packet Processing
 
Secure container: Kata container and gVisor
Secure container: Kata container and gVisorSecure container: Kata container and gVisor
Secure container: Kata container and gVisor
 
How Linux Processes Your Network Packet - Elazar Leibovich
How Linux Processes Your Network Packet - Elazar LeibovichHow Linux Processes Your Network Packet - Elazar Leibovich
How Linux Processes Your Network Packet - Elazar Leibovich
 
DPDK In Depth
DPDK In DepthDPDK In Depth
DPDK In Depth
 
BPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLabBPF / XDP 8월 세미나 KossLab
BPF / XDP 8월 세미나 KossLab
 
Interrupt Affinityについて
Interrupt AffinityについてInterrupt Affinityについて
Interrupt Affinityについて
 
Performance Wins with BPF: Getting Started
Performance Wins with BPF: Getting StartedPerformance Wins with BPF: Getting Started
Performance Wins with BPF: Getting Started
 

Viewers also liked

MOD server & FreeBSD (FreeBSD Day Taiwan)
MOD server & FreeBSD (FreeBSD Day Taiwan)MOD server & FreeBSD (FreeBSD Day Taiwan)
MOD server & FreeBSD (FreeBSD Day Taiwan)Kevin Lo
 
OMFW 2012: Analyzing Linux Kernel Rootkits with Volatlity
OMFW 2012: Analyzing Linux Kernel Rootkits with VolatlityOMFW 2012: Analyzing Linux Kernel Rootkits with Volatlity
OMFW 2012: Analyzing Linux Kernel Rootkits with VolatlityAndrew Case
 
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanity
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanityKernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanity
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanityAnne Nicolas
 
Cybermania Prelims
Cybermania PrelimsCybermania Prelims
Cybermania PrelimsDivye Kapoor
 
Rootkit 102 - Kernel-Based Rootkit
Rootkit 102 - Kernel-Based RootkitRootkit 102 - Kernel-Based Rootkit
Rootkit 102 - Kernel-Based RootkitChia-Hao Tsai
 
A particle filter based scheme for indoor tracking on an Android Smartphone
A particle filter based scheme for indoor tracking on an Android SmartphoneA particle filter based scheme for indoor tracking on an Android Smartphone
A particle filter based scheme for indoor tracking on an Android SmartphoneDivye Kapoor
 
Linux Internals - Kernel/Core
Linux Internals - Kernel/CoreLinux Internals - Kernel/Core
Linux Internals - Kernel/CoreShay Cohen
 
LAS16-403 - GDB Linux Kernel Awareness
LAS16-403 - GDB Linux Kernel Awareness LAS16-403 - GDB Linux Kernel Awareness
LAS16-403 - GDB Linux Kernel Awareness Peter Griffin
 
The Linux Kernel Implementation of Pipes and FIFOs
The Linux Kernel Implementation of Pipes and FIFOsThe Linux Kernel Implementation of Pipes and FIFOs
The Linux Kernel Implementation of Pipes and FIFOsDivye Kapoor
 
Rootkit on Linux X86 v2.6
Rootkit on Linux X86 v2.6Rootkit on Linux X86 v2.6
Rootkit on Linux X86 v2.6fisher.w.y
 
Linux Kernel Exploitation
Linux Kernel ExploitationLinux Kernel Exploitation
Linux Kernel ExploitationScio Security
 
Part 04 Creating a System Call in Linux
Part 04 Creating a System Call in LinuxPart 04 Creating a System Call in Linux
Part 04 Creating a System Call in LinuxTushar B Kute
 
了解网络
了解网络了解网络
了解网络Feng Yu
 
Debugging Applications with GNU Debugger
Debugging Applications with GNU DebuggerDebugging Applications with GNU Debugger
Debugging Applications with GNU DebuggerPriyank Kapadia
 
了解Cpu
了解Cpu了解Cpu
了解CpuFeng Yu
 
Part 02 Linux Kernel Module Programming
Part 02 Linux Kernel Module ProgrammingPart 02 Linux Kernel Module Programming
Part 02 Linux Kernel Module ProgrammingTushar B Kute
 

Viewers also liked (20)

MOD server & FreeBSD (FreeBSD Day Taiwan)
MOD server & FreeBSD (FreeBSD Day Taiwan)MOD server & FreeBSD (FreeBSD Day Taiwan)
MOD server & FreeBSD (FreeBSD Day Taiwan)
 
New sendfile
New sendfileNew sendfile
New sendfile
 
Linux performance
Linux performanceLinux performance
Linux performance
 
OMFW 2012: Analyzing Linux Kernel Rootkits with Volatlity
OMFW 2012: Analyzing Linux Kernel Rootkits with VolatlityOMFW 2012: Analyzing Linux Kernel Rootkits with Volatlity
OMFW 2012: Analyzing Linux Kernel Rootkits with Volatlity
 
Cybermania Mains
Cybermania MainsCybermania Mains
Cybermania Mains
 
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanity
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanityKernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanity
Kernel Recipes 2015: The stable Linux Kernel Tree - 10 years of insanity
 
Cybermania Prelims
Cybermania PrelimsCybermania Prelims
Cybermania Prelims
 
Rootkit 102 - Kernel-Based Rootkit
Rootkit 102 - Kernel-Based RootkitRootkit 102 - Kernel-Based Rootkit
Rootkit 102 - Kernel-Based Rootkit
 
A particle filter based scheme for indoor tracking on an Android Smartphone
A particle filter based scheme for indoor tracking on an Android SmartphoneA particle filter based scheme for indoor tracking on an Android Smartphone
A particle filter based scheme for indoor tracking on an Android Smartphone
 
Linux Internals - Kernel/Core
Linux Internals - Kernel/CoreLinux Internals - Kernel/Core
Linux Internals - Kernel/Core
 
LAS16-403 - GDB Linux Kernel Awareness
LAS16-403 - GDB Linux Kernel Awareness LAS16-403 - GDB Linux Kernel Awareness
LAS16-403 - GDB Linux Kernel Awareness
 
The Linux Kernel Implementation of Pipes and FIFOs
The Linux Kernel Implementation of Pipes and FIFOsThe Linux Kernel Implementation of Pipes and FIFOs
The Linux Kernel Implementation of Pipes and FIFOs
 
Rootkit on Linux X86 v2.6
Rootkit on Linux X86 v2.6Rootkit on Linux X86 v2.6
Rootkit on Linux X86 v2.6
 
Linux Kernel Exploitation
Linux Kernel ExploitationLinux Kernel Exploitation
Linux Kernel Exploitation
 
Part 04 Creating a System Call in Linux
Part 04 Creating a System Call in LinuxPart 04 Creating a System Call in Linux
Part 04 Creating a System Call in Linux
 
了解网络
了解网络了解网络
了解网络
 
Debugging Applications with GNU Debugger
Debugging Applications with GNU DebuggerDebugging Applications with GNU Debugger
Debugging Applications with GNU Debugger
 
了解Cpu
了解Cpu了解Cpu
了解Cpu
 
Part 02 Linux Kernel Module Programming
Part 02 Linux Kernel Module ProgrammingPart 02 Linux Kernel Module Programming
Part 02 Linux Kernel Module Programming
 
The Stack Frame
The Stack FrameThe Stack Frame
The Stack Frame
 

Similar to The TCP/IP stack in the FreeBSD kernel COSCUP 2014

The TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux KernelThe TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux KernelDivye Kapoor
 
Introduction to Kernel Programming
Introduction to Kernel ProgrammingIntroduction to Kernel Programming
Introduction to Kernel ProgrammingAhmed Mekkawy
 
Mobile Email Security
Mobile Email SecurityMobile Email Security
Mobile Email SecurityRahul Sihag
 
ch 7 POSIX.pptx
ch 7 POSIX.pptxch 7 POSIX.pptx
ch 7 POSIX.pptxsibokac
 
Socket Programming Intro.pptx
Socket  Programming Intro.pptxSocket  Programming Intro.pptx
Socket Programming Intro.pptxssuserc4a497
 
Multithreaded sockets c++11
Multithreaded sockets c++11Multithreaded sockets c++11
Multithreaded sockets c++11Russell Childs
 
Степан Кольцов — Rust — лучше, чем C++
Степан Кольцов — Rust — лучше, чем C++Степан Кольцов — Rust — лучше, чем C++
Степан Кольцов — Rust — лучше, чем C++Yandex
 
#includestdio.h#includestring.h#includestdlib.h#define M.pdf
#includestdio.h#includestring.h#includestdlib.h#define M.pdf#includestdio.h#includestring.h#includestdlib.h#define M.pdf
#includestdio.h#includestring.h#includestdlib.h#define M.pdfANJALIENTERPRISES1
 
SimpleArray between Python and C++
SimpleArray between Python and C++SimpleArray between Python and C++
SimpleArray between Python and C++Yung-Yu Chen
 
OpenSSL Basic Function Call Flow
OpenSSL Basic Function Call FlowOpenSSL Basic Function Call Flow
OpenSSL Basic Function Call FlowWilliam Lee
 
Chapter 3
Chapter 3Chapter 3
Chapter 3lopjuan
 
#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.hSilvaGraf83
 
#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.hMoseStaton39
 
LAS16-300: Mini Conference 2 Cortex-M Software - Device Configuration
LAS16-300: Mini Conference 2 Cortex-M Software - Device ConfigurationLAS16-300: Mini Conference 2 Cortex-M Software - Device Configuration
LAS16-300: Mini Conference 2 Cortex-M Software - Device ConfigurationLinaro
 

Similar to The TCP/IP stack in the FreeBSD kernel COSCUP 2014 (20)

The TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux KernelThe TCP/IP Stack in the Linux Kernel
The TCP/IP Stack in the Linux Kernel
 
Introduction to Kernel Programming
Introduction to Kernel ProgrammingIntroduction to Kernel Programming
Introduction to Kernel Programming
 
Sysprog17
Sysprog17Sysprog17
Sysprog17
 
Mobile Email Security
Mobile Email SecurityMobile Email Security
Mobile Email Security
 
ch 7 POSIX.pptx
ch 7 POSIX.pptxch 7 POSIX.pptx
ch 7 POSIX.pptx
 
Socket Programming Intro.pptx
Socket  Programming Intro.pptxSocket  Programming Intro.pptx
Socket Programming Intro.pptx
 
Multithreaded sockets c++11
Multithreaded sockets c++11Multithreaded sockets c++11
Multithreaded sockets c++11
 
Степан Кольцов — Rust — лучше, чем C++
Степан Кольцов — Rust — лучше, чем C++Степан Кольцов — Rust — лучше, чем C++
Степан Кольцов — Rust — лучше, чем C++
 
#includestdio.h#includestring.h#includestdlib.h#define M.pdf
#includestdio.h#includestring.h#includestdlib.h#define M.pdf#includestdio.h#includestring.h#includestdlib.h#define M.pdf
#includestdio.h#includestring.h#includestdlib.h#define M.pdf
 
Basic socket programming
Basic socket programmingBasic socket programming
Basic socket programming
 
Sockets and Socket-Buffer
Sockets and Socket-BufferSockets and Socket-Buffer
Sockets and Socket-Buffer
 
SimpleArray between Python and C++
SimpleArray between Python and C++SimpleArray between Python and C++
SimpleArray between Python and C++
 
sockets
socketssockets
sockets
 
Embedded C - Lecture 4
Embedded C - Lecture 4Embedded C - Lecture 4
Embedded C - Lecture 4
 
OpenSSL Basic Function Call Flow
OpenSSL Basic Function Call FlowOpenSSL Basic Function Call Flow
OpenSSL Basic Function Call Flow
 
Chapter 3
Chapter 3Chapter 3
Chapter 3
 
Farewell to Disks: Efficient Processing of Obstinate Data
Farewell to Disks: Efficient Processing of Obstinate DataFarewell to Disks: Efficient Processing of Obstinate Data
Farewell to Disks: Efficient Processing of Obstinate Data
 
#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h
 
#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h#include stdio.h#include systypes.h#include syssocket.h
#include stdio.h#include systypes.h#include syssocket.h
 
LAS16-300: Mini Conference 2 Cortex-M Software - Device Configuration
LAS16-300: Mini Conference 2 Cortex-M Software - Device ConfigurationLAS16-300: Mini Conference 2 Cortex-M Software - Device Configuration
LAS16-300: Mini Conference 2 Cortex-M Software - Device Configuration
 

Recently uploaded

W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...panagenda
 
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️Delhi Call girls
 
A Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxA Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxComplianceQuest1
 
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfLearn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfkalichargn70th171
 
TECUNIQUE: Success Stories: IT Service provider
TECUNIQUE: Success Stories: IT Service providerTECUNIQUE: Success Stories: IT Service provider
TECUNIQUE: Success Stories: IT Service providermohitmore19
 
why an Opensea Clone Script might be your perfect match.pdf
why an Opensea Clone Script might be your perfect match.pdfwhy an Opensea Clone Script might be your perfect match.pdf
why an Opensea Clone Script might be your perfect match.pdfjoe51371421
 
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...OnePlan Solutions
 
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...MyIntelliSource, Inc.
 
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...MyIntelliSource, Inc.
 
Salesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantSalesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantAxelRicardoTrocheRiq
 
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️anilsa9823
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comFatema Valibhai
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdfWave PLM
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Modelsaagamshah0812
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsAndolasoft Inc
 
DNT_Corporate presentation know about us
DNT_Corporate presentation know about usDNT_Corporate presentation know about us
DNT_Corporate presentation know about usDynamic Netsoft
 
Clustering techniques data mining book ....
Clustering techniques data mining book ....Clustering techniques data mining book ....
Clustering techniques data mining book ....ShaimaaMohamedGalal
 

Recently uploaded (20)

W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️
call girls in Vaishali (Ghaziabad) 🔝 >༒8448380779 🔝 genuine Escort Service 🔝✔️✔️
 
A Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docxA Secure and Reliable Document Management System is Essential.docx
A Secure and Reliable Document Management System is Essential.docx
 
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdfLearn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
Learn the Fundamentals of XCUITest Framework_ A Beginner's Guide.pdf
 
Exploring iOS App Development: Simplifying the Process
Exploring iOS App Development: Simplifying the ProcessExploring iOS App Development: Simplifying the Process
Exploring iOS App Development: Simplifying the Process
 
TECUNIQUE: Success Stories: IT Service provider
TECUNIQUE: Success Stories: IT Service providerTECUNIQUE: Success Stories: IT Service provider
TECUNIQUE: Success Stories: IT Service provider
 
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICECHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Pushp Vihar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
 
why an Opensea Clone Script might be your perfect match.pdf
why an Opensea Clone Script might be your perfect match.pdfwhy an Opensea Clone Script might be your perfect match.pdf
why an Opensea Clone Script might be your perfect match.pdf
 
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
Tech Tuesday-Harness the Power of Effective Resource Planning with OnePlan’s ...
 
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
 
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
 
Salesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantSalesforce Certified Field Service Consultant
Salesforce Certified Field Service Consultant
 
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online  ☂️
CALL ON ➥8923113531 🔝Call Girls Kakori Lucknow best sexual service Online ☂️
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.com
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Models
 
Microsoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdfMicrosoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdf
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.js
 
DNT_Corporate presentation know about us
DNT_Corporate presentation know about usDNT_Corporate presentation know about us
DNT_Corporate presentation know about us
 
Clustering techniques data mining book ....
Clustering techniques data mining book ....Clustering techniques data mining book ....
Clustering techniques data mining book ....
 

The TCP/IP stack in the FreeBSD kernel COSCUP 2014

  • 1. The TCP/IP stack in the FreeBSD kernel: an overview of the implementation -------------------- svg version by killasmurf86 killasmurf86@gmail.com http://killasmurf86.lv Kevin Lo msi The FreeBSD project
  • 2. Examples of operating systems use FreeBSD-based network stack DragonflyBSD,a fork of FreeBSD OS X from Apple Osv from Cloudius Systems RTEMS
  • 3. Peeking at the Linux kernel changelogs Increase the initial cwnd to 10 (RFC 6928) : 2.6.39 Proportional Rate Reduction for TCP (RFC 6937) : 3.2 Early Retransmit for TCP (RFC 5827) : 3.5 TCP Fast Open : 3.6, 3.7
  • 4. Memory buffers (mbufs) struct mbuf: the most important data structure in the FreeBSD networking subsystem, which is defined in <sys/mbuf.h> Every packet sent/received is handled using the mbuf structure Mbufs are fixed size data buffers (256 bytes each)
  • 5. mbuf structure struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; };
  • 6. m_hdr structure /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ caddr_t mh_data; /* location of data */ int32_t mh_len; /* amount of data in this mbuf */ uint32_t mh_type:8, /* type of data in this mbuf */ mh_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t mh_pad; /* pad for 64bit alignment */ #endif }; On 64-bit platforms, this results into 3 * 8 bytes + 2 * 4 bytes = 32 bytes
  • 7. struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; Simple mbuf
  • 8. Simple mbuf m_next m_nextpkt m_data m_len m_type m_flags 0 m_dat Pointer to the next mbuf Pointer to the next mbuf chain Pointer to data attached to this mbuf Length of the data in this mbuf Type of the data in this mbuf Type of mbuf MLEN (224 bytes) m_hdr
  • 9. struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; Packet header mbuf
  • 10. Packet header mbuf m_next m_nextpkt m_data m_len m_type m_flags M_PKTHDR m_pkthdr.rcvif m_pkthdr.len m_pkthdr.csum_flags m_pkthdr.csum_data ... m_pktdat Pointer to the received interface Total length of mbuf chain Used for hardware checksum offloading Checksum of the data portion of the packet MHLEN (168 bytes) pkthdr
  • 11. A typical UDP packet m_next m_nextpkt m_data m_len m_type m_flags m_pkthdr.rcvif m_pkthdr.len m_pkthdr.csum_flags m_pkthdr.csum_data ... 28 bytes for IPv4 + UDP header m_next m_nextpkt m_data m_len m_type m_flags 150 bytes of data NULL NULL 150 MT_DATA 0M_PKTHDR NULL MT_DATA 28 178
  • 12. struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; Mbuf page cluster
  • 13. Mbuf page cluster m_next m_nextpkt m_data m_len m_type m_flags M_EXT m_ext.ref_cnt m_ext.ext_buf m_ext.ext_size m_ext.ext_type m_ext.ext_free ... not used Pointer to the reference counter Pointer to the external buffer Size of the buffer Type of external storage Pointer to the function is used to release the buffer m_ext MCLBYTES (2048 bytes)
  • 14. struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; Package header + page cluster
  • 15. Packet header + page cluster m_next m_nextpkt m_data m_len m_type m_flags M_PKTHDR | M_EXT m_pkthdr.rcvif m_pkthdr.len m_pkthdr.csum_flags m_pkthdr.csum_data ... m_ext.ref_cnt m_ext.ext_buf m_ext.ext_size m_ext.ext_type m_ext.ext_free ... not used pkthdr m_ext MCLBYTES (2048 bytes)
  • 16. Mbuf utility routines MGET() / m_get(): allocate an mbuf MGETHDR() / m_gethdr(): allocate an mbuf with a packet header MCLGET() / m_clget(): add an external cluster to an mbuf m_free(): free a single mbuf m_freem(): free a chain of mbufs man mbuf
  • 17. Protocol data structures The protocol layer uses three main types of structures: ● domain structure ● protocol switch structure (protosw & ip6protosw) ● protocol control block (PCB)
  • 18. Communication domains Group of related protocols Each has address family constant
  • 19. domain structure Defined in <sys/domain.h> struct domain { int dom_family; /* AF_xxx */ char *dom_name; ... struct protosw *dom_protosw,*dom_protoswNPROTOSW; struct domain *dom_next; ... void *(*dom_ifattach)(struct ifnet *); void (*dom_ifdetach)(struct ifnet *, void *); /* af-dependent data on ifnet */ };
  • 20. Supported address families AF_LOCAL / AF_UNIX Local communication AF_INET Internet version 4 AF_INET6 Internet version 6 AF_ROUTE Link layer interface PF_KEY Internal key-management AF_NATM Asynchronous transfer mode AF_NETGRAPH Netgraph sockets AF_BLUETOOTH Bluetooth protocols AF_INET_SDP OFED socket direct protocol
  • 21. RFC 2367 section 1.3 The PF_KEY protocol family (PF_KEY) symbol is defined in <sys/socket.h> in the same manner that other protocol families are defined. PF_KEY does not use any socket addresses. Applications using PF_KEY MUST NOT depend on the availability of a symbol named AF_KEY, but kernel implementations are encouraged to define that symbol for completeness. int s; s = socket(AF_KEY, SOCK_RAW,PF_KEY_V2);
  • 22. inetdomainstruct domain inetdomain = { .dom_family = AF_INET, .dom_name = "internet", .dom_protosw = inetsw, .dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], #ifdef RADIX_MPATH .dom_rtattach = rn4_mpath_inithead, #else .dom_rtattach = in_inithead, #endif #ifdef VIMAGE .dom_rtdetach = in_detachhead, #endif .dom_rtoffset = 32, .dom_maxrtkey = sizeof(struct sockaddr_in), .dom_ifattach = in_domifattach, .dom_ifdetach = in_domifdetach }; VNET_DOMAIN_SET(inet);
  • 23. Domains list domain{} domain{} domain{} domain{} domain{} domain{} domain{} domains: localdomain: natmdomain: inet6domain: sdpdomain: inetdomain: ngdomain: domain{} domain{} routedomain: ng_btsocket_domain: keydomain:
  • 24. protosw structure Defined in <sys/protosw.h> /* USE THESE FOR YOUR PROTOTYPES ! */ typedef void pr_input_t (struct mbuf *, int); typedef int pr_input6_t (struct mbuf **, int*, int); /* XXX FIX THIS */ typedef int pr_output_t (struct mbuf *, struct socket *); typedef void pr_ctlinput_t (int, struct sockaddr *, void *); typedef int pr_ctloutput_t (struct socket *, struct sockopt *); typedef void pr_init_t (void); typedef void pr_destroy_t (void); typedef void pr_fasttimo_t (void); typedef void pr_slowtimo_t (void); typedef void pr_drain_t (void);
  • 25. protosw structure (cont.) pr_type pr_domain pr_protocol pr_flags ... pr_input pr_output pr_ctlinput pr_ctloutput ... pr_usrreqs ... protocol identifiers protocol – protocol interface socket – protocol interface Protocol type Pointer to the associated domain{} Protocol number Protocol flags Input to protocol Output to protocol Control input Control output User – protocol hook
  • 26. inetsw[] switch table struct protosw inetsw[] = { { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_UDP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, .pr_ctloutput = udp_ctloutput, .pr_init = udp_init, #ifdef VIMAGE .pr_destroy = udp_destroy, #endif .pr_usrreqs = &udp_usrreqs },
  • 28. Protocol control block (pcb) Hold protocol information Stored as a doubly linked list Internet protocol control block (inpcb) Foreign and local IP addresses Foreign and local port numbers Back pointer to socket Per-protocol pcb TCP control block (tcpcb) Protocol state information