Introduction to Kernel Programming

Loading...

Flash Player 9 (or above) is needed to view presentations.
We have detected that you do not have it on your computer. To install it, go here.

0 comments

Post a comment

    Post a comment
    Embed Video
    Edit your comment Cancel

    2 Favorites

    Introduction to Kernel Programming - Presentation Transcript

    1. Introduction to Kernel Coding
        • Demystifying Kernel Programming
    2. Outline
      • Context of execution
      • Memory
      • I/O
    3. Mechanism vs Policy
      • Mechanism: Interface to the system resources
      • Policy: How the resource is used
      • Examples:
        • Udev
        • File configuration
    4. Context of execution
      • Possible contexts
        • System Call
        • Interrupt Handling
        • Tasklets
        • Kernel threads
      User space Kernel space Resource Handler Resource User process Kernel thread System Call Handling Interrupt Handling Tasklet
    5. Why do we care?
      • Blocking:
        • Mutual exclusion / Reentrancy
        • Resource Allocation
        • Mixed context code
      • System responsiveness
      • Crashes – what's at stake
    6. Interface
      • General Pattern
        • Central Data Structure
        • Register entry points
        • Entry point definition
      • Know your subsystem
      SUBSYSTEM Resource Handler interface { meth1 meth2 ... } Register deregister meth1 (DS) meth2 (DS) Container consumer
    7. Example – Fileops VFS USER KERNEL DRIVER/FS MODULE fileops { myopen myread myclose } Register deregister myopen (FILE) myread myclose M,M:FOPS open(fd) read write
    8. Registration
      • For certain type, e.g. filesystem
      • For specific objects e.g. file ops
        • Detection by the driver – legacy
        • Detection by a bus driver
    9. struct vfsmount * vfs_kern_mount( struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; int error; mnt = alloc_vfsmnt(name); ... error = type->get_sb(type, flags, name, data, mnt); ... mnt->mnt_mountpoint = mnt->mnt_root; ... return mnt; } static struct file_system_type ** find_filesystem (const char *name, unsigned len) { struct file_system_type **p; for (p=&file_systems; *p; p=&(*p)->next) if (strlen((*p)->name) == len && strncmp((*p)->name, name, len) == 0) break; return p; } struct vfsmount * do_kern_mount( const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); struct vfsmount *mnt; ... mnt = vfs_kern_mount(type, flags, name, data); ... return mnt; } int register_filesystem(struct file_system_type * fs) { int res = 0; struct file_system_type ** p; ... INIT_LIST_HEAD(&fs->fs_supers); write_lock(&file_systems_lock); p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else *p = fs; write_unlock(&file_systems_lock); return res; } struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; unsigned len = ... strlen(name); read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); read_unlock(&file_systems_lock); if (!fs && (request_module("%.*s", len, name) == 0)) { read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); if (fs && !try_module_get(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); } return fs; } VFS EXT3 static int ext3_get_sb (struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); } static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", .get_sb = ext3_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; static int __init init_ext3_fs(void) { ... err = register_filesystem (&ext3_fs_type); ... return 0; }
    10. Device Model (Bovet et al) SUBSYSTEM kset kobject attribute1 attribute2 ... Scan actions Resource Handler PCI pci_register_driver probe driver_if{ ... probe } register_device
    11. Interrupts
      • Registering for interrupts
      • Interrupt Handling – fast and alert
        • Critical regions: Spinlocks and SMP systems
        • Memory allocation
        • System is unresponsive, interrupts masked
      • Tasklets – pretty fast, pretty alert
      • Workqueues – sleep all you want
    12. Interrupt Handling DRIVER WORKQ handler ISR Initialization Tasklet request_irq Device Interrupt KERNEL PROPER schedule_work tasklet_schedule
    13. static irqreturn_t ipw_isr(int irq, void *data) { struct ipw_priv *priv = data; u32 inta, inta_mask; ... spin_lock(&priv->irq_lock); ... inta_mask = ipw_read32(priv, IPW_INTA_MASK_R); ... if (!(inta & (IPW_INTA_MASK_ALL & inta_mask))) { ... } __ipw_disable_interrupts(priv); inta &= (IPW_INTA_MASK_ALL & inta_mask); ipw_write32(priv, IPW_INTA_RW, inta); priv->isr_inta = inta; tasklet_schedule(&priv->irq_tasklet); spin_unlock(&priv->irq_lock); return IRQ_HANDLED; } static void ipw_bg_link_down(struct work_struct *work) { struct ipw_priv *priv = container_of(work, struct ipw_priv, link_down); mutex_lock(&priv->mutex); ipw_link_down(priv); mutex_unlock(&priv->mutex); } static void ipw_irq_tasklet(struct ipw_priv *priv) { u32 inta, inta_mask, handled = 0; unsigned long flags; spin_lock_irqsave(&priv->irq_lock, flags); inta = ipw_read32(priv, IPW_INTA_RW); inta_mask = ipw_read32(priv, IPW_INTA_MASK_R); inta &= (IPW_INTA_MASK_ALL & inta_mask); spin_unlock_irqrestore(&priv->irq_lock, flags); spin_lock_irqsave(&priv->lock, flags); ... if (inta & IPW_INTA_BIT_RF_KILL_DONE) { ... cancel_delayed_work(&priv->request_scan); ... schedule_work(&priv->link_down); queue_delayed_work(priv->workqueue, &priv->rf_kill, 2 * HZ); handled |= IPW_INTA_BIT_RF_KILL_DONE; } ... spin_unlock_irqrestore(&priv->lock, flags); /* enable all interrupts */ ipw_enable_interrupts(priv); } static int __devinit ipw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { ... struct ipw_priv *priv; ... err = ipw_setup_deferred_work(priv); ... err = request_irq(pdev->irq, ipw_isr, IRQF_SHARED, DRV_NAME, priv); ... } static int __devinit ipw_setup_deferred_work(struct ipw_priv *priv) { priv->workqueue = create_workqueue(DRV_NAME); ... INIT_WORK(&priv->link_down, ipw_bg_link_down); ... tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) ipw_irq_tasklet, (unsigned long)priv); ... } TASKLET ISR WORKQ PROBE
    14. What Address Space?!!!
      • Flat space
        • Access to pointers
        • Symbols
      • Across the boundary
        • copy_to/copy_from
    15. asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; struct sockaddr_storage address; struct iovec *iov = iovstack; struct msghdr msg_sys; int err, iov_size, fput_needed; ... if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; ... sock = sockfd_lookup_light(fd, &err, &fput_needed); ... iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); ... iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); ... err = verify_iovec(&msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ); ... err = sock_sendmsg(sock, &msg_sys, total_len); ... return err; } static struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; ... } static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { struct file *file; struct socket *sock; file = fget_light(fd, fput_needed); if (file) { sock = sock_from_file(file, err); if (sock) return sock; fput_light(file, *fput_needed); } return NULL; } #define files_fdtable(files) (rcu_dereference((files)->fdt)) static inline void free_fdtable(struct fdtable *fdt) { call_rcu(&fdt->rcu, free_fdtable_rcu); } struct file *fget_light(unsigned int fd, int *fput_needed) { struct file *file; struct files_struct *files = current->files; *fput_needed = 0; ... rcu_read_lock(); file = fcheck_files(files, fd); ... rcu_read_unlock(); ... return file; } static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; struct fdtable *fdt = files_fdtable(files); ... file = rcu_dereference(fdt->fd[fd]); return file; } SOCKETS FS int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) { if (copy_from_user(kaddr, uaddr, ulen)) return -EFAULT; ... } struct fdtable { ... struct file ** fd; struct rcu_head rcu; ... };
    16. Allocation and flags
      • Page Frame
      • Memory allocation
        • Atomicity: GFP_ATOMIC allocates from reserved page frames – no sleep
        • Contiguity
        • Region: GFP_HIGHMEM, GFP_DMA, GFP_KERNEL
      • Slab allocator
    17. Manipulating User memory
      • Remapping page frames
      • Handling page faults
        • Define vm_operations with a page fault handler
        • Mark page frames to fault (e.g. fork in copy on write)
    18. static int fb_mmap(struct file *file, struct vm_area_struct * vma) { int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; unsigned long off; unsigned long start; u32 len; ... off = vma->vm_pgoff << PAGE_SHIFT; ... lock_kernel(); ... /* frame buffer memory */ start = info->fix.smem_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len); ... unlock_kernel(); start &= PAGE_MASK; .... off += start; vma->vm_pgoff = off >> PAGE_SHIFT; vma->vm_flags |= VM_IO | VM_RESERVED; ... if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; return 0; } int register_framebuffer(struct fb_info *fb_info) { ... registered_fb[i] = fb_info; ... return 0; } static int __devinit nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) { struct fb_info *info; info = framebuffer_alloc(sizeof(struct nvidia_par), &pd->dev); ... nvidiafb_fix.smem_start = pci_resource_start(pd, 1); ... if (register_framebuffer(info) < 0) { printk(KERN_ERR PFX "error registering nVidia framebuffer "); ... } ... return 0; } NVIDIA FRAME BUFFER
    19. Manipulating VMA static int snd_pcm_mmap_status_fault(struct vm_area_struct *area, struct vm_fault *vmf) { struct snd_pcm_substream *substream = area->vm_private_data; struct snd_pcm_runtime *runtime; runtime = substream->runtime; vmf->page = virt_to_page(runtime->status); get_page(vmf->page); return 0; } static struct vm_operations_struct snd_pcm_vm_ops_status = { .fault = snd_pcm_mmap_status_fault, }; static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { long size; if (!(area->vm_flags & VM_READ)) return -EINVAL; size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; area->vm_flags |= VM_RESERVED; return 0; }
    20. I/O
      • Control data:
        • I/O memory remapping
      • Data transfer:
        • DMA
        • PCI Scatter Gather
    21. static int qla2x00_iospace_config(scsi_qla_host_t *ha) { resource_size_t pio; if (pci_request_selected_regions(ha->pdev, ha->bars, QLA2XXX_DRIVER_NAME)) { goto iospace_error_exit; } /* Use MMIO operations for all accesses. */ if (!(pci_resource_flags(ha->pdev, 1) & IORESOURCE_MEM)) { goto iospace_error_exit; } if (pci_resource_len(ha->pdev, 1) < MIN_IOBASE_LEN) { goto iospace_error_exit; } ha->iobase = ioremap(pci_resource_start(ha->pdev, 1), MIN_IOBASE_LEN); if (!ha->iobase) { goto iospace_error_exit; } return (0); iospace_error_exit: return (-ENOMEM); } #define WRT_REG_WORD(addr, data) writew(data,addr) #define RD_REG_WORD_RELAXED(addr) readw_relaxed(addr) #define ISP_REQ_Q_IN(ha, reg) (IS_QLA2100(ha) || IS_QLA2200(ha) ? &(reg)->u.isp2100.mailbox4 : &(reg)->u.isp2300.req_q_in) int qla2x00_start_scsi(srb_t *sp) { scsi_qla_host_t *ha; ... if (scsi_sg_count(cmd)) { nseg = dma_map_sg(&ha->pdev->dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); } else nseg = 0; ... /* Set chip new ring index. */ WRT_REG_WORD(ISP_REQ_Q_IN(ha, reg), ha->req_ring_index); RD_REG_WORD_RELAXED(ISP_REQ_Q_IN(ha, reg)); /* PCI Posting. */ }
    22. Know your Subsystem
      • Specific structures
        • Interface (entry points)
        • The resource objects
      • Specific registration interface
      • Specific objects
    23. References
      • Understanding the Linux Kernel (Daniel Bovet, Marco Cesati)
      • Linux Device Drivers (Alessandro Rubini)
      • Linux Kernel Development (Robert Love)
      • Essential Linux Device Drivers (Sreekrishnan Venkateswaran)
      • Kernel Documentation
      • Code
      • http://www.gelato.unsw.edu.au/~dsw/public-files/kernel-docs/kernel-api/
    SlideShare Zeitgeist 2009

    + Ahmed Mekkawy Nominate

    custom

    707 views, 2 favs, 2 embeds more stats

    The introduction to Kernel Programming session that more

    More info about this document

    © All Rights Reserved

    Go to text version

    • Total Views 707
      • 684 on SlideShare
      • 23 from embeds
    • Comments 0
    • Favorites 2
    • Downloads 43
    Most viewed embeds
    • 14 views on http://www.eglug.org
    • 9 views on http://eglug.org

    more

    All embeds
    • 14 views on http://www.eglug.org
    • 9 views on http://eglug.org

    less

    Flagged as inappropriate Flag as inappropriate
    Flag as inappropriate

    Select your reason for flagging this presentation as inappropriate. If needed, use the feedback form to let us know more details.

    Cancel
    File a copyright complaint
    Having problems? Go to our helpdesk?

    Categories