Linux I/O path_20070116

Transcript of "Linux I/O path_20070116"

  1. The read path, from the system call down to the device driver
     (fs/read_write.c, include/linux/fs.h, fs/ext2/file.c, mm/filemap.c):
     sys_read() -> fget_light() -> vfs_read() -> file->f_op->read() -> do_sync_read()
       -> filp->f_op->aio_read() -> generic_file_aio_read()
       -> generic_file_direct_IO() (O_DIRECT) or do_generic_file_read()
       -> do_generic_mapping_read()
     Layers traversed below that: Page Cache -> Generic Block Layer ->
     Elevator / I/O Scheduler -> Request Queue -> Device Driver.
     A user-space example that exercises this path is sketched below.
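     To make the path concrete, here is a minimal user-space program that drives it
     with ordinary buffered I/O (the file name is only an example; any regular file
     on a disk-backed filesystem such as ext2 will do):

     /* read_example.c - a read() that enters the kernel through sys_read() and,
      * on a page cache miss, walks down to the block layer shown above. */
     #include <fcntl.h>
     #include <stdio.h>
     #include <unistd.h>

     int main(void)
     {
             char buf[4096];
             ssize_t n;
             int fd = open("/etc/hostname", O_RDONLY);   /* example file */

             if (fd < 0)
                     return 1;

             n = read(fd, buf, sizeof(buf));   /* -> sys_read() -> vfs_read() -> ... */
             if (n > 0)
                     fwrite(buf, 1, (size_t)n, stdout);

             close(fd);
             return 0;
     }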
  2. do_generic_mapping_read() (mm/filemap.c), per-page flow: look the page up in
     the page cache; if it is not there, start page cache readahead and fall back
     to readpage(); if readpage() fails, take the error path and bail out; once the
     page is up to date ("page ok"), copy it out and continue with the next page,
     until the request is satisfied and the function returns ("out").
  3. From do_generic_mapping_read() through readahead and the filesystem to
     submit_bio() (mm/filemap.c, mm/readahead.c, fs/ext2/inode.c, fs/mpage.c):
     page_cache_readahead() -> blockable_page_cache_readahead()
       -> __do_page_cache_readahead() -> read_pages()
       -> mapping->a_ops->readpages() = ext2_readpages() -> mpage_readpages()
          (or mapping->a_ops->readpage() = ext2_readpage() -> mpage_readpage())
       -> do_mpage_readpage() (mapping blocks via ext2_get_block())
       -> mpage_bio_submit() -> submit_bio()
     and from there into the Generic Block Layer, Elevator / I/O Scheduler,
     Request Queue and Device Driver.
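     The hand-off from fs/mpage.c into the block layer is a small helper.  Roughly,
     from memory of the 2.6-era source (so treat this as a sketch rather than a
     verbatim quote):

     /* mpage_bio_submit(): attach the completion handler and push the bio down. */
     static struct bio *mpage_bio_submit(int rw, struct bio *bio)
     {
             bio->bi_end_io = mpage_end_io_read;       /* runs when the I/O completes */
             if (rw == WRITE)
                     bio->bi_end_io = mpage_end_io_write;
             submit_bio(rw, bio);                      /* enter the generic block layer */
             return NULL;
     }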
  4. submit_bio() into the generic block layer and the elevator
     (mm/mpage.c, include/linux/blkdev.h, block/ll_rw_blk.c, block/elevator.c,
     block/as-iosched.c):
     submit_bio() -> generic_make_request(bio) [BLK_TA_QUEUE]
       -> q->make_request_fn(q, bio) = __make_request(q, bio)
     Merge path: elv_merge(q, &req, bio) [BLK_TA_MERGE] -> q->back_merge_fn() /
       ll_merge_requests_fn() -> elv_merged_request()
       -> e->ops->elevator_merged_fn()
     New-request path: elv_may_queue() -> get_request() / get_request_wait()
       [BLK_TA_SLEEPRQ] -> current_io_context() -> blk_alloc_request() [BLK_TA_GETRQ]
       -> init_request_from_bio() -> add_request() -> __elv_add_request()
       -> elv_insert() [BLK_TA_INSERT] -> e->ops->elevator_add_req_fn(q, rq)
     and on to the I/O Scheduler, Request Queue and Device Driver.
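     The core of generic_make_request() is just a loop that finds the request queue
     of the target block device and hands it the bio.  A heavily condensed sketch
     (sanity checks, partition remapping and the blktrace hook are omitted, and the
     name is changed to mark it as an illustration rather than the real function):

     /* Condensed illustration of what generic_make_request() boils down to. */
     static void generic_make_request_sketch(struct bio *bio)
     {
             request_queue_t *q;

             do {
                     q = bdev_get_queue(bio->bi_bdev);  /* queue of the target device */
                     /* ... sanity checks, partition remapping, BLK_TA_QUEUE ... */
             } while (q->make_request_fn(q, bio));      /* usually __make_request() */
     }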
  5. IDE driver initialization (drivers/ide/ide-generic.c, drivers/ide/ide-probe.c,
     drivers/ide/ide-io.c, block/ll_rw_blk.c):
     ide_generic_init() -> ideprobe_init() -> hwif_init()
       -> init_irq() -> request_irq(hwif->irq, &ide_intr, ...)
          registers ide_intr() as the IRQ handler;
       -> ide_init_queue() -> blk_init_queue_node(do_ide_request, ...)
          sets q->request_fn = rfn, registering do_ide_request() -> ide_do_request()
          as the I/O request dispatcher.
  6. Disk request queue without an I/O scheduler
     (include/linux/blkdev.h, block/ll_rw_blk.c, block/elevator.c):
     Queueing: q->make_request_fn() -> add_request() -> __elv_add_request()
       [BLK_TA_PLUG] -> elv_insert() [BLK_TA_INSERT] -> list_add_tail()
     Dispatching: q->request_fn() = ide_do_request() -> elv_next_request()
       -> rq = __elv_next_request(q) -> start_request() [BLK_TA_ISSUE (D)]
       -> disk -> interrupt on completion.
  7. Disk request queue with an I/O scheduler
     (include/linux/blkdev.h, block/ll_rw_blk.c, block/elevator.c, block/as-iosched.c):
     Queueing: q->make_request_fn() -> add_request() -> __elv_add_request()
       [BLK_TA_PLUG] -> elv_insert() [BLK_TA_INSERT]
       -> e->ops->elevator_add_req_fn(q, rq)
     Dispatching: q->request_fn() = ide_do_request() -> elv_next_request()
       -> rq = __elv_next_request(q) -> e->ops->elevator_dispatch_fn()
       -> start_request() [BLK_TA_ISSUE (D)] -> disk -> interrupt on completion.
  8. ide_do_request() (drivers/ide/ide-io.c) pulls the next request with
     rq = elv_next_request(drive->queue), which ends up in __elv_next_request(q)
     and, if the dispatch queue is empty, in
     q->elevator->ops->elevator_dispatch_fn(q, 0); the request is then issued with
     start_request(drive, rq).  (block/elevator.c, block/ll_rw_blk.c,
     block/as-iosched.c)

     static inline struct request *__elv_next_request(request_queue_t *q)
     {
             struct request *rq;

             while (1) {
                     while (!list_empty(&q->queue_head)) {
                             rq = list_entry_rq(q->queue_head.next);
                             if (blk_do_ordered(q, &rq))
                                     return rq;
                     }

                     if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
                             return NULL;
             }
     }
  9. The noop I/O scheduler (block/noop-iosched.c):

     static void *noop_init_queue(request_queue_t *q, elevator_t *e)
     {
             struct noop_data *nd;        /* the elevator's private data structure */

             nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
             if (!nd)
                     return NULL;
             INIT_LIST_HEAD(&nd->queue);
             return nd;
     }

     static void noop_add_request(request_queue_t *q, struct request *rq)
     {
             struct noop_data *nd = q->elevator->elevator_data;

             list_add_tail(&rq->queuelist, &nd->queue);
     }

     static int noop_dispatch(request_queue_t *q, int force)
     {
             struct noop_data *nd = q->elevator->elevator_data;

             if (!list_empty(&nd->queue)) {
                     struct request *rq;
                     rq = list_entry(nd->queue.next, struct request, queuelist);
                     list_del_init(&rq->queuelist);
                     elv_dispatch_sort(q, rq);    /* put it on the dispatch queue */
                     return 1;
             }
             return 0;
     }
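     These hooks are wired into the elevator core through a struct elevator_type.
     A trimmed sketch of that registration (the real table in block/noop-iosched.c
     has a few more entries, such as the merge and former/latter request hooks):

     static struct elevator_type elevator_noop = {
             .ops = {
                     .elevator_dispatch_fn   = noop_dispatch,    /* feed dispatch queue */
                     .elevator_add_req_fn    = noop_add_request, /* queue a new request */
                     .elevator_init_fn       = noop_init_queue,  /* allocate noop_data */
             },
             .elevator_name = "noop",
             .elevator_owner = THIS_MODULE,
     };

     static int __init noop_init(void)
     {
             return elv_register(&elevator_noop);   /* make "noop" selectable per queue */
     }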
  10. The runtime data of the Deadline I/O scheduler (block/deadline-iosched.c):

      struct deadline_data {
              /*
               * requests (deadline_rq s) are present on both sort_list and fifo_list
               */
              struct rb_root sort_list[2];
              struct list_head fifo_list[2];

              /*
               * next in sort order. read, write or both are NULL
               */
              struct request *next_rq[2];
              unsigned int batching;          /* number of sequential requests made */
              sector_t last_sector;           /* head position */
              unsigned int starved;           /* times reads have starved writes */

              /*
               * settings that change how the i/o scheduler behaves
               */
              int fifo_expire[2];
              int fifo_batch;
              int writes_starved;
              int front_merges;
      };

      Diagram on the slide: each request sits both on a per-direction FIFO list
      (fifo_list[READ] holding e.g. requests 6, 4, 5 in arrival order;
      fifo_list[WRITE] holding 7, 8, 9) and in a per-direction red-black tree
      sorted by sector (sort_list[READ], sort_list[WRITE]); next_rq[READ] and
      next_rq[WRITE] cache the next request in sort order.
  11. Adding a request to both the rb tree and the FIFO list (block/deadline-iosched.c):

      static void
      deadline_add_request(struct request_queue *q, struct request *rq)
      {
              struct deadline_data *dd = q->elevator->elevator_data;
              const int data_dir = rq_data_dir(rq);

              deadline_add_rq_rb(dd, rq);

              /*
               * set expire time (only used for reads) and add to fifo list
               */
              rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
              list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
      }

      The request lands at the tail of fifo_list[data_dir] and at its
      sector-sorted position in sort_list[data_dir].
  12. deadline_dispatch_requests(), step 1 (block/deadline-iosched.c): check
      whether we are running a sequential batch that is still entitled.

              if (dd->next_rq[WRITE])
                      rq = dd->next_rq[WRITE];
              else
                      rq = dd->next_rq[READ];

              if (rq) {
                      /* we have a "next request" */

                      if (dd->last_sector != rq->sector)
                              /* end the batch on a non sequential request */
                              dd->batching += dd->fifo_batch;

                      if (dd->batching < dd->fifo_batch)
                              /* we are still entitled to batch */
                              goto dispatch_request;
              }
  13. Step 2: if we are not running a batch, choose a new direction to serve
      requests from.  A read request is always favored, unless writes have been
      starved for too long.

              if (reads) {
                      if (writes && (dd->starved++ >= dd->writes_starved))
                              goto dispatch_writes;

                      data_dir = READ;
                      goto dispatch_find_request;
              }

              if (writes) {
      dispatch_writes:
                      dd->starved = 0;

                      data_dir = WRITE;
                      goto dispatch_find_request;
              }
  14. Step 3: choose an appropriate request.  If the first request on the FIFO
      list has expired, serve it; otherwise behave as a "one-way elevator".

      dispatch_find_request:
              if (deadline_check_fifo(dd, data_dir)) {
                      dd->batching = 0;
                      rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
              } else if (dd->next_rq[data_dir]) {
                      rq = dd->next_rq[data_dir];
              } else {
                      struct rb_node *node;

                      dd->batching = 0;
                      node = rb_first(&dd->sort_list[data_dir]);
                      if (node)
                              rq = rb_entry_rq(node);
              }
  15. Dispatch the chosen request: remove it from the elevator's private queues,
      put it on the dispatch queue, and update the "last" and "next" bookkeeping
      (block/deadline-iosched.c).  Suppose request 4 was picked in the previous
      step: next_rq[READ] then moves on to its rb-tree successor.

      static void
      deadline_move_request(struct deadline_data *dd, struct request *rq)
      {
              const int data_dir = rq_data_dir(rq);
              struct rb_node *rbnext = rb_next(&rq->rb_node);

              dd->next_rq[READ] = NULL;
              dd->next_rq[WRITE] = NULL;

              if (rbnext)
                      dd->next_rq[data_dir] = rb_entry_rq(rbnext);

              dd->last_sector = rq->sector + rq->nr_sectors;

              deadline_move_to_dispatch(dd, rq);
      }
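      Slides 12-15 are fragments of deadline_dispatch_requests().  Condensed from
      those fragments (BUG_ON checks and a couple of details are dropped, so read
      this as an outline rather than the exact kernel source), the whole dispatch
      decision fits together roughly like this:

      static int deadline_dispatch_requests(request_queue_t *q, int force)
      {
              struct deadline_data *dd = q->elevator->elevator_data;
              const int reads = !list_empty(&dd->fifo_list[READ]);
              const int writes = !list_empty(&dd->fifo_list[WRITE]);
              struct request *rq;
              int data_dir;

              /* step 1: keep serving the current batch while it is entitled */
              if (dd->next_rq[WRITE])
                      rq = dd->next_rq[WRITE];
              else
                      rq = dd->next_rq[READ];

              if (rq) {
                      if (dd->last_sector != rq->sector)
                              dd->batching += dd->fifo_batch;
                      if (dd->batching < dd->fifo_batch)
                              goto dispatch_request;
              }

              /* step 2: pick a direction, favoring reads unless writes starve */
              if (reads) {
                      if (writes && (dd->starved++ >= dd->writes_starved))
                              goto dispatch_writes;
                      data_dir = READ;
                      goto dispatch_find_request;
              }
              if (writes) {
      dispatch_writes:
                      dd->starved = 0;
                      data_dir = WRITE;
                      goto dispatch_find_request;
              }
              return 0;

      dispatch_find_request:
              /* step 3: expired FIFO head, else cached next_rq, else leftmost node */
              if (deadline_check_fifo(dd, data_dir)) {
                      dd->batching = 0;
                      rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
              } else if (dd->next_rq[data_dir]) {
                      rq = dd->next_rq[data_dir];
              } else {
                      struct rb_node *node = rb_first(&dd->sort_list[data_dir]);
                      dd->batching = 0;
                      if (node)
                              rq = rb_entry_rq(node);
              }

      dispatch_request:
              /* step 4: account for the batch and move it to the dispatch queue */
              dd->batching++;
              deadline_move_request(dd, rq);
              return 1;
      }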
  16. The Anticipatory scheduler: adding a request (block/as-iosched.c):

      static void as_add_request(request_queue_t *q, struct request *rq)
      {
              struct as_data *ad = q->elevator->elevator_data;
              int data_dir;

              RQ_SET_STATE(rq, AS_RQ_NEW);

              data_dir = rq_is_sync(rq);

              rq->elevator_private = as_get_io_context(q->node);

              if (RQ_IOC(rq)) {
                      as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
                      atomic_inc(&RQ_IOC(rq)->aic->nr_queued);
              }

              as_add_rq_rb(ad, rq);

              /*
               * set expire time (only used for reads) and add to fifo list
               */
              rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]);
              list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]);

              as_update_rq(ad, rq);   /* keep state machine up to date */
              RQ_SET_STATE(rq, AS_RQ_QUEUED);
      }
  17. Inside as_add_request() (block/as-iosched.c, block/ll_rw_blk.c,
      include/linux/list.h, include/linux/elevator.h):
      as_get_io_context(q->node) -> get_io_context() -> current_io_context()
        -> alloc_as_io_context(): ties the request to the submitting task's
        io_context / as_io_context.
      as_update_iohist() -> as_update_thinktime(), as_update_seekdist()
      as_add_rq_rb(), rq_set_fifo_time(), list_add_tail(): queue the request.
      as_update_rq(ad, rq) -> as_choose_req(), as_can_break_anticipation()
        (which also calls as_update_iohist()) -> as_antic_stop()
        -> del_timer(), kblockd_schedule_work().
  18. (The same as_add_request() call diagram as slide 17.)
  19. as_update_iohist() (block/as-iosched.c):

      static void
      as_update_iohist(struct as_data *ad, struct as_io_context *aic,
                       struct request *rq)
      {
              ...
              if (data_dir == REQ_SYNC) {
                      unsigned long in_flight = atomic_read(&aic->nr_queued)
                                              + atomic_read(&aic->nr_dispatched);
                      spin_lock(&aic->lock);
                      if (test_bit(AS_TASK_IORUNNING, &aic->state) ||
                          test_bit(AS_TASK_IOSTARTED, &aic->state)) {
                              /* Calculate read -> read thinktime */
                              if (test_bit(AS_TASK_IORUNNING, &aic->state)
                                              && in_flight == 0) {
                                      thinktime = jiffies - aic->last_end_request;
                                      thinktime = min(thinktime, MAX_THINKTIME-1);
                              }
                              as_update_thinktime(ad, aic, thinktime);

                              /* Calculate read -> read seek distance */
                              if (aic->last_request_pos < rq->sector)
                                      seek_dist = rq->sector - aic->last_request_pos;
                              else
                                      seek_dist = aic->last_request_pos - rq->sector;
                              as_update_seekdist(ad, aic, seek_dist);
                      }
                      aic->last_request_pos = rq->sector + rq->nr_sectors;
                      set_bit(AS_TASK_IOSTARTED, &aic->state);
                      spin_unlock(&aic->lock);
              }
      }
  20. (The same as_add_request() call diagram as slides 17 and 18.)
  21. The anticipation state machine (block/as-iosched.c):

      enum anticipation_status {
              ANTIC_OFF = 0,          /* Not anticipating (normal operation) */
              ANTIC_WAIT_REQ,         /* The last read has not yet completed */
              ANTIC_WAIT_NEXT,        /* Currently anticipating a request vs
                                         last read (which has completed) */
              ANTIC_FINISHED,         /* Anticipating but have found a candidate
                                         or timed out */
      };

      Transitions are driven by as_add_request() / as_update_rq() / as_add_rq_rb(),
      as_dispatch_request() / as_move_to_dispatch() and as_completed_request():
      as_antic_waitreq() enters ANTIC_WAIT_REQ, as_antic_waitnext() enters
      ANTIC_WAIT_NEXT, and as_antic_stop() or as_antic_timeout() move to
      ANTIC_FINISHED and schedule the kblockd work that restarts dispatching,
      after which the state returns to ANTIC_OFF.
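      For reference, the two helpers that enter the waiting states are small.
      Paraphrased from memory of the 2.6 block/as-iosched.c with the BUG_ON checks
      left out, so treat this as a sketch rather than a verbatim quote:

      /* Enter ANTIC_WAIT_NEXT: the anticipated read completed; wait up to
       * antic_expire for a close follow-up request from the same process. */
      static void as_antic_waitnext(struct as_data *ad)
      {
              unsigned long timeout = ad->antic_start + ad->antic_expire;

              mod_timer(&ad->antic_timer, timeout);   /* fires as_antic_timeout() */
              ad->antic_status = ANTIC_WAIT_NEXT;
      }

      /* Enter ANTIC_WAIT_REQ: the last read we anticipate on is still in flight. */
      static void as_antic_waitreq(struct as_data *ad)
      {
              if (ad->antic_status == ANTIC_OFF) {
                      if (!ad->io_context || ad->ioc_finished)
                              as_antic_waitnext(ad);  /* nothing pending: arm the timer */
                      else
                              ad->antic_status = ANTIC_WAIT_REQ;
              }
      }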
  22. /*
       * This is called directly by the functions in this file to stop anticipation.
       * We kill the timer and schedule a call to the request_fn asap.
       */
      static void as_antic_stop(struct as_data *ad)
      {
              int status = ad->antic_status;

              if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) {
                      if (status == ANTIC_WAIT_NEXT)
                              del_timer(&ad->antic_timer);
                      ad->antic_status = ANTIC_FINISHED;
                      /* see as_work_handler */
                      kblockd_schedule_work(&ad->antic_work);
              }
      }
  23. /*
       * as_update_rq must be called whenever a request (rq) is added to
       * the sort_list. This function keeps caches up to date, and checks if the
       * request might be one we are "anticipating"
       */
      static void as_update_rq(struct as_data *ad, struct request *rq)
      {
              const int data_dir = rq_is_sync(rq);

              /* keep the next_rq cache up to date */
              ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]);

              /*
               * have we been anticipating this request?
               * or does it come from the same process as the one we are anticipating
               * for?
               */
              if (ad->antic_status == ANTIC_WAIT_REQ
                              || ad->antic_status == ANTIC_WAIT_NEXT) {
                      if (as_can_break_anticipation(ad, rq))
                              as_antic_stop(ad);
              }
      }
  24. /*
       * This is executed in a "deferred" process context, by kblockd. It calls the
       * drivers request_fn so the driver can submit that request.
       *
       * IMPORTANT! This guy will reenter the elevator, so set up all queue global
       * state before calling, and dont rely on any state over calls.
       *
       * FIXME! dispatch queue is not a queue at all!
       */
      static void as_work_handler(void *data)
      {
              struct request_queue *q = data;
              unsigned long flags;

              spin_lock_irqsave(q->queue_lock, flags);
              blk_start_queueing(q);
              spin_unlock_irqrestore(q->queue_lock, flags);
      }
  25. /*
       * as_antic_timeout is the timer function set by as_antic_waitnext.
       */
      static void as_antic_timeout(unsigned long data)
      {
              struct request_queue *q = (struct request_queue *)data;
              struct as_data *ad = q->elevator->elevator_data;
              unsigned long flags;

              spin_lock_irqsave(q->queue_lock, flags);
              if (ad->antic_status == ANTIC_WAIT_REQ
                              || ad->antic_status == ANTIC_WAIT_NEXT) {
                      struct as_io_context *aic = ad->io_context->aic;

                      ad->antic_status = ANTIC_FINISHED;
                      kblockd_schedule_work(&ad->antic_work);

                      if (aic->ttime_samples == 0) {
                              /* process anticipated on has exited or timed out */
                              ad->exit_prob = (7*ad->exit_prob + 256)/8;
                      }
                      if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
                              /* process not "saved" by a cooperating request */
                              ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
                      }
              }
              spin_unlock_irqrestore(q->queue_lock, flags);
      }
  26. static void as_put_io_context(struct request *rq)
      {
              struct as_io_context *aic;

              if (unlikely(!RQ_IOC(rq)))
                      return;

              aic = RQ_IOC(rq)->aic;

              if (rq_is_sync(rq) && aic) {
                      spin_lock(&aic->lock);
                      set_bit(AS_TASK_IORUNNING, &aic->state);
                      aic->last_end_request = jiffies;
                      spin_unlock(&aic->lock);
              }

              put_io_context(RQ_IOC(rq));
      }
  27. Overview call graph of the Anticipatory scheduler, relating: as_update_rq(),
      as_choose_req(), as_can_anticipate(), as_can_break_anticipation(),
      as_update_iohist(), as_update_thinktime(), as_update_seekdist(),
      as_antic_stop(), as_antic_expired(), as_close_req(), as_completed_request(),
      as_dispatch_request(), as_move_to_dispatch(), update_write_batch(),
      as_get_io_context(), as_put_io_context(), get_io_context(), put_io_context(),
      copy_io_context(), alloc_as_io_context(), as_remove_queued_request(),
      elv_dispatch_sort(), as_find_next_rq(), as_antic_waitreq(),
      as_antic_waitnext(), as_fifo_expired(), as_batch_expired() and
      kblockd_schedule_work().
  28. The kblockd workqueue used above is created at boot time:
      genhd_device_init() -> blk_dev_init() -> create_workqueue("kblockd").
  29. Starting the queue: plugged vs. not plugged (block/ll_rw_blk.c).  If the
      queue is not plugged, q->request_fn() is called directly; if it is plugged,
      the plug is removed and its timer deleted before calling q->request_fn().

      void blk_start_queueing(request_queue_t *q)
      {
              if (!blk_queue_plugged(q))
                      q->request_fn(q);
              else
                      __generic_unplug_device(q);
      }

      void __generic_unplug_device(request_queue_t *q)
      {
              if (unlikely(blk_queue_stopped(q)))
                      return;

              if (!blk_remove_plug(q))
                      return;

              q->request_fn(q);
      }

      int blk_remove_plug(request_queue_t *q)
      {
              WARN_ON(!irqs_disabled());

              if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
                      return 0;

              del_timer(&q->unplug_timer);
              return 1;
      }
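      The counterpart that sets the plug is not on the slide; from memory of the
      same era of block/ll_rw_blk.c it looks roughly like this (a sketch, not a
      verbatim quote):

      /* Plug the queue: briefly defer q->request_fn() so more requests can merge.
       * The unplug timer guarantees the queue is kicked even if nobody unplugs. */
      void blk_plug_device(request_queue_t *q)
      {
              WARN_ON(!irqs_disabled());

              /* don't plug a stopped queue; blk_start_queue() will restart it */
              if (blk_queue_stopped(q))
                      return;

              if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
                      mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
      }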
  30. Request queue defaults set by blk_queue_make_request() (block/ll_rw_blk.c):

      void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
      {
              q->nr_requests = BLKDEV_MAX_RQ;
              blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
              blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
              q->make_request_fn = mfn;
              q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
              q->backing_dev_info.state = 0;
              q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
              blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
              blk_queue_hardsect_size(q, 512);
              blk_queue_dma_alignment(q, 511);
              blk_queue_congestion_threshold(q);
              q->nr_batching = BLK_BATCH_REQ;

              q->unplug_thresh = 4;                   /* hmm */
              q->unplug_delay = (3 * HZ) / 1000;      /* 3 milliseconds */
              if (q->unplug_delay == 0)
                      q->unplug_delay = 1;

              INIT_WORK(&q->unplug_work, blk_unplug_work, q);

              q->unplug_timer.function = blk_unplug_timeout;
              q->unplug_timer.data = (unsigned long)q;

              blk_queue_activity_fn(q, NULL, NULL);
      }
  31. blk_init_queue_node() (block/ll_rw_blk.c, declared in include/linux/blkdev.h):

      request_queue_t *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock,
                                           int node_id)
      {
              request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

              if (!q)
                      return NULL;

              q->node = node_id;
              blk_init_free_list(q);

              q->request_fn           = rfn;
              q->back_merge_fn        = ll_back_merge_fn;
              q->front_merge_fn       = ll_front_merge_fn;
              q->merge_requests_fn    = ll_merge_requests_fn;
              q->prep_rq_fn           = NULL;
              q->unplug_fn            = generic_unplug_device;
              q->queue_flags          = (1 << QUEUE_FLAG_CLUSTER);
              q->queue_lock           = lock;

              blk_queue_segment_boundary(q, 0xffffffff);

              blk_queue_make_request(q, __make_request);
              blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

              /* all done */
              elevator_init(q, NULL);
              return q;
      }
  32. sys_read() (fs/read_write.c):

      ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
      {
              struct file *file;
              ssize_t ret = -EBADF;
              int fput_needed;

              file = fget_light(fd, &fput_needed);
              if (file) {
                      loff_t pos = file_pos_read(file);
                      ret = vfs_read(file, buf, count, &pos);
                      file_pos_write(file, pos);
                      fput_light(file, fput_needed);
              }

              return ret;
      }
  33. vfs_read() (fs/read_write.c):

      ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
      {
              ssize_t ret;

              if (!(file->f_mode & FMODE_READ))
                      return -EBADF;
              if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
                      return -EINVAL;
              if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
                      return -EFAULT;

              ret = rw_verify_area(READ, file, pos, count);
              if (ret >= 0) {
                      count = ret;
                      if (file->f_op->read)
                              ret = file->f_op->read(file, buf, count, pos);
                      else
                              ret = do_sync_read(file, buf, count, pos);
                      if (ret > 0) {
                              fsnotify_access(file->f_dentry);
                              current->rchar += ret;
                      }
                      current->syscr++;
              }

              return ret;
      }
  34. do_sync_read() (fs/read_write.c):

      ssize_t do_sync_read(struct file *filp, char __user *buf,
                           size_t len, loff_t *ppos)
      {
              struct iovec iov = { .iov_base = buf, .iov_len = len };
              struct kiocb kiocb;
              ssize_t ret;

              init_sync_kiocb(&kiocb, filp);
              kiocb.ki_pos = *ppos;
              kiocb.ki_left = len;

              for (;;) {
                      ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
                      if (ret != -EIOCBRETRY)
                              break;
                      wait_on_retry_sync_kiocb(&kiocb);
              }

              if (-EIOCBQUEUED == ret)
                      ret = wait_on_sync_kiocb(&kiocb);
              *ppos = kiocb.ki_pos;
              return ret;
      }
  35. generic_file_aio_read(), part 1 of 2 (mm/filemap.c):

      ssize_t generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                      unsigned long nr_segs, loff_t pos)
      {
              struct file *filp = iocb->ki_filp;
              ssize_t retval;
              unsigned long seg;
              size_t count;
              loff_t *ppos = &iocb->ki_pos;

              count = 0;
              for (seg = 0; seg < nr_segs; seg++) {
                      const struct iovec *iv = &iov[seg];
                      ...
              }

              /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
              if (filp->f_flags & O_DIRECT) {
                      ...
              }
  36. generic_file_aio_read(), part 2 of 2:

              retval = 0;
              if (count) {
                      for (seg = 0; seg < nr_segs; seg++) {
                              read_descriptor_t desc;

                              desc.written = 0;
                              desc.arg.buf = iov[seg].iov_base;
                              desc.count = iov[seg].iov_len;
                              if (desc.count == 0)
                                      continue;
                              desc.error = 0;
                              do_generic_file_read(filp, ppos, &desc, file_read_actor);
                              retval += desc.written;
                              if (desc.error) {
                                      retval = retval ?: desc.error;
                                      break;
                              }
                      }
              }
      out:
              return retval;
      }
  37. do_generic_file_read() is a thin wrapper that forwards to
      do_generic_mapping_read():

      static inline void do_generic_file_read(struct file * filp,
                                              loff_t *ppos,
                                              read_descriptor_t * desc,
                                              read_actor_t actor)
      {
              do_generic_mapping_read(filp->f_mapping, &filp->f_ra, filp,
                                      ppos, desc, actor);
      }