参照元†
返り値†
/*
* Walk the user pages, and the file, mapping blocks to disk and generating
* a sequence of (page,offset,len,block) mappings. These mappings are injected
* into submit_page_section(), which takes care of the next stage of submission
*
* Direct IO against a blockdev is different from a file. Because we can
* happily perform page-sized but 512-byte aligned IOs. It is important that
* blockdev IO be able to have fine alignment and large sizes.
*
* So what we do is to permit the ->get_block function to populate bh.b_size
* with the size of IO which is permitted at this offset and this i_blkbits.
*
* For best results, the blockdev should be set up with 512-byte i_blkbits and
* it should set b_size to PAGE_SIZE or more inside get_block(). This gives
* fine alignment but still allows this function to work in PAGE_SIZE units.
*/
static int do_direct_IO(struct dio *dio)
{
const unsigned blkbits = dio->blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
struct page *page;
unsigned block_in_page;
struct buffer_head *map_bh = &dio->map_bh;
int ret = 0;
/* The I/O can start at any block offset within the first page */
block_in_page = dio->first_block_in_page;
while (dio->block_in_file < dio->final_block_in_request) {
page = dio_get_page(dio);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
while (block_in_page < blocks_per_page) {
unsigned offset_in_page = block_in_page << blkbits;
unsigned this_chunk_bytes; /* # of bytes mapped */
unsigned this_chunk_blocks; /* # of blocks */
unsigned u;
if (dio->blocks_available == 0) {
/*
* Need to go and map some more disk
*/
unsigned long blkmask;
unsigned long dio_remainder;
ret = get_more_blocks(dio);
if (ret) {
page_cache_release(page);
goto out;
}
if (!buffer_mapped(map_bh))
goto do_holes;
dio->blocks_available =
map_bh->b_size >> dio->blkbits;
dio->next_block_for_io =
map_bh->b_blocknr << dio->blkfactor;
if (buffer_new(map_bh))
clean_blockdev_aliases(dio);
if (!dio->blkfactor)
goto do_holes;
blkmask = (1 << dio->blkfactor) - 1;
dio_remainder = (dio->block_in_file & blkmask);
/*
* If we are at the start of IO and that IO
* starts partway into a fs-block,
* dio_remainder will be non-zero. If the IO
* is a read then we can simply advance the IO
* cursor to the first block which is to be
* read. But if the IO is a write and the
* block was newly allocated we cannot do that;
* the start of the fs block must be zeroed out
* on-disk
*/
if (!buffer_new(map_bh))
dio->next_block_for_io += dio_remainder;
dio->blocks_available -= dio_remainder;
}
do_holes:
/* Handle holes */
if (!buffer_mapped(map_bh)) {
loff_t i_size_aligned;
/* AKPM: eargh, -ENOTBLK is a hack */
if (dio->rw & WRITE) {
page_cache_release(page);
return -ENOTBLK;
}
/*
* Be sure to account for a partial block as the
* last block in the file
*/
i_size_aligned = ALIGN(i_size_read(dio->inode),
1 << blkbits);
if (dio->block_in_file >=
i_size_aligned >> blkbits) {
/* We hit eof */
page_cache_release(page);
goto out;
}
zero_user(page, block_in_page << blkbits,
1 << blkbits);
dio->block_in_file++;
block_in_page++;
goto next_block;
}
/*
* If we're performing IO which has an alignment which
* is finer than the underlying fs, go check to see if
* we must zero out the start of this block.
*/
if (unlikely(dio->blkfactor && !dio->start_zero_done))
dio_zero_block(dio, 0);
/*
* Work out, in this_chunk_blocks, how much disk we
* can add to this page
*/
this_chunk_blocks = dio->blocks_available;
u = (PAGE_SIZE - offset_in_page) >> blkbits;
if (this_chunk_blocks > u)
this_chunk_blocks = u;
u = dio->final_block_in_request - dio->block_in_file;
if (this_chunk_blocks > u)
this_chunk_blocks = u;
this_chunk_bytes = this_chunk_blocks << blkbits;
BUG_ON(this_chunk_bytes == 0);
dio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, page, offset_in_page,
this_chunk_bytes, dio->next_block_for_io);
if (ret) {
page_cache_release(page);
goto out;
}
dio->next_block_for_io += this_chunk_blocks;
dio->block_in_file += this_chunk_blocks;
block_in_page += this_chunk_blocks;
dio->blocks_available -= this_chunk_blocks;
next_block:
BUG_ON(dio->block_in_file > dio->final_block_in_request);
if (dio->block_in_file == dio->final_block_in_request)
break;
}
/* Drop the ref which was taken in get_user_pages() */
page_cache_release(page);
block_in_page = 0;
}
out:
return ret;
}
コメント†