Root/
1 | /* |
2 | * Copyright (C) 2008 Oracle. All rights reserved. |
3 | * |
4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public |
6 | * License v2 as published by the Free Software Foundation. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public |
14 | * License along with this program; if not, write to the |
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
16 | * Boston, MA 02111-1307, USA. |
17 | */ |
18 | |
19 | #include <linux/kernel.h> |
20 | #include <linux/bio.h> |
21 | #include <linux/buffer_head.h> |
22 | #include <linux/file.h> |
23 | #include <linux/fs.h> |
24 | #include <linux/pagemap.h> |
25 | #include <linux/highmem.h> |
26 | #include <linux/time.h> |
27 | #include <linux/init.h> |
28 | #include <linux/string.h> |
29 | #include <linux/backing-dev.h> |
30 | #include <linux/mpage.h> |
31 | #include <linux/swap.h> |
32 | #include <linux/writeback.h> |
33 | #include <linux/bit_spinlock.h> |
34 | #include <linux/slab.h> |
35 | #include "compat.h" |
36 | #include "ctree.h" |
37 | #include "disk-io.h" |
38 | #include "transaction.h" |
39 | #include "btrfs_inode.h" |
40 | #include "volumes.h" |
41 | #include "ordered-data.h" |
42 | #include "compression.h" |
43 | #include "extent_io.h" |
44 | #include "extent_map.h" |
45 | |
/*
 * Bookkeeping shared by all bios that read or write one compressed extent.
 * It is allocated by the submit functions and freed by the end_io handler
 * of the last bio to complete (when pending_bios drops to zero).
 */
struct compressed_bio {
	/* number of bios pending for this compressed extent */
	atomic_t pending_bios;

	/* the pages with the compressed data on them */
	struct page **compressed_pages;

	/* inode that owns this data */
	struct inode *inode;

	/* starting offset in the inode for our pages */
	u64 start;

	/* number of bytes in the inode we're working on */
	unsigned long len;

	/* number of bytes on disk */
	unsigned long compressed_len;

	/* the compression algorithm for this bio */
	int compress_type;

	/* number of compressed pages in the array */
	unsigned long nr_pages;

	/* IO errors: set to 1 by the end_io handlers when any bio fails */
	int errors;
	/* mirror number the read was submitted with (0 for writes) */
	int mirror_num;

	/* for reads, this is the bio we are copying the data into */
	struct bio *orig_bio;

	/*
	 * the start of a variable length array of checksums only
	 * used by reads.  The space for the extra entries is added to
	 * the allocation by compressed_bio_size().
	 */
	u32 sums;
};
84 | |
85 | static inline int compressed_bio_size(struct btrfs_root *root, |
86 | unsigned long disk_size) |
87 | { |
88 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); |
89 | return sizeof(struct compressed_bio) + |
90 | ((disk_size + root->sectorsize - 1) / root->sectorsize) * |
91 | csum_size; |
92 | } |
93 | |
94 | static struct bio *compressed_bio_alloc(struct block_device *bdev, |
95 | u64 first_byte, gfp_t gfp_flags) |
96 | { |
97 | int nr_vecs; |
98 | |
99 | nr_vecs = bio_get_nr_vecs(bdev); |
100 | return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); |
101 | } |
102 | |
103 | static int check_compressed_csum(struct inode *inode, |
104 | struct compressed_bio *cb, |
105 | u64 disk_start) |
106 | { |
107 | int ret; |
108 | struct btrfs_root *root = BTRFS_I(inode)->root; |
109 | struct page *page; |
110 | unsigned long i; |
111 | char *kaddr; |
112 | u32 csum; |
113 | u32 *cb_sum = &cb->sums; |
114 | |
115 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
116 | return 0; |
117 | |
118 | for (i = 0; i < cb->nr_pages; i++) { |
119 | page = cb->compressed_pages[i]; |
120 | csum = ~(u32)0; |
121 | |
122 | kaddr = kmap_atomic(page, KM_USER0); |
123 | csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); |
124 | btrfs_csum_final(csum, (char *)&csum); |
125 | kunmap_atomic(kaddr, KM_USER0); |
126 | |
127 | if (csum != *cb_sum) { |
128 | printk(KERN_INFO "btrfs csum failed ino %llu " |
129 | "extent %llu csum %u " |
130 | "wanted %u mirror %d\n", |
131 | (unsigned long long)btrfs_ino(inode), |
132 | (unsigned long long)disk_start, |
133 | csum, *cb_sum, cb->mirror_num); |
134 | ret = -EIO; |
135 | goto fail; |
136 | } |
137 | cb_sum++; |
138 | |
139 | } |
140 | ret = 0; |
141 | fail: |
142 | return ret; |
143 | } |
144 | |
145 | /* when we finish reading compressed pages from the disk, we |
146 | * decompress them and then run the bio end_io routines on the |
147 | * decompressed pages (in the inode address space). |
148 | * |
149 | * This allows the checksumming and other IO error handling routines |
150 | * to work normally |
151 | * |
152 | * The compressed pages are freed here, and it must be run |
153 | * in process context |
154 | */ |
155 | static void end_compressed_bio_read(struct bio *bio, int err) |
156 | { |
157 | struct compressed_bio *cb = bio->bi_private; |
158 | struct inode *inode; |
159 | struct page *page; |
160 | unsigned long index; |
161 | int ret; |
162 | |
163 | if (err) |
164 | cb->errors = 1; |
165 | |
166 | /* if there are more bios still pending for this compressed |
167 | * extent, just exit |
168 | */ |
169 | if (!atomic_dec_and_test(&cb->pending_bios)) |
170 | goto out; |
171 | |
172 | inode = cb->inode; |
173 | ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9); |
174 | if (ret) |
175 | goto csum_failed; |
176 | |
177 | /* ok, we're the last bio for this extent, lets start |
178 | * the decompression. |
179 | */ |
180 | ret = btrfs_decompress_biovec(cb->compress_type, |
181 | cb->compressed_pages, |
182 | cb->start, |
183 | cb->orig_bio->bi_io_vec, |
184 | cb->orig_bio->bi_vcnt, |
185 | cb->compressed_len); |
186 | csum_failed: |
187 | if (ret) |
188 | cb->errors = 1; |
189 | |
190 | /* release the compressed pages */ |
191 | index = 0; |
192 | for (index = 0; index < cb->nr_pages; index++) { |
193 | page = cb->compressed_pages[index]; |
194 | page->mapping = NULL; |
195 | page_cache_release(page); |
196 | } |
197 | |
198 | /* do io completion on the original bio */ |
199 | if (cb->errors) { |
200 | bio_io_error(cb->orig_bio); |
201 | } else { |
202 | int bio_index = 0; |
203 | struct bio_vec *bvec = cb->orig_bio->bi_io_vec; |
204 | |
205 | /* |
206 | * we have verified the checksum already, set page |
207 | * checked so the end_io handlers know about it |
208 | */ |
209 | while (bio_index < cb->orig_bio->bi_vcnt) { |
210 | SetPageChecked(bvec->bv_page); |
211 | bvec++; |
212 | bio_index++; |
213 | } |
214 | bio_endio(cb->orig_bio, 0); |
215 | } |
216 | |
217 | /* finally free the cb struct */ |
218 | kfree(cb->compressed_pages); |
219 | kfree(cb); |
220 | out: |
221 | bio_put(bio); |
222 | } |
223 | |
224 | /* |
225 | * Clear the writeback bits on all of the file |
226 | * pages for a compressed write |
227 | */ |
228 | static noinline int end_compressed_writeback(struct inode *inode, u64 start, |
229 | unsigned long ram_size) |
230 | { |
231 | unsigned long index = start >> PAGE_CACHE_SHIFT; |
232 | unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT; |
233 | struct page *pages[16]; |
234 | unsigned long nr_pages = end_index - index + 1; |
235 | int i; |
236 | int ret; |
237 | |
238 | while (nr_pages > 0) { |
239 | ret = find_get_pages_contig(inode->i_mapping, index, |
240 | min_t(unsigned long, |
241 | nr_pages, ARRAY_SIZE(pages)), pages); |
242 | if (ret == 0) { |
243 | nr_pages -= 1; |
244 | index += 1; |
245 | continue; |
246 | } |
247 | for (i = 0; i < ret; i++) { |
248 | end_page_writeback(pages[i]); |
249 | page_cache_release(pages[i]); |
250 | } |
251 | nr_pages -= ret; |
252 | index += ret; |
253 | } |
254 | /* the inode may be gone now */ |
255 | return 0; |
256 | } |
257 | |
258 | /* |
259 | * do the cleanup once all the compressed pages hit the disk. |
260 | * This will clear writeback on the file pages and free the compressed |
261 | * pages. |
262 | * |
263 | * This also calls the writeback end hooks for the file pages so that |
264 | * metadata and checksums can be updated in the file. |
265 | */ |
266 | static void end_compressed_bio_write(struct bio *bio, int err) |
267 | { |
268 | struct extent_io_tree *tree; |
269 | struct compressed_bio *cb = bio->bi_private; |
270 | struct inode *inode; |
271 | struct page *page; |
272 | unsigned long index; |
273 | |
274 | if (err) |
275 | cb->errors = 1; |
276 | |
277 | /* if there are more bios still pending for this compressed |
278 | * extent, just exit |
279 | */ |
280 | if (!atomic_dec_and_test(&cb->pending_bios)) |
281 | goto out; |
282 | |
283 | /* ok, we're the last bio for this extent, step one is to |
284 | * call back into the FS and do all the end_io operations |
285 | */ |
286 | inode = cb->inode; |
287 | tree = &BTRFS_I(inode)->io_tree; |
288 | cb->compressed_pages[0]->mapping = cb->inode->i_mapping; |
289 | tree->ops->writepage_end_io_hook(cb->compressed_pages[0], |
290 | cb->start, |
291 | cb->start + cb->len - 1, |
292 | NULL, 1); |
293 | cb->compressed_pages[0]->mapping = NULL; |
294 | |
295 | end_compressed_writeback(inode, cb->start, cb->len); |
296 | /* note, our inode could be gone now */ |
297 | |
298 | /* |
299 | * release the compressed pages, these came from alloc_page and |
300 | * are not attached to the inode at all |
301 | */ |
302 | index = 0; |
303 | for (index = 0; index < cb->nr_pages; index++) { |
304 | page = cb->compressed_pages[index]; |
305 | page->mapping = NULL; |
306 | page_cache_release(page); |
307 | } |
308 | |
309 | /* finally free the cb struct */ |
310 | kfree(cb->compressed_pages); |
311 | kfree(cb); |
312 | out: |
313 | bio_put(bio); |
314 | } |
315 | |
316 | /* |
317 | * worker function to build and submit bios for previously compressed pages. |
318 | * The corresponding pages in the inode should be marked for writeback |
319 | * and the compressed pages should have a reference on them for dropping |
320 | * when the IO is complete. |
321 | * |
322 | * This also checksums the file bytes and gets things ready for |
323 | * the end io hooks. |
324 | */ |
325 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
326 | unsigned long len, u64 disk_start, |
327 | unsigned long compressed_len, |
328 | struct page **compressed_pages, |
329 | unsigned long nr_pages) |
330 | { |
331 | struct bio *bio = NULL; |
332 | struct btrfs_root *root = BTRFS_I(inode)->root; |
333 | struct compressed_bio *cb; |
334 | unsigned long bytes_left; |
335 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
336 | int pg_index = 0; |
337 | struct page *page; |
338 | u64 first_byte = disk_start; |
339 | struct block_device *bdev; |
340 | int ret; |
341 | |
342 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); |
343 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
344 | if (!cb) |
345 | return -ENOMEM; |
346 | atomic_set(&cb->pending_bios, 0); |
347 | cb->errors = 0; |
348 | cb->inode = inode; |
349 | cb->start = start; |
350 | cb->len = len; |
351 | cb->mirror_num = 0; |
352 | cb->compressed_pages = compressed_pages; |
353 | cb->compressed_len = compressed_len; |
354 | cb->orig_bio = NULL; |
355 | cb->nr_pages = nr_pages; |
356 | |
357 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
358 | |
359 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); |
360 | if(!bio) { |
361 | kfree(cb); |
362 | return -ENOMEM; |
363 | } |
364 | bio->bi_private = cb; |
365 | bio->bi_end_io = end_compressed_bio_write; |
366 | atomic_inc(&cb->pending_bios); |
367 | |
368 | /* create and submit bios for the compressed pages */ |
369 | bytes_left = compressed_len; |
370 | for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { |
371 | page = compressed_pages[pg_index]; |
372 | page->mapping = inode->i_mapping; |
373 | if (bio->bi_size) |
374 | ret = io_tree->ops->merge_bio_hook(page, 0, |
375 | PAGE_CACHE_SIZE, |
376 | bio, 0); |
377 | else |
378 | ret = 0; |
379 | |
380 | page->mapping = NULL; |
381 | if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < |
382 | PAGE_CACHE_SIZE) { |
383 | bio_get(bio); |
384 | |
385 | /* |
386 | * inc the count before we submit the bio so |
387 | * we know the end IO handler won't happen before |
388 | * we inc the count. Otherwise, the cb might get |
389 | * freed before we're done setting it up |
390 | */ |
391 | atomic_inc(&cb->pending_bios); |
392 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
393 | BUG_ON(ret); |
394 | |
395 | ret = btrfs_csum_one_bio(root, inode, bio, start, 1); |
396 | BUG_ON(ret); |
397 | |
398 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); |
399 | BUG_ON(ret); |
400 | |
401 | bio_put(bio); |
402 | |
403 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); |
404 | bio->bi_private = cb; |
405 | bio->bi_end_io = end_compressed_bio_write; |
406 | bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); |
407 | } |
408 | if (bytes_left < PAGE_CACHE_SIZE) { |
409 | printk("bytes left %lu compress len %lu nr %lu\n", |
410 | bytes_left, cb->compressed_len, cb->nr_pages); |
411 | } |
412 | bytes_left -= PAGE_CACHE_SIZE; |
413 | first_byte += PAGE_CACHE_SIZE; |
414 | cond_resched(); |
415 | } |
416 | bio_get(bio); |
417 | |
418 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
419 | BUG_ON(ret); |
420 | |
421 | ret = btrfs_csum_one_bio(root, inode, bio, start, 1); |
422 | BUG_ON(ret); |
423 | |
424 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); |
425 | BUG_ON(ret); |
426 | |
427 | bio_put(bio); |
428 | return 0; |
429 | } |
430 | |
/*
 * Readahead helper for compressed reads: try to append further page cache
 * pages to cb->orig_bio so that they get filled by the same decompression.
 *
 * Starting at the file offset right after the last page already in the
 * bio, pages are added one at a time while the offset stays below
 * compressed_end and within i_size.  The walk stops at the first page
 * that cannot be allocated, that does not map to this compressed extent,
 * or that the bio has no room for.
 *
 * Always returns 0; the caller sees the result through the grown
 * cb->orig_bio->bi_vcnt.
 */
static noinline int add_ra_bio_pages(struct inode *inode,
				     u64 compressed_end,
				     struct compressed_bio *cb)
{
	unsigned long end_index;
	unsigned long pg_index;
	u64 last_offset;
	u64 isize = i_size_read(inode);
	int ret;
	struct page *page;
	/* NOTE(review): nr_pages is incremented below but never read */
	unsigned long nr_pages = 0;
	struct extent_map *em;
	struct address_space *mapping = inode->i_mapping;
	struct extent_map_tree *em_tree;
	struct extent_io_tree *tree;
	u64 end;
	/* counts offsets whose page was already present in the page cache */
	int misses = 0;

	/* start right behind the last page that is already in the bio */
	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
	em_tree = &BTRFS_I(inode)->extent_tree;
	tree = &BTRFS_I(inode)->io_tree;

	if (isize == 0)
		return 0;

	/* index of the page that holds the last byte of the file */
	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	while (last_offset < compressed_end) {
		pg_index = last_offset >> PAGE_CACHE_SHIFT;

		if (pg_index > end_index)
			break;

		/*
		 * skip offsets that already have a page in the cache, but
		 * give up once that has happened more than four times
		 */
		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, pg_index);
		rcu_read_unlock();
		if (page) {
			misses++;
			if (misses > 4)
				break;
			goto next;
		}

		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
								~__GFP_FS);
		if (!page)
			break;

		/* insertion failed: drop the new page and try the next offset */
		if (add_to_page_cache_lru(page, mapping, pg_index,
								GFP_NOFS)) {
			page_cache_release(page);
			goto next;
		}

		end = last_offset + PAGE_CACHE_SIZE - 1;
		/*
		 * at this point, we have a locked page in the page cache
		 * for these bytes in the file. But, we have to make
		 * sure they map to this compressed extent on disk.
		 */
		set_page_extent_mapped(page);
		lock_extent(tree, last_offset, end, GFP_NOFS);
		read_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, last_offset,
					   PAGE_CACHE_SIZE);
		read_unlock(&em_tree->lock);

		/*
		 * the page must lie completely inside an extent map whose
		 * disk start is the sector the original bio points at;
		 * otherwise undo everything done for this page and stop
		 */
		if (!em || last_offset < em->start ||
		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
			free_extent_map(em);
			unlock_extent(tree, last_offset, end, GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			break;
		}
		free_extent_map(em);

		/* zero the part of the last page that lies beyond i_size */
		if (page->index == end_index) {
			char *userpage;
			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);

			if (zero_offset) {
				int zeros;
				zeros = PAGE_CACHE_SIZE - zero_offset;
				userpage = kmap_atomic(page, KM_USER0);
				memset(userpage + zero_offset, 0, zeros);
				flush_dcache_page(page);
				kunmap_atomic(userpage, KM_USER0);
			}
		}

		ret = bio_add_page(cb->orig_bio, page,
				   PAGE_CACHE_SIZE, 0);

		if (ret == PAGE_CACHE_SIZE) {
			/*
			 * the page stays locked and its extent range stays
			 * locked; only our reference is dropped here.
			 * NOTE(review): presumably both are released when
			 * the original bio completes - confirm in the
			 * readpage end_io path.
			 */
			nr_pages++;
			page_cache_release(page);
		} else {
			/* no room in the bio: undo and stop */
			unlock_extent(tree, last_offset, end, GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			break;
		}
next:
		last_offset += PAGE_CACHE_SIZE;
	}
	return 0;
}
541 | |
542 | /* |
543 | * for a compressed read, the bio we get passed has all the inode pages |
544 | * in it. We don't actually do IO on those pages but allocate new ones |
545 | * to hold the compressed pages on disk. |
546 | * |
547 | * bio->bi_sector points to the compressed extent on disk |
548 | * bio->bi_io_vec points to all of the inode pages |
549 | * bio->bi_vcnt is a count of pages |
550 | * |
551 | * After the compressed pages are read, we copy the bytes into the |
552 | * bio we were passed and then call the bio end_io calls |
553 | */ |
554 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
555 | int mirror_num, unsigned long bio_flags) |
556 | { |
557 | struct extent_io_tree *tree; |
558 | struct extent_map_tree *em_tree; |
559 | struct compressed_bio *cb; |
560 | struct btrfs_root *root = BTRFS_I(inode)->root; |
561 | unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; |
562 | unsigned long compressed_len; |
563 | unsigned long nr_pages; |
564 | unsigned long pg_index; |
565 | struct page *page; |
566 | struct block_device *bdev; |
567 | struct bio *comp_bio; |
568 | u64 cur_disk_byte = (u64)bio->bi_sector << 9; |
569 | u64 em_len; |
570 | u64 em_start; |
571 | struct extent_map *em; |
572 | int ret = -ENOMEM; |
573 | u32 *sums; |
574 | |
575 | tree = &BTRFS_I(inode)->io_tree; |
576 | em_tree = &BTRFS_I(inode)->extent_tree; |
577 | |
578 | /* we need the actual starting offset of this extent in the file */ |
579 | read_lock(&em_tree->lock); |
580 | em = lookup_extent_mapping(em_tree, |
581 | page_offset(bio->bi_io_vec->bv_page), |
582 | PAGE_CACHE_SIZE); |
583 | read_unlock(&em_tree->lock); |
584 | |
585 | compressed_len = em->block_len; |
586 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
587 | if (!cb) |
588 | goto out; |
589 | |
590 | atomic_set(&cb->pending_bios, 0); |
591 | cb->errors = 0; |
592 | cb->inode = inode; |
593 | cb->mirror_num = mirror_num; |
594 | sums = &cb->sums; |
595 | |
596 | cb->start = em->orig_start; |
597 | em_len = em->len; |
598 | em_start = em->start; |
599 | |
600 | free_extent_map(em); |
601 | em = NULL; |
602 | |
603 | cb->len = uncompressed_len; |
604 | cb->compressed_len = compressed_len; |
605 | cb->compress_type = extent_compress_type(bio_flags); |
606 | cb->orig_bio = bio; |
607 | |
608 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
609 | PAGE_CACHE_SIZE; |
610 | cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, |
611 | GFP_NOFS); |
612 | if (!cb->compressed_pages) |
613 | goto fail1; |
614 | |
615 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
616 | |
617 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { |
618 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | |
619 | __GFP_HIGHMEM); |
620 | if (!cb->compressed_pages[pg_index]) |
621 | goto fail2; |
622 | } |
623 | cb->nr_pages = nr_pages; |
624 | |
625 | add_ra_bio_pages(inode, em_start + em_len, cb); |
626 | |
627 | /* include any pages we added in add_ra-bio_pages */ |
628 | uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; |
629 | cb->len = uncompressed_len; |
630 | |
631 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); |
632 | if (!comp_bio) |
633 | goto fail2; |
634 | comp_bio->bi_private = cb; |
635 | comp_bio->bi_end_io = end_compressed_bio_read; |
636 | atomic_inc(&cb->pending_bios); |
637 | |
638 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { |
639 | page = cb->compressed_pages[pg_index]; |
640 | page->mapping = inode->i_mapping; |
641 | page->index = em_start >> PAGE_CACHE_SHIFT; |
642 | |
643 | if (comp_bio->bi_size) |
644 | ret = tree->ops->merge_bio_hook(page, 0, |
645 | PAGE_CACHE_SIZE, |
646 | comp_bio, 0); |
647 | else |
648 | ret = 0; |
649 | |
650 | page->mapping = NULL; |
651 | if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) < |
652 | PAGE_CACHE_SIZE) { |
653 | bio_get(comp_bio); |
654 | |
655 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); |
656 | BUG_ON(ret); |
657 | |
658 | /* |
659 | * inc the count before we submit the bio so |
660 | * we know the end IO handler won't happen before |
661 | * we inc the count. Otherwise, the cb might get |
662 | * freed before we're done setting it up |
663 | */ |
664 | atomic_inc(&cb->pending_bios); |
665 | |
666 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
667 | ret = btrfs_lookup_bio_sums(root, inode, |
668 | comp_bio, sums); |
669 | BUG_ON(ret); |
670 | } |
671 | sums += (comp_bio->bi_size + root->sectorsize - 1) / |
672 | root->sectorsize; |
673 | |
674 | ret = btrfs_map_bio(root, READ, comp_bio, |
675 | mirror_num, 0); |
676 | BUG_ON(ret); |
677 | |
678 | bio_put(comp_bio); |
679 | |
680 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, |
681 | GFP_NOFS); |
682 | comp_bio->bi_private = cb; |
683 | comp_bio->bi_end_io = end_compressed_bio_read; |
684 | |
685 | bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0); |
686 | } |
687 | cur_disk_byte += PAGE_CACHE_SIZE; |
688 | } |
689 | bio_get(comp_bio); |
690 | |
691 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); |
692 | BUG_ON(ret); |
693 | |
694 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
695 | ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums); |
696 | BUG_ON(ret); |
697 | } |
698 | |
699 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); |
700 | BUG_ON(ret); |
701 | |
702 | bio_put(comp_bio); |
703 | return 0; |
704 | |
705 | fail2: |
706 | for (pg_index = 0; pg_index < nr_pages; pg_index++) |
707 | free_page((unsigned long)cb->compressed_pages[pg_index]); |
708 | |
709 | kfree(cb->compressed_pages); |
710 | fail1: |
711 | kfree(cb); |
712 | out: |
713 | free_extent_map(em); |
714 | return ret; |
715 | } |
716 | |
/*
 * Workspace pool state, one slot per compression type.  The functions
 * below index these arrays with (type - 1).
 */
/* idle workspaces that can be handed out again */
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
/* taken around every access to the idle list and its counter */
static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
/* number of workspaces currently sitting on the idle list */
static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
/* number of workspaces that have been allocated and not yet freed */
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
/* tasks sleeping in find_workspace() until a workspace is returned */
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];

/* operation tables of the compression algorithms, also indexed by (type - 1) */
struct btrfs_compress_op *btrfs_compress_op[] = {
	&btrfs_zlib_compress,
	&btrfs_lzo_compress,
};
727 | |
728 | int __init btrfs_init_compress(void) |
729 | { |
730 | int i; |
731 | |
732 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { |
733 | INIT_LIST_HEAD(&comp_idle_workspace[i]); |
734 | spin_lock_init(&comp_workspace_lock[i]); |
735 | atomic_set(&comp_alloc_workspace[i], 0); |
736 | init_waitqueue_head(&comp_workspace_wait[i]); |
737 | } |
738 | return 0; |
739 | } |
740 | |
/*
 * this finds an available workspace or allocates a new one
 * ERR_PTR is returned if things go bad.
 *
 * type is the compression type; (type - 1) selects the pool and the
 * operation table.
 *
 * An idle workspace is reused when there is one.  Otherwise a new one is
 * allocated, unless more workspaces than online cpus are already
 * allocated; in that case the caller sleeps (uninterruptibly) until
 * free_workspace() wakes it and then retries from the top.
 */
static struct list_head *find_workspace(int type)
{
	struct list_head *workspace;
	int cpus = num_online_cpus();
	int idx = type - 1;

	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
	int *num_workspace			= &comp_num_workspace[idx];
again:
	spin_lock(workspace_lock);
	/* fast path: take the first idle workspace off the list */
	if (!list_empty(idle_workspace)) {
		workspace = idle_workspace->next;
		list_del(workspace);
		(*num_workspace)--;
		spin_unlock(workspace_lock);
		return workspace;

	}
	if (atomic_read(alloc_workspace) > cpus) {
		DEFINE_WAIT(wait);

		spin_unlock(workspace_lock);
		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
		/*
		 * re-check after queueing ourselves: only sleep if we are
		 * still over the limit and nothing has become idle
		 */
		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
			schedule();
		finish_wait(workspace_wait, &wait);
		goto again;
	}
	/* account for the new workspace before dropping the lock */
	atomic_inc(alloc_workspace);
	spin_unlock(workspace_lock);

	workspace = btrfs_compress_op[idx]->alloc_workspace();
	if (IS_ERR(workspace)) {
		/* undo the accounting and let a waiter try instead */
		atomic_dec(alloc_workspace);
		wake_up(workspace_wait);
	}
	return workspace;
}
786 | |
787 | /* |
788 | * put a workspace struct back on the list or free it if we have enough |
789 | * idle ones sitting around |
790 | */ |
791 | static void free_workspace(int type, struct list_head *workspace) |
792 | { |
793 | int idx = type - 1; |
794 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; |
795 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; |
796 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; |
797 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; |
798 | int *num_workspace = &comp_num_workspace[idx]; |
799 | |
800 | spin_lock(workspace_lock); |
801 | if (*num_workspace < num_online_cpus()) { |
802 | list_add_tail(workspace, idle_workspace); |
803 | (*num_workspace)++; |
804 | spin_unlock(workspace_lock); |
805 | goto wake; |
806 | } |
807 | spin_unlock(workspace_lock); |
808 | |
809 | btrfs_compress_op[idx]->free_workspace(workspace); |
810 | atomic_dec(alloc_workspace); |
811 | wake: |
812 | if (waitqueue_active(workspace_wait)) |
813 | wake_up(workspace_wait); |
814 | } |
815 | |
816 | /* |
817 | * cleanup function for module exit |
818 | */ |
819 | static void free_workspaces(void) |
820 | { |
821 | struct list_head *workspace; |
822 | int i; |
823 | |
824 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { |
825 | while (!list_empty(&comp_idle_workspace[i])) { |
826 | workspace = comp_idle_workspace[i].next; |
827 | list_del(workspace); |
828 | btrfs_compress_op[i]->free_workspace(workspace); |
829 | atomic_dec(&comp_alloc_workspace[i]); |
830 | } |
831 | } |
832 | } |
833 | |
834 | /* |
835 | * given an address space and start/len, compress the bytes. |
836 | * |
837 | * pages are allocated to hold the compressed result and stored |
838 | * in 'pages' |
839 | * |
840 | * out_pages is used to return the number of pages allocated. There |
841 | * may be pages allocated even if we return an error |
842 | * |
843 | * total_in is used to return the number of bytes actually read. It |
844 | * may be smaller then len if we had to exit early because we |
845 | * ran out of room in the pages array or because we cross the |
846 | * max_out threshold. |
847 | * |
848 | * total_out is used to return the total number of compressed bytes |
849 | * |
850 | * max_out tells us the max number of bytes that we're allowed to |
851 | * stuff into pages |
852 | */ |
853 | int btrfs_compress_pages(int type, struct address_space *mapping, |
854 | u64 start, unsigned long len, |
855 | struct page **pages, |
856 | unsigned long nr_dest_pages, |
857 | unsigned long *out_pages, |
858 | unsigned long *total_in, |
859 | unsigned long *total_out, |
860 | unsigned long max_out) |
861 | { |
862 | struct list_head *workspace; |
863 | int ret; |
864 | |
865 | workspace = find_workspace(type); |
866 | if (IS_ERR(workspace)) |
867 | return -1; |
868 | |
869 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, |
870 | start, len, pages, |
871 | nr_dest_pages, out_pages, |
872 | total_in, total_out, |
873 | max_out); |
874 | free_workspace(type, workspace); |
875 | return ret; |
876 | } |
877 | |
878 | /* |
879 | * pages_in is an array of pages with compressed data. |
880 | * |
881 | * disk_start is the starting logical offset of this array in the file |
882 | * |
883 | * bvec is a bio_vec of pages from the file that we want to decompress into |
884 | * |
885 | * vcnt is the count of pages in the biovec |
886 | * |
887 | * srclen is the number of bytes in pages_in |
888 | * |
889 | * The basic idea is that we have a bio that was created by readpages. |
890 | * The pages in the bio are for the uncompressed data, and they may not |
891 | * be contiguous. They all correspond to the range of bytes covered by |
892 | * the compressed extent. |
893 | */ |
894 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
895 | struct bio_vec *bvec, int vcnt, size_t srclen) |
896 | { |
897 | struct list_head *workspace; |
898 | int ret; |
899 | |
900 | workspace = find_workspace(type); |
901 | if (IS_ERR(workspace)) |
902 | return -ENOMEM; |
903 | |
904 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, |
905 | disk_start, |
906 | bvec, vcnt, srclen); |
907 | free_workspace(type, workspace); |
908 | return ret; |
909 | } |
910 | |
911 | /* |
912 | * a less complex decompression routine. Our compressed data fits in a |
913 | * single page, and we want to read a single page out of it. |
914 | * start_byte tells us the offset into the compressed data we're interested in |
915 | */ |
916 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
917 | unsigned long start_byte, size_t srclen, size_t destlen) |
918 | { |
919 | struct list_head *workspace; |
920 | int ret; |
921 | |
922 | workspace = find_workspace(type); |
923 | if (IS_ERR(workspace)) |
924 | return -ENOMEM; |
925 | |
926 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, |
927 | dest_page, start_byte, |
928 | srclen, destlen); |
929 | |
930 | free_workspace(type, workspace); |
931 | return ret; |
932 | } |
933 | |
/*
 * Tear down the compression workspace pools by freeing every idle
 * workspace.
 */
void btrfs_exit_compress(void)
{
	free_workspaces();
}
938 | |
/*
 * Copy uncompressed data from working buffer to pages.
 *
 * buf is the working buffer holding freshly decompressed bytes.
 *
 * buf_start is the offset of the first byte of buf and total_out is the
 * offset just past its last byte, so buf holds (total_out - buf_start)
 * bytes.  NOTE(review): both are presumably counted from the start of
 * the uncompressed data of the extent - confirm against the callers.
 *
 * disk_start is subtracted from a page's file offset to get the page's
 * position in that same offset space.
 *
 * bvec/vcnt describe the destination pages; *pg_index and *pg_offset are
 * the current page and the write position inside it.  Both are updated so
 * the next call continues where this one stopped.
 *
 * Returns 0 once the last page of the biovec has been filled, 1 if there
 * is still room for more data.
 */
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
			      unsigned long total_out, u64 disk_start,
			      struct bio_vec *bvec, int vcnt,
			      unsigned long *pg_index,
			      unsigned long *pg_offset)
{
	unsigned long buf_offset;
	unsigned long current_buf_start;
	unsigned long start_byte;
	unsigned long working_bytes = total_out - buf_start;
	unsigned long bytes;
	char *kaddr;
	struct page *page_out = bvec[*pg_index].bv_page;

	/*
	 * start byte is the first byte of the page we're currently
	 * copying into relative to the start of the compressed data.
	 */
	start_byte = page_offset(page_out) - disk_start;

	/* we haven't yet hit data corresponding to this page */
	if (total_out <= start_byte)
		return 1;

	/*
	 * the start of the data we care about is offset into
	 * the middle of our working buffer
	 */
	if (total_out > start_byte && buf_start < start_byte) {
		buf_offset = start_byte - buf_start;
		working_bytes -= buf_offset;
	} else {
		buf_offset = 0;
	}
	current_buf_start = buf_start;

	/* copy bytes from the working buffer into the pages */
	while (working_bytes > 0) {
		/*
		 * copy at most up to the end of the destination page, at
		 * most PAGE_CACHE_SIZE - buf_offset bytes, and never more
		 * than what is left in the buffer
		 */
		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
			    PAGE_CACHE_SIZE - buf_offset);
		bytes = min(bytes, working_bytes);
		kaddr = kmap_atomic(page_out, KM_USER0);
		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
		kunmap_atomic(kaddr, KM_USER0);
		flush_dcache_page(page_out);

		*pg_offset += bytes;
		buf_offset += bytes;
		working_bytes -= bytes;
		current_buf_start += bytes;

		/* check if we need to pick another page */
		if (*pg_offset == PAGE_CACHE_SIZE) {
			(*pg_index)++;
			/* every page of the biovec is full: we are done */
			if (*pg_index >= vcnt)
				return 0;

			page_out = bvec[*pg_index].bv_page;
			*pg_offset = 0;
			start_byte = page_offset(page_out) - disk_start;

			/*
			 * make sure our new page is covered by this
			 * working buffer
			 */
			if (total_out <= start_byte)
				return 1;

			/*
			 * the next page in the biovec might not be adjacent
			 * to the last page, but it might still be found
			 * inside this working buffer. bump our offset pointer
			 */
			if (total_out > start_byte &&
			    current_buf_start < start_byte) {
				buf_offset = start_byte - buf_start;
				working_bytes = total_out - start_byte;
				current_buf_start = buf_start + buf_offset;
			}
		}
	}

	return 1;
}
1030 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9