Root/
1 | /* |
2 | * fs/logfs/segment.c - Handling the Object Store |
3 | * |
4 | * As should be obvious for Linux kernel code, license is GPLv2 |
5 | * |
6 | * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> |
7 | * |
8 | * Object store or ostore makes up the complete device with exception of |
9 | * the superblock and journal areas. Apart from its own metadata it stores |
10 | * three kinds of objects: inodes, dentries and blocks, both data and indirect. |
11 | */ |
12 | #include "logfs.h" |
13 | #include <linux/slab.h> |
14 | |
15 | static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) |
16 | { |
17 | struct logfs_super *super = logfs_super(sb); |
18 | struct btree_head32 *head = &super->s_reserved_segments; |
19 | int err; |
20 | |
21 | err = btree_insert32(head, segno, (void *)1, GFP_NOFS); |
22 | if (err) |
23 | return err; |
24 | logfs_super(sb)->s_bad_segments++; |
25 | /* FIXME: write to journal */ |
26 | return 0; |
27 | } |
28 | |
29 | int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase) |
30 | { |
31 | struct logfs_super *super = logfs_super(sb); |
32 | |
33 | super->s_gec++; |
34 | |
35 | return super->s_devops->erase(sb, (u64)segno << super->s_segshift, |
36 | super->s_segsize, ensure_erase); |
37 | } |
38 | |
39 | static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) |
40 | { |
41 | s32 ofs; |
42 | |
43 | logfs_open_area(area, bytes); |
44 | |
45 | ofs = area->a_used_bytes; |
46 | area->a_used_bytes += bytes; |
47 | BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); |
48 | |
49 | return dev_ofs(area->a_sb, area->a_segno, ofs); |
50 | } |
51 | |
52 | static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, |
53 | int use_filler) |
54 | { |
55 | struct logfs_super *super = logfs_super(sb); |
56 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
57 | filler_t *filler = super->s_devops->readpage; |
58 | struct page *page; |
59 | |
60 | BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); |
61 | if (use_filler) |
62 | page = read_cache_page(mapping, index, filler, sb); |
63 | else { |
64 | page = find_or_create_page(mapping, index, GFP_NOFS); |
65 | unlock_page(page); |
66 | } |
67 | return page; |
68 | } |
69 | |
70 | void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, |
71 | int use_filler) |
72 | { |
73 | pgoff_t index = ofs >> PAGE_SHIFT; |
74 | struct page *page; |
75 | long offset = ofs & (PAGE_SIZE-1); |
76 | long copylen; |
77 | |
78 | /* Only logfs_wbuf_recover may use len==0 */ |
79 | BUG_ON(!len && !use_filler); |
80 | do { |
81 | copylen = min((ulong)len, PAGE_SIZE - offset); |
82 | |
83 | page = get_mapping_page(area->a_sb, index, use_filler); |
84 | SetPageUptodate(page); |
85 | BUG_ON(!page); /* FIXME: reserve a pool */ |
86 | memcpy(page_address(page) + offset, buf, copylen); |
87 | SetPagePrivate(page); |
88 | page_cache_release(page); |
89 | |
90 | buf += copylen; |
91 | len -= copylen; |
92 | offset = 0; |
93 | index++; |
94 | } while (len); |
95 | } |
96 | |
97 | static void pad_partial_page(struct logfs_area *area) |
98 | { |
99 | struct super_block *sb = area->a_sb; |
100 | struct page *page; |
101 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
102 | pgoff_t index = ofs >> PAGE_SHIFT; |
103 | long offset = ofs & (PAGE_SIZE-1); |
104 | u32 len = PAGE_SIZE - offset; |
105 | |
106 | if (len % PAGE_SIZE) { |
107 | page = get_mapping_page(sb, index, 0); |
108 | BUG_ON(!page); /* FIXME: reserve a pool */ |
109 | memset(page_address(page) + offset, 0xff, len); |
110 | SetPagePrivate(page); |
111 | page_cache_release(page); |
112 | } |
113 | } |
114 | |
115 | static void pad_full_pages(struct logfs_area *area) |
116 | { |
117 | struct super_block *sb = area->a_sb; |
118 | struct logfs_super *super = logfs_super(sb); |
119 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
120 | u32 len = super->s_segsize - area->a_used_bytes; |
121 | pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; |
122 | pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; |
123 | struct page *page; |
124 | |
125 | while (no_indizes) { |
126 | page = get_mapping_page(sb, index, 0); |
127 | BUG_ON(!page); /* FIXME: reserve a pool */ |
128 | SetPageUptodate(page); |
129 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); |
130 | SetPagePrivate(page); |
131 | page_cache_release(page); |
132 | index++; |
133 | no_indizes--; |
134 | } |
135 | } |
136 | |
/*
 * bdev_writeseg writes full pages, so the tail of the current page is
 * memset to keep stale data from leaking out.  For the final writeout of a
 * segment (@final != 0) all remaining pages are allocated and memset too.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (!final)
		return;
	pad_full_pages(area);
}
147 | |
148 | /* |
149 | * We have to be careful with the alias tree. Since lookup is done by bix, |
150 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with |
151 | * indirect blocks. So always use it through accessor functions. |
152 | */ |
153 | static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, |
154 | level_t level) |
155 | { |
156 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; |
157 | pgoff_t index = logfs_pack_index(bix, level); |
158 | |
159 | return btree_lookup128(head, ino, index); |
160 | } |
161 | |
162 | static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, |
163 | level_t level, void *val) |
164 | { |
165 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; |
166 | pgoff_t index = logfs_pack_index(bix, level); |
167 | |
168 | return btree_insert128(head, ino, index, val, GFP_NOFS); |
169 | } |
170 | |
171 | static int btree_write_alias(struct super_block *sb, struct logfs_block *block, |
172 | write_alias_t *write_one_alias) |
173 | { |
174 | struct object_alias_item *item; |
175 | int err; |
176 | |
177 | list_for_each_entry(item, &block->item_list, list) { |
178 | err = write_alias_journal(sb, block->ino, block->bix, |
179 | block->level, item->child_no, item->val); |
180 | if (err) |
181 | return err; |
182 | } |
183 | return 0; |
184 | } |
185 | |
186 | static gc_level_t btree_block_level(struct logfs_block *block) |
187 | { |
188 | return expand_level(block->ino, block->level); |
189 | } |
190 | |
191 | static struct logfs_block_ops btree_block_ops = { |
192 | .write_block = btree_write_block, |
193 | .block_level = btree_block_level, |
194 | .free_block = __free_block, |
195 | .write_alias = btree_write_alias, |
196 | }; |
197 | |
198 | int logfs_load_object_aliases(struct super_block *sb, |
199 | struct logfs_obj_alias *oa, int count) |
200 | { |
201 | struct logfs_super *super = logfs_super(sb); |
202 | struct logfs_block *block; |
203 | struct object_alias_item *item; |
204 | u64 ino, bix; |
205 | level_t level; |
206 | int i, err; |
207 | |
208 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; |
209 | count /= sizeof(*oa); |
210 | for (i = 0; i < count; i++) { |
211 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); |
212 | if (!item) |
213 | return -ENOMEM; |
214 | memset(item, 0, sizeof(*item)); |
215 | |
216 | super->s_no_object_aliases++; |
217 | item->val = oa[i].val; |
218 | item->child_no = be16_to_cpu(oa[i].child_no); |
219 | |
220 | ino = be64_to_cpu(oa[i].ino); |
221 | bix = be64_to_cpu(oa[i].bix); |
222 | level = LEVEL(oa[i].level); |
223 | |
224 | log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", |
225 | ino, bix, level, item->child_no, |
226 | be64_to_cpu(item->val)); |
227 | block = alias_tree_lookup(sb, ino, bix, level); |
228 | if (!block) { |
229 | block = __alloc_block(sb, ino, bix, level); |
230 | block->ops = &btree_block_ops; |
231 | err = alias_tree_insert(sb, ino, bix, level, block); |
232 | BUG_ON(err); /* mempool empty */ |
233 | } |
234 | if (test_and_set_bit(item->child_no, block->alias_map)) { |
235 | printk(KERN_ERR"LogFS: Alias collision detected\n"); |
236 | return -EIO; |
237 | } |
238 | list_move_tail(&block->alias_list, &super->s_object_alias); |
239 | list_add(&item->list, &block->item_list); |
240 | } |
241 | return 0; |
242 | } |
243 | |
244 | static void kill_alias(void *_block, unsigned long ignore0, |
245 | u64 ignore1, u64 ignore2, size_t ignore3) |
246 | { |
247 | struct logfs_block *block = _block; |
248 | struct super_block *sb = block->sb; |
249 | struct logfs_super *super = logfs_super(sb); |
250 | struct object_alias_item *item; |
251 | |
252 | while (!list_empty(&block->item_list)) { |
253 | item = list_entry(block->item_list.next, typeof(*item), list); |
254 | list_del(&item->list); |
255 | mempool_free(item, super->s_alias_pool); |
256 | } |
257 | block->ops->free_block(sb, block); |
258 | } |
259 | |
260 | static int obj_type(struct inode *inode, level_t level) |
261 | { |
262 | if (level == 0) { |
263 | if (S_ISDIR(inode->i_mode)) |
264 | return OBJ_DENTRY; |
265 | if (inode->i_ino == LOGFS_INO_MASTER) |
266 | return OBJ_INODE; |
267 | } |
268 | return OBJ_BLOCK; |
269 | } |
270 | |
271 | static int obj_len(struct super_block *sb, int obj_type) |
272 | { |
273 | switch (obj_type) { |
274 | case OBJ_DENTRY: |
275 | return sizeof(struct logfs_disk_dentry); |
276 | case OBJ_INODE: |
277 | return sizeof(struct logfs_disk_inode); |
278 | case OBJ_BLOCK: |
279 | return sb->s_blocksize; |
280 | default: |
281 | BUG(); |
282 | } |
283 | } |
284 | |
285 | static int __logfs_segment_write(struct inode *inode, void *buf, |
286 | struct logfs_shadow *shadow, int type, int len, int compr) |
287 | { |
288 | struct logfs_area *area; |
289 | struct super_block *sb = inode->i_sb; |
290 | s64 ofs; |
291 | struct logfs_object_header h; |
292 | int acc_len; |
293 | |
294 | if (shadow->gc_level == 0) |
295 | acc_len = len; |
296 | else |
297 | acc_len = obj_len(sb, type); |
298 | |
299 | area = get_area(sb, shadow->gc_level); |
300 | ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); |
301 | LOGFS_BUG_ON(ofs <= 0, sb); |
302 | /* |
303 | * Order is important. logfs_get_free_bytes(), by modifying the |
304 | * segment file, may modify the content of the very page we're about |
305 | * to write now. Which is fine, as long as the calculated crc and |
306 | * written data still match. So do the modifications _before_ |
307 | * calculating the crc. |
308 | */ |
309 | |
310 | h.len = cpu_to_be16(len); |
311 | h.type = type; |
312 | h.compr = compr; |
313 | h.ino = cpu_to_be64(inode->i_ino); |
314 | h.bix = cpu_to_be64(shadow->bix); |
315 | h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); |
316 | h.data_crc = logfs_crc32(buf, len, 0); |
317 | |
318 | logfs_buf_write(area, ofs, &h, sizeof(h)); |
319 | logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); |
320 | |
321 | shadow->new_ofs = ofs; |
322 | shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; |
323 | |
324 | return 0; |
325 | } |
326 | |
327 | static s64 logfs_segment_write_compress(struct inode *inode, void *buf, |
328 | struct logfs_shadow *shadow, int type, int len) |
329 | { |
330 | struct super_block *sb = inode->i_sb; |
331 | void *compressor_buf = logfs_super(sb)->s_compressed_je; |
332 | ssize_t compr_len; |
333 | int ret; |
334 | |
335 | mutex_lock(&logfs_super(sb)->s_journal_mutex); |
336 | compr_len = logfs_compress(buf, compressor_buf, len, len); |
337 | |
338 | if (compr_len >= 0) { |
339 | ret = __logfs_segment_write(inode, compressor_buf, shadow, |
340 | type, compr_len, COMPR_ZLIB); |
341 | } else { |
342 | ret = __logfs_segment_write(inode, buf, shadow, type, len, |
343 | COMPR_NONE); |
344 | } |
345 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
346 | return ret; |
347 | } |
348 | |
349 | /** |
350 | * logfs_segment_write - write data block to object store |
351 | * @inode: inode containing data |
352 | * |
353 | * Returns an errno or zero. |
354 | */ |
355 | int logfs_segment_write(struct inode *inode, struct page *page, |
356 | struct logfs_shadow *shadow) |
357 | { |
358 | struct super_block *sb = inode->i_sb; |
359 | struct logfs_super *super = logfs_super(sb); |
360 | int do_compress, type, len; |
361 | int ret; |
362 | void *buf; |
363 | |
364 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; |
365 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); |
366 | do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; |
367 | if (shadow->gc_level != 0) { |
368 | /* temporarily disable compression for indirect blocks */ |
369 | do_compress = 0; |
370 | } |
371 | |
372 | type = obj_type(inode, shrink_level(shadow->gc_level)); |
373 | len = obj_len(sb, type); |
374 | buf = kmap(page); |
375 | if (do_compress) |
376 | ret = logfs_segment_write_compress(inode, buf, shadow, type, |
377 | len); |
378 | else |
379 | ret = __logfs_segment_write(inode, buf, shadow, type, len, |
380 | COMPR_NONE); |
381 | kunmap(page); |
382 | |
383 | log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", |
384 | shadow->ino, shadow->bix, shadow->gc_level, |
385 | shadow->old_ofs, shadow->new_ofs, |
386 | shadow->old_len, shadow->new_len); |
387 | /* this BUG_ON did catch a locking bug. useful */ |
388 | BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); |
389 | return ret; |
390 | } |
391 | |
392 | int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) |
393 | { |
394 | pgoff_t index = ofs >> PAGE_SHIFT; |
395 | struct page *page; |
396 | long offset = ofs & (PAGE_SIZE-1); |
397 | long copylen; |
398 | |
399 | while (len) { |
400 | copylen = min((ulong)len, PAGE_SIZE - offset); |
401 | |
402 | page = get_mapping_page(sb, index, 1); |
403 | if (IS_ERR(page)) |
404 | return PTR_ERR(page); |
405 | memcpy(buf, page_address(page) + offset, copylen); |
406 | page_cache_release(page); |
407 | |
408 | buf += copylen; |
409 | len -= copylen; |
410 | offset = 0; |
411 | index++; |
412 | } |
413 | return 0; |
414 | } |
415 | |
416 | /* |
417 | * The "position" of indirect blocks is ambiguous. It can be the position |
418 | * of any data block somewhere behind this indirect block. So we need to |
419 | * normalize the positions through logfs_block_mask() before comparing. |
420 | */ |
421 | static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) |
422 | { |
423 | return (pos1 & logfs_block_mask(sb, level)) != |
424 | (pos2 & logfs_block_mask(sb, level)); |
425 | } |
426 | |
#if 0
/*
 * Compiled out.  Reads a segment header from device offset @ofs into @sh
 * and compares its crc field with a freshly computed checksum.
 * Returns 0, the wbuf_read() error, or -EIO on a checksum mismatch.
 */
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	int err = wbuf_read(sb, ofs, sizeof(*sh), sh);
	__be32 crc;

	if (err)
		return err;
	crc = logfs_crc32(sh, sizeof(*sh), 4);
	if (crc == sh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(sh->crc),
			be32_to_cpu(crc));
	return -EIO;
}
#endif
447 | |
448 | static int read_obj_header(struct super_block *sb, u64 ofs, |
449 | struct logfs_object_header *oh) |
450 | { |
451 | __be32 crc; |
452 | int err; |
453 | |
454 | err = wbuf_read(sb, ofs, sizeof(*oh), oh); |
455 | if (err) |
456 | return err; |
457 | crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); |
458 | if (crc != oh->crc) { |
459 | printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " |
460 | "got %x\n", ofs, be32_to_cpu(oh->crc), |
461 | be32_to_cpu(crc)); |
462 | return -EIO; |
463 | } |
464 | return 0; |
465 | } |
466 | |
467 | static void move_btree_to_page(struct inode *inode, struct page *page, |
468 | __be64 *data) |
469 | { |
470 | struct super_block *sb = inode->i_sb; |
471 | struct logfs_super *super = logfs_super(sb); |
472 | struct btree_head128 *head = &super->s_object_alias_tree; |
473 | struct logfs_block *block; |
474 | struct object_alias_item *item, *next; |
475 | |
476 | if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) |
477 | return; |
478 | |
479 | block = btree_remove128(head, inode->i_ino, page->index); |
480 | if (!block) |
481 | return; |
482 | |
483 | log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", |
484 | block->ino, block->bix, block->level); |
485 | list_for_each_entry_safe(item, next, &block->item_list, list) { |
486 | data[item->child_no] = item->val; |
487 | list_del(&item->list); |
488 | mempool_free(item, super->s_alias_pool); |
489 | } |
490 | block->page = page; |
491 | SetPagePrivate(page); |
492 | page->private = (unsigned long)block; |
493 | block->ops = &indirect_block_ops; |
494 | initialize_block_counters(page, block, data, 0); |
495 | } |
496 | |
/*
 * Thin wrapper around find_next_bit().  It exists to silence a false, yet
 * annoying gcc warning that made the editor jump into bitops.h on every
 * recompile of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
507 | |
508 | void move_page_to_btree(struct page *page) |
509 | { |
510 | struct logfs_block *block = logfs_block(page); |
511 | struct super_block *sb = block->sb; |
512 | struct logfs_super *super = logfs_super(sb); |
513 | struct object_alias_item *item; |
514 | unsigned long pos; |
515 | __be64 *child; |
516 | int err; |
517 | |
518 | if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { |
519 | block->ops->free_block(sb, block); |
520 | return; |
521 | } |
522 | log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", |
523 | block->ino, block->bix, block->level); |
524 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; |
525 | |
526 | for (pos = 0; ; pos++) { |
527 | pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); |
528 | if (pos >= LOGFS_BLOCK_FACTOR) |
529 | break; |
530 | |
531 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); |
532 | BUG_ON(!item); /* mempool empty */ |
533 | memset(item, 0, sizeof(*item)); |
534 | |
535 | child = kmap_atomic(page, KM_USER0); |
536 | item->val = child[pos]; |
537 | kunmap_atomic(child, KM_USER0); |
538 | item->child_no = pos; |
539 | list_add(&item->list, &block->item_list); |
540 | } |
541 | block->page = NULL; |
542 | ClearPagePrivate(page); |
543 | page->private = 0; |
544 | block->ops = &btree_block_ops; |
545 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, |
546 | block); |
547 | BUG_ON(err); /* mempool empty */ |
548 | ClearPageUptodate(page); |
549 | } |
550 | |
551 | static int __logfs_segment_read(struct inode *inode, void *buf, |
552 | u64 ofs, u64 bix, level_t level) |
553 | { |
554 | struct super_block *sb = inode->i_sb; |
555 | void *compressor_buf = logfs_super(sb)->s_compressed_je; |
556 | struct logfs_object_header oh; |
557 | __be32 crc; |
558 | u16 len; |
559 | int err, block_len; |
560 | |
561 | block_len = obj_len(sb, obj_type(inode, level)); |
562 | err = read_obj_header(sb, ofs, &oh); |
563 | if (err) |
564 | goto out_err; |
565 | |
566 | err = -EIO; |
567 | if (be64_to_cpu(oh.ino) != inode->i_ino |
568 | || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { |
569 | printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " |
570 | "expected (%lx, %llx), got (%llx, %llx)\n", |
571 | ofs, inode->i_ino, bix, |
572 | be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); |
573 | goto out_err; |
574 | } |
575 | |
576 | len = be16_to_cpu(oh.len); |
577 | |
578 | switch (oh.compr) { |
579 | case COMPR_NONE: |
580 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); |
581 | if (err) |
582 | goto out_err; |
583 | crc = logfs_crc32(buf, len, 0); |
584 | if (crc != oh.data_crc) { |
585 | printk(KERN_ERR"LOGFS: uncompressed data crc error at " |
586 | "%llx: expected %x, got %x\n", ofs, |
587 | be32_to_cpu(oh.data_crc), |
588 | be32_to_cpu(crc)); |
589 | goto out_err; |
590 | } |
591 | break; |
592 | case COMPR_ZLIB: |
593 | mutex_lock(&logfs_super(sb)->s_journal_mutex); |
594 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, |
595 | compressor_buf); |
596 | if (err) { |
597 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
598 | goto out_err; |
599 | } |
600 | crc = logfs_crc32(compressor_buf, len, 0); |
601 | if (crc != oh.data_crc) { |
602 | printk(KERN_ERR"LOGFS: compressed data crc error at " |
603 | "%llx: expected %x, got %x\n", ofs, |
604 | be32_to_cpu(oh.data_crc), |
605 | be32_to_cpu(crc)); |
606 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
607 | goto out_err; |
608 | } |
609 | err = logfs_uncompress(compressor_buf, buf, len, block_len); |
610 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
611 | if (err) { |
612 | printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); |
613 | goto out_err; |
614 | } |
615 | break; |
616 | default: |
617 | LOGFS_BUG(sb); |
618 | err = -EIO; |
619 | goto out_err; |
620 | } |
621 | return 0; |
622 | |
623 | out_err: |
624 | logfs_set_ro(sb); |
625 | printk(KERN_ERR"LOGFS: device is read-only now\n"); |
626 | LOGFS_BUG(sb); |
627 | return err; |
628 | } |
629 | |
630 | /** |
631 | * logfs_segment_read - read data block from object store |
632 | * @inode: inode containing data |
633 | * @buf: data buffer |
634 | * @ofs: physical data offset |
635 | * @bix: block index |
636 | * @level: block level |
637 | * |
638 | * Returns 0 on success or a negative errno. |
639 | */ |
640 | int logfs_segment_read(struct inode *inode, struct page *page, |
641 | u64 ofs, u64 bix, level_t level) |
642 | { |
643 | int err; |
644 | void *buf; |
645 | |
646 | if (PageUptodate(page)) |
647 | return 0; |
648 | |
649 | ofs &= ~LOGFS_FULLY_POPULATED; |
650 | |
651 | buf = kmap(page); |
652 | err = __logfs_segment_read(inode, buf, ofs, bix, level); |
653 | if (!err) { |
654 | move_btree_to_page(inode, page, buf); |
655 | SetPageUptodate(page); |
656 | } |
657 | kunmap(page); |
658 | log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", |
659 | inode->i_ino, bix, level, ofs, err); |
660 | return err; |
661 | } |
662 | |
663 | int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) |
664 | { |
665 | struct super_block *sb = inode->i_sb; |
666 | struct logfs_super *super = logfs_super(sb); |
667 | struct logfs_object_header h; |
668 | u16 len; |
669 | int err; |
670 | |
671 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; |
672 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); |
673 | BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); |
674 | if (!shadow->old_ofs) |
675 | return 0; |
676 | |
677 | log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", |
678 | shadow->ino, shadow->bix, shadow->gc_level, |
679 | shadow->old_ofs, shadow->new_ofs, |
680 | shadow->old_len, shadow->new_len); |
681 | err = read_obj_header(sb, shadow->old_ofs, &h); |
682 | LOGFS_BUG_ON(err, sb); |
683 | LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); |
684 | LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), |
685 | shrink_level(shadow->gc_level)), sb); |
686 | |
687 | if (shadow->gc_level == 0) |
688 | len = be16_to_cpu(h.len); |
689 | else |
690 | len = obj_len(sb, h.type); |
691 | shadow->old_len = len + sizeof(h); |
692 | return 0; |
693 | } |
694 | |
695 | void freeseg(struct super_block *sb, u32 segno) |
696 | { |
697 | struct logfs_super *super = logfs_super(sb); |
698 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
699 | struct page *page; |
700 | u64 ofs, start, end; |
701 | |
702 | start = dev_ofs(sb, segno, 0); |
703 | end = dev_ofs(sb, segno + 1, 0); |
704 | for (ofs = start; ofs < end; ofs += PAGE_SIZE) { |
705 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); |
706 | if (!page) |
707 | continue; |
708 | ClearPagePrivate(page); |
709 | page_cache_release(page); |
710 | } |
711 | } |
712 | |
713 | int logfs_open_area(struct logfs_area *area, size_t bytes) |
714 | { |
715 | struct super_block *sb = area->a_sb; |
716 | struct logfs_super *super = logfs_super(sb); |
717 | int err, closed = 0; |
718 | |
719 | if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) |
720 | return 0; |
721 | |
722 | if (area->a_is_open) { |
723 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); |
724 | u32 len = super->s_segsize - area->a_written_bytes; |
725 | |
726 | log_gc("logfs_close_area(%x)\n", area->a_segno); |
727 | pad_wbuf(area, 1); |
728 | super->s_devops->writeseg(area->a_sb, ofs, len); |
729 | freeseg(sb, area->a_segno); |
730 | closed = 1; |
731 | } |
732 | |
733 | area->a_used_bytes = 0; |
734 | area->a_written_bytes = 0; |
735 | again: |
736 | area->a_ops->get_free_segment(area); |
737 | area->a_ops->get_erase_count(area); |
738 | |
739 | log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); |
740 | err = area->a_ops->erase_segment(area); |
741 | if (err) { |
742 | printk(KERN_WARNING "LogFS: Error erasing segment %x\n", |
743 | area->a_segno); |
744 | logfs_mark_segment_bad(sb, area->a_segno); |
745 | goto again; |
746 | } |
747 | area->a_is_open = 1; |
748 | return closed; |
749 | } |
750 | |
751 | void logfs_sync_area(struct logfs_area *area) |
752 | { |
753 | struct super_block *sb = area->a_sb; |
754 | struct logfs_super *super = logfs_super(sb); |
755 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); |
756 | u32 len = (area->a_used_bytes - area->a_written_bytes); |
757 | |
758 | if (super->s_writesize) |
759 | len &= ~(super->s_writesize - 1); |
760 | if (len == 0) |
761 | return; |
762 | pad_wbuf(area, 0); |
763 | super->s_devops->writeseg(sb, ofs, len); |
764 | area->a_written_bytes += len; |
765 | } |
766 | |
767 | void logfs_sync_segments(struct super_block *sb) |
768 | { |
769 | struct logfs_super *super = logfs_super(sb); |
770 | int i; |
771 | |
772 | for_each_area(i) |
773 | logfs_sync_area(super->s_area[i]); |
774 | } |
775 | |
776 | /* |
777 | * Pick a free segment to be used for this area. Effectively takes a |
778 | * candidate from the free list (not really a candidate anymore). |
779 | */ |
780 | static void ostore_get_free_segment(struct logfs_area *area) |
781 | { |
782 | struct super_block *sb = area->a_sb; |
783 | struct logfs_super *super = logfs_super(sb); |
784 | |
785 | if (super->s_free_list.count == 0) { |
786 | printk(KERN_ERR"LOGFS: ran out of free segments\n"); |
787 | LOGFS_BUG(sb); |
788 | } |
789 | |
790 | area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); |
791 | } |
792 | |
793 | static void ostore_get_erase_count(struct logfs_area *area) |
794 | { |
795 | struct logfs_segment_entry se; |
796 | u32 ec_level; |
797 | |
798 | logfs_get_segment_entry(area->a_sb, area->a_segno, &se); |
799 | BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || |
800 | se.valid == cpu_to_be32(RESERVED)); |
801 | |
802 | ec_level = be32_to_cpu(se.ec_level); |
803 | area->a_erase_count = (ec_level >> 4) + 1; |
804 | } |
805 | |
806 | static int ostore_erase_segment(struct logfs_area *area) |
807 | { |
808 | struct super_block *sb = area->a_sb; |
809 | struct logfs_segment_header sh; |
810 | u64 ofs; |
811 | int err; |
812 | |
813 | err = logfs_erase_segment(sb, area->a_segno, 0); |
814 | if (err) |
815 | return err; |
816 | |
817 | sh.pad = 0; |
818 | sh.type = SEG_OSTORE; |
819 | sh.level = (__force u8)area->a_level; |
820 | sh.segno = cpu_to_be32(area->a_segno); |
821 | sh.ec = cpu_to_be32(area->a_erase_count); |
822 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); |
823 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); |
824 | |
825 | logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, |
826 | area->a_level); |
827 | |
828 | ofs = dev_ofs(sb, area->a_segno, 0); |
829 | area->a_used_bytes = sizeof(sh); |
830 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); |
831 | return 0; |
832 | } |
833 | |
834 | static const struct logfs_area_ops ostore_area_ops = { |
835 | .get_free_segment = ostore_get_free_segment, |
836 | .get_erase_count = ostore_get_erase_count, |
837 | .erase_segment = ostore_erase_segment, |
838 | }; |
839 | |
840 | static void free_area(struct logfs_area *area) |
841 | { |
842 | if (area) |
843 | freeseg(area->a_sb, area->a_segno); |
844 | kfree(area); |
845 | } |
846 | |
847 | static struct logfs_area *alloc_area(struct super_block *sb) |
848 | { |
849 | struct logfs_area *area; |
850 | |
851 | area = kzalloc(sizeof(*area), GFP_KERNEL); |
852 | if (!area) |
853 | return NULL; |
854 | |
855 | area->a_sb = sb; |
856 | return area; |
857 | } |
858 | |
/* invalidatepage op of the mapping inode: being called at all is a BUG. */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
863 | |
864 | static int map_releasepage(struct page *page, gfp_t g) |
865 | { |
866 | /* Don't release these pages */ |
867 | return 0; |
868 | } |
869 | |
870 | static const struct address_space_operations mapping_aops = { |
871 | .invalidatepage = map_invalidatepage, |
872 | .releasepage = map_releasepage, |
873 | .set_page_dirty = __set_page_dirty_nobuffers, |
874 | }; |
875 | |
876 | int logfs_init_mapping(struct super_block *sb) |
877 | { |
878 | struct logfs_super *super = logfs_super(sb); |
879 | struct address_space *mapping; |
880 | struct inode *inode; |
881 | |
882 | inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); |
883 | if (IS_ERR(inode)) |
884 | return PTR_ERR(inode); |
885 | super->s_mapping_inode = inode; |
886 | mapping = inode->i_mapping; |
887 | mapping->a_ops = &mapping_aops; |
888 | /* Would it be possible to use __GFP_HIGHMEM as well? */ |
889 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
890 | return 0; |
891 | } |
892 | |
893 | int logfs_init_areas(struct super_block *sb) |
894 | { |
895 | struct logfs_super *super = logfs_super(sb); |
896 | int i = -1; |
897 | |
898 | super->s_alias_pool = mempool_create_kmalloc_pool(600, |
899 | sizeof(struct object_alias_item)); |
900 | if (!super->s_alias_pool) |
901 | return -ENOMEM; |
902 | |
903 | super->s_journal_area = alloc_area(sb); |
904 | if (!super->s_journal_area) |
905 | goto err; |
906 | |
907 | for_each_area(i) { |
908 | super->s_area[i] = alloc_area(sb); |
909 | if (!super->s_area[i]) |
910 | goto err; |
911 | super->s_area[i]->a_level = GC_LEVEL(i); |
912 | super->s_area[i]->a_ops = &ostore_area_ops; |
913 | } |
914 | btree_init_mempool128(&super->s_object_alias_tree, |
915 | super->s_btree_pool); |
916 | return 0; |
917 | |
918 | err: |
919 | for (i--; i >= 0; i--) |
920 | free_area(super->s_area[i]); |
921 | free_area(super->s_journal_area); |
922 | mempool_destroy(super->s_alias_pool); |
923 | return -ENOMEM; |
924 | } |
925 | |
926 | void logfs_cleanup_areas(struct super_block *sb) |
927 | { |
928 | struct logfs_super *super = logfs_super(sb); |
929 | int i; |
930 | |
931 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); |
932 | for_each_area(i) |
933 | free_area(super->s_area[i]); |
934 | free_area(super->s_journal_area); |
935 | destroy_meta_inode(super->s_mapping_inode); |
936 | } |
937 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9