Root/
1 | /* |
2 | * fs/logfs/journal.c - journal handling code |
3 | * |
4 | * As should be obvious for Linux kernel code, license is GPLv2 |
5 | * |
6 | * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> |
7 | */ |
8 | #include "logfs.h" |
9 | #include <linux/slab.h> |
10 | |
11 | static void logfs_calc_free(struct super_block *sb) |
12 | { |
13 | struct logfs_super *super = logfs_super(sb); |
14 | u64 reserve, no_segs = super->s_no_segs; |
15 | s64 free; |
16 | int i; |
17 | |
18 | /* superblock segments */ |
19 | no_segs -= 2; |
20 | super->s_no_journal_segs = 0; |
21 | /* journal */ |
22 | journal_for_each(i) |
23 | if (super->s_journal_seg[i]) { |
24 | no_segs--; |
25 | super->s_no_journal_segs++; |
26 | } |
27 | |
28 | /* open segments plus one extra per level for GC */ |
29 | no_segs -= 2 * super->s_total_levels; |
30 | |
31 | free = no_segs * (super->s_segsize - LOGFS_SEGMENT_RESERVE); |
32 | free -= super->s_used_bytes; |
33 | /* just a bit extra */ |
34 | free -= super->s_total_levels * 4096; |
35 | |
36 | /* Bad blocks are 'paid' for with speed reserve - the filesystem |
37 | * simply gets slower as bad blocks accumulate. Until the bad blocks |
38 | * exceed the speed reserve - then the filesystem gets smaller. |
39 | */ |
40 | reserve = super->s_bad_segments + super->s_bad_seg_reserve; |
41 | reserve *= super->s_segsize - LOGFS_SEGMENT_RESERVE; |
42 | reserve = max(reserve, super->s_speed_reserve); |
43 | free -= reserve; |
44 | if (free < 0) |
45 | free = 0; |
46 | |
47 | super->s_free_bytes = free; |
48 | } |
49 | |
50 | static void reserve_sb_and_journal(struct super_block *sb) |
51 | { |
52 | struct logfs_super *super = logfs_super(sb); |
53 | struct btree_head32 *head = &super->s_reserved_segments; |
54 | int i, err; |
55 | |
56 | err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[0]), (void *)1, |
57 | GFP_KERNEL); |
58 | BUG_ON(err); |
59 | |
60 | err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[1]), (void *)1, |
61 | GFP_KERNEL); |
62 | BUG_ON(err); |
63 | |
64 | journal_for_each(i) { |
65 | if (!super->s_journal_seg[i]) |
66 | continue; |
67 | err = btree_insert32(head, super->s_journal_seg[i], (void *)1, |
68 | GFP_KERNEL); |
69 | BUG_ON(err); |
70 | } |
71 | } |
72 | |
73 | static void read_dynsb(struct super_block *sb, |
74 | struct logfs_je_dynsb *dynsb) |
75 | { |
76 | struct logfs_super *super = logfs_super(sb); |
77 | |
78 | super->s_gec = be64_to_cpu(dynsb->ds_gec); |
79 | super->s_sweeper = be64_to_cpu(dynsb->ds_sweeper); |
80 | super->s_victim_ino = be64_to_cpu(dynsb->ds_victim_ino); |
81 | super->s_rename_dir = be64_to_cpu(dynsb->ds_rename_dir); |
82 | super->s_rename_pos = be64_to_cpu(dynsb->ds_rename_pos); |
83 | super->s_used_bytes = be64_to_cpu(dynsb->ds_used_bytes); |
84 | super->s_generation = be32_to_cpu(dynsb->ds_generation); |
85 | } |
86 | |
87 | static void read_anchor(struct super_block *sb, |
88 | struct logfs_je_anchor *da) |
89 | { |
90 | struct logfs_super *super = logfs_super(sb); |
91 | struct inode *inode = super->s_master_inode; |
92 | struct logfs_inode *li = logfs_inode(inode); |
93 | int i; |
94 | |
95 | super->s_last_ino = be64_to_cpu(da->da_last_ino); |
96 | li->li_flags = 0; |
97 | li->li_height = da->da_height; |
98 | i_size_write(inode, be64_to_cpu(da->da_size)); |
99 | li->li_used_bytes = be64_to_cpu(da->da_used_bytes); |
100 | |
101 | for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) |
102 | li->li_data[i] = be64_to_cpu(da->da_data[i]); |
103 | } |
104 | |
105 | static void read_erasecount(struct super_block *sb, |
106 | struct logfs_je_journal_ec *ec) |
107 | { |
108 | struct logfs_super *super = logfs_super(sb); |
109 | int i; |
110 | |
111 | journal_for_each(i) |
112 | super->s_journal_ec[i] = be32_to_cpu(ec->ec[i]); |
113 | } |
114 | |
115 | static int read_area(struct super_block *sb, struct logfs_je_area *a) |
116 | { |
117 | struct logfs_super *super = logfs_super(sb); |
118 | struct logfs_area *area = super->s_area[a->gc_level]; |
119 | u64 ofs; |
120 | u32 writemask = ~(super->s_writesize - 1); |
121 | |
122 | if (a->gc_level >= LOGFS_NO_AREAS) |
123 | return -EIO; |
124 | if (a->vim != VIM_DEFAULT) |
125 | return -EIO; /* TODO: close area and continue */ |
126 | |
127 | area->a_used_bytes = be32_to_cpu(a->used_bytes); |
128 | area->a_written_bytes = area->a_used_bytes & writemask; |
129 | area->a_segno = be32_to_cpu(a->segno); |
130 | if (area->a_segno) |
131 | area->a_is_open = 1; |
132 | |
133 | ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); |
134 | if (super->s_writesize > 1) |
135 | logfs_buf_recover(area, ofs, a + 1, super->s_writesize); |
136 | else |
137 | logfs_buf_recover(area, ofs, NULL, 0); |
138 | return 0; |
139 | } |
140 | |
141 | static void *unpack(void *from, void *to) |
142 | { |
143 | struct logfs_journal_header *jh = from; |
144 | void *data = from + sizeof(struct logfs_journal_header); |
145 | int err; |
146 | size_t inlen, outlen; |
147 | |
148 | inlen = be16_to_cpu(jh->h_len); |
149 | outlen = be16_to_cpu(jh->h_datalen); |
150 | |
151 | if (jh->h_compr == COMPR_NONE) |
152 | memcpy(to, data, inlen); |
153 | else { |
154 | err = logfs_uncompress(data, to, inlen, outlen); |
155 | BUG_ON(err); |
156 | } |
157 | return to; |
158 | } |
159 | |
160 | static int __read_je_header(struct super_block *sb, u64 ofs, |
161 | struct logfs_journal_header *jh) |
162 | { |
163 | struct logfs_super *super = logfs_super(sb); |
164 | size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize) |
165 | + MAX_JOURNAL_HEADER; |
166 | u16 type, len, datalen; |
167 | int err; |
168 | |
169 | /* read header only */ |
170 | err = wbuf_read(sb, ofs, sizeof(*jh), jh); |
171 | if (err) |
172 | return err; |
173 | type = be16_to_cpu(jh->h_type); |
174 | len = be16_to_cpu(jh->h_len); |
175 | datalen = be16_to_cpu(jh->h_datalen); |
176 | if (len > sb->s_blocksize) |
177 | return -EIO; |
178 | if ((type < JE_FIRST) || (type > JE_LAST)) |
179 | return -EIO; |
180 | if (datalen > bufsize) |
181 | return -EIO; |
182 | return 0; |
183 | } |
184 | |
185 | static int __read_je_payload(struct super_block *sb, u64 ofs, |
186 | struct logfs_journal_header *jh) |
187 | { |
188 | u16 len; |
189 | int err; |
190 | |
191 | len = be16_to_cpu(jh->h_len); |
192 | err = wbuf_read(sb, ofs + sizeof(*jh), len, jh + 1); |
193 | if (err) |
194 | return err; |
195 | if (jh->h_crc != logfs_crc32(jh, len + sizeof(*jh), 4)) { |
196 | /* Old code was confused. It forgot about the header length |
197 | * and stopped calculating the crc 16 bytes before the end |
198 | * of data - ick! |
199 | * FIXME: Remove this hack once the old code is fixed. |
200 | */ |
201 | if (jh->h_crc == logfs_crc32(jh, len, 4)) |
202 | WARN_ON_ONCE(1); |
203 | else |
204 | return -EIO; |
205 | } |
206 | return 0; |
207 | } |
208 | |
209 | /* |
210 | * jh needs to be large enough to hold the complete entry, not just the header |
211 | */ |
212 | static int __read_je(struct super_block *sb, u64 ofs, |
213 | struct logfs_journal_header *jh) |
214 | { |
215 | int err; |
216 | |
217 | err = __read_je_header(sb, ofs, jh); |
218 | if (err) |
219 | return err; |
220 | return __read_je_payload(sb, ofs, jh); |
221 | } |
222 | |
223 | static int read_je(struct super_block *sb, u64 ofs) |
224 | { |
225 | struct logfs_super *super = logfs_super(sb); |
226 | struct logfs_journal_header *jh = super->s_compressed_je; |
227 | void *scratch = super->s_je; |
228 | u16 type, datalen; |
229 | int err; |
230 | |
231 | err = __read_je(sb, ofs, jh); |
232 | if (err) |
233 | return err; |
234 | type = be16_to_cpu(jh->h_type); |
235 | datalen = be16_to_cpu(jh->h_datalen); |
236 | |
237 | switch (type) { |
238 | case JE_DYNSB: |
239 | read_dynsb(sb, unpack(jh, scratch)); |
240 | break; |
241 | case JE_ANCHOR: |
242 | read_anchor(sb, unpack(jh, scratch)); |
243 | break; |
244 | case JE_ERASECOUNT: |
245 | read_erasecount(sb, unpack(jh, scratch)); |
246 | break; |
247 | case JE_AREA: |
248 | read_area(sb, unpack(jh, scratch)); |
249 | break; |
250 | case JE_OBJ_ALIAS: |
251 | err = logfs_load_object_aliases(sb, unpack(jh, scratch), |
252 | datalen); |
253 | break; |
254 | default: |
255 | WARN_ON_ONCE(1); |
256 | return -EIO; |
257 | } |
258 | return err; |
259 | } |
260 | |
261 | static int logfs_read_segment(struct super_block *sb, u32 segno) |
262 | { |
263 | struct logfs_super *super = logfs_super(sb); |
264 | struct logfs_journal_header *jh = super->s_compressed_je; |
265 | u64 ofs, seg_ofs = dev_ofs(sb, segno, 0); |
266 | u32 h_ofs, last_ofs = 0; |
267 | u16 len, datalen, last_len = 0; |
268 | int i, err; |
269 | |
270 | /* search for most recent commit */ |
271 | for (h_ofs = 0; h_ofs < super->s_segsize; h_ofs += sizeof(*jh)) { |
272 | ofs = seg_ofs + h_ofs; |
273 | err = __read_je_header(sb, ofs, jh); |
274 | if (err) |
275 | continue; |
276 | if (jh->h_type != cpu_to_be16(JE_COMMIT)) |
277 | continue; |
278 | err = __read_je_payload(sb, ofs, jh); |
279 | if (err) |
280 | continue; |
281 | len = be16_to_cpu(jh->h_len); |
282 | datalen = be16_to_cpu(jh->h_datalen); |
283 | if ((datalen > sizeof(super->s_je_array)) || |
284 | (datalen % sizeof(__be64))) |
285 | continue; |
286 | last_ofs = h_ofs; |
287 | last_len = datalen; |
288 | h_ofs += ALIGN(len, sizeof(*jh)) - sizeof(*jh); |
289 | } |
290 | /* read commit */ |
291 | if (last_ofs == 0) |
292 | return -ENOENT; |
293 | ofs = seg_ofs + last_ofs; |
294 | log_journal("Read commit from %llx\n", ofs); |
295 | err = __read_je(sb, ofs, jh); |
296 | BUG_ON(err); /* We should have caught it in the scan loop already */ |
297 | if (err) |
298 | return err; |
299 | /* uncompress */ |
300 | unpack(jh, super->s_je_array); |
301 | super->s_no_je = last_len / sizeof(__be64); |
302 | /* iterate over array */ |
303 | for (i = 0; i < super->s_no_je; i++) { |
304 | err = read_je(sb, be64_to_cpu(super->s_je_array[i])); |
305 | if (err) |
306 | return err; |
307 | } |
308 | super->s_journal_area->a_segno = segno; |
309 | return 0; |
310 | } |
311 | |
312 | static u64 read_gec(struct super_block *sb, u32 segno) |
313 | { |
314 | struct logfs_segment_header sh; |
315 | __be32 crc; |
316 | int err; |
317 | |
318 | if (!segno) |
319 | return 0; |
320 | err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh); |
321 | if (err) |
322 | return 0; |
323 | crc = logfs_crc32(&sh, sizeof(sh), 4); |
324 | if (crc != sh.crc) { |
325 | WARN_ON(sh.gec != cpu_to_be64(0xffffffffffffffffull)); |
326 | /* Most likely it was just erased */ |
327 | return 0; |
328 | } |
329 | return be64_to_cpu(sh.gec); |
330 | } |
331 | |
332 | static int logfs_read_journal(struct super_block *sb) |
333 | { |
334 | struct logfs_super *super = logfs_super(sb); |
335 | u64 gec[LOGFS_JOURNAL_SEGS], max; |
336 | u32 segno; |
337 | int i, max_i; |
338 | |
339 | max = 0; |
340 | max_i = -1; |
341 | journal_for_each(i) { |
342 | segno = super->s_journal_seg[i]; |
343 | gec[i] = read_gec(sb, super->s_journal_seg[i]); |
344 | if (gec[i] > max) { |
345 | max = gec[i]; |
346 | max_i = i; |
347 | } |
348 | } |
349 | if (max_i == -1) |
350 | return -EIO; |
351 | /* FIXME: Try older segments in case of error */ |
352 | return logfs_read_segment(sb, super->s_journal_seg[max_i]); |
353 | } |
354 | |
355 | /* |
356 | * First search the current segment (outer loop), then pick the next segment |
357 | * in the array, skipping any zero entries (inner loop). |
358 | */ |
359 | static void journal_get_free_segment(struct logfs_area *area) |
360 | { |
361 | struct logfs_super *super = logfs_super(area->a_sb); |
362 | int i; |
363 | |
364 | journal_for_each(i) { |
365 | if (area->a_segno != super->s_journal_seg[i]) |
366 | continue; |
367 | |
368 | do { |
369 | i++; |
370 | if (i == LOGFS_JOURNAL_SEGS) |
371 | i = 0; |
372 | } while (!super->s_journal_seg[i]); |
373 | |
374 | area->a_segno = super->s_journal_seg[i]; |
375 | area->a_erase_count = ++(super->s_journal_ec[i]); |
376 | log_journal("Journal now at %x (ec %x)\n", area->a_segno, |
377 | area->a_erase_count); |
378 | return; |
379 | } |
380 | BUG(); |
381 | } |
382 | |
/*
 * Intentionally empty: the journal erase count is stored globally and is
 * already incremented in journal_get_free_segment().
 */
static void journal_get_erase_count(struct logfs_area *area)
{
}
388 | |
389 | static int journal_erase_segment(struct logfs_area *area) |
390 | { |
391 | struct super_block *sb = area->a_sb; |
392 | struct logfs_segment_header sh; |
393 | u64 ofs; |
394 | int err; |
395 | |
396 | err = logfs_erase_segment(sb, area->a_segno, 1); |
397 | if (err) |
398 | return err; |
399 | |
400 | sh.pad = 0; |
401 | sh.type = SEG_JOURNAL; |
402 | sh.level = 0; |
403 | sh.segno = cpu_to_be32(area->a_segno); |
404 | sh.ec = cpu_to_be32(area->a_erase_count); |
405 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); |
406 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); |
407 | |
408 | /* This causes a bug in segment.c. Not yet. */ |
409 | //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); |
410 | |
411 | ofs = dev_ofs(sb, area->a_segno, 0); |
412 | area->a_used_bytes = ALIGN(sizeof(sh), 16); |
413 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); |
414 | return 0; |
415 | } |
416 | |
417 | static size_t __logfs_write_header(struct logfs_super *super, |
418 | struct logfs_journal_header *jh, size_t len, size_t datalen, |
419 | u16 type, u8 compr) |
420 | { |
421 | jh->h_len = cpu_to_be16(len); |
422 | jh->h_type = cpu_to_be16(type); |
423 | jh->h_datalen = cpu_to_be16(datalen); |
424 | jh->h_compr = compr; |
425 | jh->h_pad[0] = 'H'; |
426 | jh->h_pad[1] = 'E'; |
427 | jh->h_pad[2] = 'A'; |
428 | jh->h_pad[3] = 'D'; |
429 | jh->h_pad[4] = 'R'; |
430 | jh->h_crc = logfs_crc32(jh, len + sizeof(*jh), 4); |
431 | return ALIGN(len, 16) + sizeof(*jh); |
432 | } |
433 | |
434 | static size_t logfs_write_header(struct logfs_super *super, |
435 | struct logfs_journal_header *jh, size_t datalen, u16 type) |
436 | { |
437 | size_t len = datalen; |
438 | |
439 | return __logfs_write_header(super, jh, len, datalen, type, COMPR_NONE); |
440 | } |
441 | |
442 | static inline size_t logfs_journal_erasecount_size(struct logfs_super *super) |
443 | { |
444 | return LOGFS_JOURNAL_SEGS * sizeof(__be32); |
445 | } |
446 | |
447 | static void *logfs_write_erasecount(struct super_block *sb, void *_ec, |
448 | u16 *type, size_t *len) |
449 | { |
450 | struct logfs_super *super = logfs_super(sb); |
451 | struct logfs_je_journal_ec *ec = _ec; |
452 | int i; |
453 | |
454 | journal_for_each(i) |
455 | ec->ec[i] = cpu_to_be32(super->s_journal_ec[i]); |
456 | *type = JE_ERASECOUNT; |
457 | *len = logfs_journal_erasecount_size(super); |
458 | return ec; |
459 | } |
460 | |
461 | static void account_shadow(void *_shadow, unsigned long _sb, u64 ignore, |
462 | size_t ignore2) |
463 | { |
464 | struct logfs_shadow *shadow = _shadow; |
465 | struct super_block *sb = (void *)_sb; |
466 | struct logfs_super *super = logfs_super(sb); |
467 | |
468 | /* consume new space */ |
469 | super->s_free_bytes -= shadow->new_len; |
470 | super->s_used_bytes += shadow->new_len; |
471 | super->s_dirty_used_bytes -= shadow->new_len; |
472 | |
473 | /* free up old space */ |
474 | super->s_free_bytes += shadow->old_len; |
475 | super->s_used_bytes -= shadow->old_len; |
476 | super->s_dirty_free_bytes -= shadow->old_len; |
477 | |
478 | logfs_set_segment_used(sb, shadow->old_ofs, -shadow->old_len); |
479 | logfs_set_segment_used(sb, shadow->new_ofs, shadow->new_len); |
480 | |
481 | log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n", |
482 | shadow->ino, shadow->bix, shadow->gc_level, |
483 | shadow->old_ofs, shadow->new_ofs, |
484 | shadow->old_len, shadow->new_len); |
485 | mempool_free(shadow, super->s_shadow_pool); |
486 | } |
487 | |
488 | static void account_shadows(struct super_block *sb) |
489 | { |
490 | struct logfs_super *super = logfs_super(sb); |
491 | struct inode *inode = super->s_master_inode; |
492 | struct logfs_inode *li = logfs_inode(inode); |
493 | struct shadow_tree *tree = &super->s_shadow_tree; |
494 | |
495 | btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); |
496 | btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); |
497 | |
498 | if (li->li_block) { |
499 | /* |
500 | * We never actually use the structure, when attached to the |
501 | * master inode. But it is easier to always free it here than |
502 | * to have checks in several places elsewhere when allocating |
503 | * it. |
504 | */ |
505 | li->li_block->ops->free_block(sb, li->li_block); |
506 | } |
507 | BUG_ON((s64)li->li_used_bytes < 0); |
508 | } |
509 | |
510 | static void *__logfs_write_anchor(struct super_block *sb, void *_da, |
511 | u16 *type, size_t *len) |
512 | { |
513 | struct logfs_super *super = logfs_super(sb); |
514 | struct logfs_je_anchor *da = _da; |
515 | struct inode *inode = super->s_master_inode; |
516 | struct logfs_inode *li = logfs_inode(inode); |
517 | int i; |
518 | |
519 | da->da_height = li->li_height; |
520 | da->da_last_ino = cpu_to_be64(super->s_last_ino); |
521 | da->da_size = cpu_to_be64(i_size_read(inode)); |
522 | da->da_used_bytes = cpu_to_be64(li->li_used_bytes); |
523 | for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) |
524 | da->da_data[i] = cpu_to_be64(li->li_data[i]); |
525 | *type = JE_ANCHOR; |
526 | *len = sizeof(*da); |
527 | return da; |
528 | } |
529 | |
530 | static void *logfs_write_dynsb(struct super_block *sb, void *_dynsb, |
531 | u16 *type, size_t *len) |
532 | { |
533 | struct logfs_super *super = logfs_super(sb); |
534 | struct logfs_je_dynsb *dynsb = _dynsb; |
535 | |
536 | dynsb->ds_gec = cpu_to_be64(super->s_gec); |
537 | dynsb->ds_sweeper = cpu_to_be64(super->s_sweeper); |
538 | dynsb->ds_victim_ino = cpu_to_be64(super->s_victim_ino); |
539 | dynsb->ds_rename_dir = cpu_to_be64(super->s_rename_dir); |
540 | dynsb->ds_rename_pos = cpu_to_be64(super->s_rename_pos); |
541 | dynsb->ds_used_bytes = cpu_to_be64(super->s_used_bytes); |
542 | dynsb->ds_generation = cpu_to_be32(super->s_generation); |
543 | *type = JE_DYNSB; |
544 | *len = sizeof(*dynsb); |
545 | return dynsb; |
546 | } |
547 | |
548 | static void write_wbuf(struct super_block *sb, struct logfs_area *area, |
549 | void *wbuf) |
550 | { |
551 | struct logfs_super *super = logfs_super(sb); |
552 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
553 | u64 ofs; |
554 | pgoff_t index; |
555 | int page_ofs; |
556 | struct page *page; |
557 | |
558 | ofs = dev_ofs(sb, area->a_segno, |
559 | area->a_used_bytes & ~(super->s_writesize - 1)); |
560 | index = ofs >> PAGE_SHIFT; |
561 | page_ofs = ofs & (PAGE_SIZE - 1); |
562 | |
563 | page = find_lock_page(mapping, index); |
564 | BUG_ON(!page); |
565 | memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); |
566 | unlock_page(page); |
567 | } |
568 | |
569 | static void *logfs_write_area(struct super_block *sb, void *_a, |
570 | u16 *type, size_t *len) |
571 | { |
572 | struct logfs_super *super = logfs_super(sb); |
573 | struct logfs_area *area = super->s_area[super->s_sum_index]; |
574 | struct logfs_je_area *a = _a; |
575 | |
576 | a->vim = VIM_DEFAULT; |
577 | a->gc_level = super->s_sum_index; |
578 | a->used_bytes = cpu_to_be32(area->a_used_bytes); |
579 | a->segno = cpu_to_be32(area->a_segno); |
580 | if (super->s_writesize > 1) |
581 | write_wbuf(sb, area, a + 1); |
582 | |
583 | *type = JE_AREA; |
584 | *len = sizeof(*a) + super->s_writesize; |
585 | return a; |
586 | } |
587 | |
588 | static void *logfs_write_commit(struct super_block *sb, void *h, |
589 | u16 *type, size_t *len) |
590 | { |
591 | struct logfs_super *super = logfs_super(sb); |
592 | |
593 | *type = JE_COMMIT; |
594 | *len = super->s_no_je * sizeof(__be64); |
595 | return super->s_je_array; |
596 | } |
597 | |
598 | static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, |
599 | size_t len) |
600 | { |
601 | struct logfs_super *super = logfs_super(sb); |
602 | void *header = super->s_compressed_je; |
603 | void *data = header + sizeof(struct logfs_journal_header); |
604 | ssize_t compr_len, pad_len; |
605 | u8 compr = COMPR_ZLIB; |
606 | |
607 | if (len == 0) |
608 | return logfs_write_header(super, header, 0, type); |
609 | |
610 | compr_len = logfs_compress(buf, data, len, sb->s_blocksize); |
611 | if (compr_len < 0 || type == JE_ANCHOR) { |
612 | BUG_ON(len > sb->s_blocksize); |
613 | memcpy(data, buf, len); |
614 | compr_len = len; |
615 | compr = COMPR_NONE; |
616 | } |
617 | |
618 | pad_len = ALIGN(compr_len, 16); |
619 | memset(data + compr_len, 0, pad_len - compr_len); |
620 | |
621 | return __logfs_write_header(super, header, compr_len, len, type, compr); |
622 | } |
623 | |
624 | static s64 logfs_get_free_bytes(struct logfs_area *area, size_t *bytes, |
625 | int must_pad) |
626 | { |
627 | u32 writesize = logfs_super(area->a_sb)->s_writesize; |
628 | s32 ofs; |
629 | int ret; |
630 | |
631 | ret = logfs_open_area(area, *bytes); |
632 | if (ret) |
633 | return -EAGAIN; |
634 | |
635 | ofs = area->a_used_bytes; |
636 | area->a_used_bytes += *bytes; |
637 | |
638 | if (must_pad) { |
639 | area->a_used_bytes = ALIGN(area->a_used_bytes, writesize); |
640 | *bytes = area->a_used_bytes - ofs; |
641 | } |
642 | |
643 | return dev_ofs(area->a_sb, area->a_segno, ofs); |
644 | } |
645 | |
646 | static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, |
647 | size_t buf_len) |
648 | { |
649 | struct logfs_super *super = logfs_super(sb); |
650 | struct logfs_area *area = super->s_journal_area; |
651 | struct logfs_journal_header *jh = super->s_compressed_je; |
652 | size_t len; |
653 | int must_pad = 0; |
654 | s64 ofs; |
655 | |
656 | len = __logfs_write_je(sb, buf, type, buf_len); |
657 | if (jh->h_type == cpu_to_be16(JE_COMMIT)) |
658 | must_pad = 1; |
659 | |
660 | ofs = logfs_get_free_bytes(area, &len, must_pad); |
661 | if (ofs < 0) |
662 | return ofs; |
663 | logfs_buf_write(area, ofs, super->s_compressed_je, len); |
664 | super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); |
665 | return 0; |
666 | } |
667 | |
668 | static int logfs_write_je(struct super_block *sb, |
669 | void* (*write)(struct super_block *sb, void *scratch, |
670 | u16 *type, size_t *len)) |
671 | { |
672 | void *buf; |
673 | size_t len; |
674 | u16 type; |
675 | |
676 | buf = write(sb, logfs_super(sb)->s_je, &type, &len); |
677 | return logfs_write_je_buf(sb, buf, type, len); |
678 | } |
679 | |
680 | int write_alias_journal(struct super_block *sb, u64 ino, u64 bix, |
681 | level_t level, int child_no, __be64 val) |
682 | { |
683 | struct logfs_super *super = logfs_super(sb); |
684 | struct logfs_obj_alias *oa = super->s_je; |
685 | int err = 0, fill = super->s_je_fill; |
686 | |
687 | log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n", |
688 | fill, ino, bix, level, child_no, be64_to_cpu(val)); |
689 | oa[fill].ino = cpu_to_be64(ino); |
690 | oa[fill].bix = cpu_to_be64(bix); |
691 | oa[fill].val = val; |
692 | oa[fill].level = (__force u8)level; |
693 | oa[fill].child_no = cpu_to_be16(child_no); |
694 | fill++; |
695 | if (fill >= sb->s_blocksize / sizeof(*oa)) { |
696 | err = logfs_write_je_buf(sb, oa, JE_OBJ_ALIAS, sb->s_blocksize); |
697 | fill = 0; |
698 | } |
699 | |
700 | super->s_je_fill = fill; |
701 | return err; |
702 | } |
703 | |
704 | static int logfs_write_obj_aliases(struct super_block *sb) |
705 | { |
706 | struct logfs_super *super = logfs_super(sb); |
707 | int err; |
708 | |
709 | log_journal("logfs_write_obj_aliases: %d aliases to write\n", |
710 | super->s_no_object_aliases); |
711 | super->s_je_fill = 0; |
712 | err = logfs_write_obj_aliases_pagecache(sb); |
713 | if (err) |
714 | return err; |
715 | |
716 | if (super->s_je_fill) |
717 | err = logfs_write_je_buf(sb, super->s_je, JE_OBJ_ALIAS, |
718 | super->s_je_fill |
719 | * sizeof(struct logfs_obj_alias)); |
720 | return err; |
721 | } |
722 | |
723 | /* |
724 | * Write all journal entries. The goto logic ensures that all journal entries |
725 | * are written whenever a new segment is used. It is ugly and potentially a |
726 | * bit wasteful, but robustness is more important. With this we can *always* |
727 | * erase all journal segments except the one containing the most recent commit. |
728 | */ |
729 | void logfs_write_anchor(struct super_block *sb) |
730 | { |
731 | struct logfs_super *super = logfs_super(sb); |
732 | struct logfs_area *area = super->s_journal_area; |
733 | int i, err; |
734 | |
735 | if (!(super->s_flags & LOGFS_SB_FLAG_DIRTY)) |
736 | return; |
737 | super->s_flags &= ~LOGFS_SB_FLAG_DIRTY; |
738 | |
739 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); |
740 | mutex_lock(&super->s_journal_mutex); |
741 | |
742 | /* Do this first or suffer corruption */ |
743 | logfs_sync_segments(sb); |
744 | account_shadows(sb); |
745 | |
746 | again: |
747 | super->s_no_je = 0; |
748 | for_each_area(i) { |
749 | if (!super->s_area[i]->a_is_open) |
750 | continue; |
751 | super->s_sum_index = i; |
752 | err = logfs_write_je(sb, logfs_write_area); |
753 | if (err) |
754 | goto again; |
755 | } |
756 | err = logfs_write_obj_aliases(sb); |
757 | if (err) |
758 | goto again; |
759 | err = logfs_write_je(sb, logfs_write_erasecount); |
760 | if (err) |
761 | goto again; |
762 | err = logfs_write_je(sb, __logfs_write_anchor); |
763 | if (err) |
764 | goto again; |
765 | err = logfs_write_je(sb, logfs_write_dynsb); |
766 | if (err) |
767 | goto again; |
768 | /* |
769 | * Order is imperative. First we sync all writes, including the |
770 | * non-committed journal writes. Then we write the final commit and |
771 | * sync the current journal segment. |
772 | * There is a theoretical bug here. Syncing the journal segment will |
773 | * write a number of journal entries and the final commit. All these |
774 | * are written in a single operation. If the device layer writes the |
775 | * data back-to-front, the commit will precede the other journal |
776 | * entries, leaving a race window. |
777 | * Two fixes are possible. Preferred is to fix the device layer to |
778 | * ensure writes happen front-to-back. Alternatively we can insert |
779 | * another logfs_sync_area() super->s_devops->sync() combo before |
780 | * writing the commit. |
781 | */ |
782 | /* |
783 | * On another subject, super->s_devops->sync is usually not necessary. |
784 | * Unless called from sys_sync or friends, a barrier would suffice. |
785 | */ |
786 | super->s_devops->sync(sb); |
787 | err = logfs_write_je(sb, logfs_write_commit); |
788 | if (err) |
789 | goto again; |
790 | log_journal("Write commit to %llx\n", |
791 | be64_to_cpu(super->s_je_array[super->s_no_je - 1])); |
792 | logfs_sync_area(area); |
793 | BUG_ON(area->a_used_bytes != area->a_written_bytes); |
794 | super->s_devops->sync(sb); |
795 | |
796 | mutex_unlock(&super->s_journal_mutex); |
797 | return; |
798 | } |
799 | |
800 | void do_logfs_journal_wl_pass(struct super_block *sb) |
801 | { |
802 | struct logfs_super *super = logfs_super(sb); |
803 | struct logfs_area *area = super->s_journal_area; |
804 | struct btree_head32 *head = &super->s_reserved_segments; |
805 | u32 segno, ec; |
806 | int i, err; |
807 | |
808 | log_journal("Journal requires wear-leveling.\n"); |
809 | /* Drop old segments */ |
810 | journal_for_each(i) |
811 | if (super->s_journal_seg[i]) { |
812 | btree_remove32(head, super->s_journal_seg[i]); |
813 | logfs_set_segment_unreserved(sb, |
814 | super->s_journal_seg[i], |
815 | super->s_journal_ec[i]); |
816 | super->s_journal_seg[i] = 0; |
817 | super->s_journal_ec[i] = 0; |
818 | } |
819 | /* Get new segments */ |
820 | for (i = 0; i < super->s_no_journal_segs; i++) { |
821 | segno = get_best_cand(sb, &super->s_reserve_list, &ec); |
822 | super->s_journal_seg[i] = segno; |
823 | super->s_journal_ec[i] = ec; |
824 | logfs_set_segment_reserved(sb, segno); |
825 | err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); |
826 | BUG_ON(err); /* mempool should prevent this */ |
827 | err = logfs_erase_segment(sb, segno, 1); |
828 | BUG_ON(err); /* FIXME: remount-ro would be nicer */ |
829 | } |
830 | /* Manually move journal_area */ |
831 | freeseg(sb, area->a_segno); |
832 | area->a_segno = super->s_journal_seg[0]; |
833 | area->a_is_open = 0; |
834 | area->a_used_bytes = 0; |
835 | /* Write journal */ |
836 | logfs_write_anchor(sb); |
837 | /* Write superblocks */ |
838 | err = logfs_write_sb(sb); |
839 | BUG_ON(err); |
840 | } |
841 | |
842 | static const struct logfs_area_ops journal_area_ops = { |
843 | .get_free_segment = journal_get_free_segment, |
844 | .get_erase_count = journal_get_erase_count, |
845 | .erase_segment = journal_erase_segment, |
846 | }; |
847 | |
848 | int logfs_init_journal(struct super_block *sb) |
849 | { |
850 | struct logfs_super *super = logfs_super(sb); |
851 | size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize) |
852 | + MAX_JOURNAL_HEADER; |
853 | int ret = -ENOMEM; |
854 | |
855 | mutex_init(&super->s_journal_mutex); |
856 | btree_init_mempool32(&super->s_reserved_segments, super->s_btree_pool); |
857 | |
858 | super->s_je = kzalloc(bufsize, GFP_KERNEL); |
859 | if (!super->s_je) |
860 | return ret; |
861 | |
862 | super->s_compressed_je = kzalloc(bufsize, GFP_KERNEL); |
863 | if (!super->s_compressed_je) |
864 | return ret; |
865 | |
866 | super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER); |
867 | if (IS_ERR(super->s_master_inode)) |
868 | return PTR_ERR(super->s_master_inode); |
869 | |
870 | ret = logfs_read_journal(sb); |
871 | if (ret) |
872 | return -EIO; |
873 | |
874 | reserve_sb_and_journal(sb); |
875 | logfs_calc_free(sb); |
876 | |
877 | super->s_journal_area->a_ops = &journal_area_ops; |
878 | return 0; |
879 | } |
880 | |
881 | void logfs_cleanup_journal(struct super_block *sb) |
882 | { |
883 | struct logfs_super *super = logfs_super(sb); |
884 | |
885 | btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); |
886 | destroy_meta_inode(super->s_master_inode); |
887 | super->s_master_inode = NULL; |
888 | |
889 | kfree(super->s_compressed_je); |
890 | kfree(super->s_je); |
891 | } |
892 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9