Root/
1 | /* |
2 | * This file is part of UBIFS. |
3 | * |
4 | * Copyright (C) 2006-2008 Nokia Corporation. |
5 | * Copyright (C) 2006, 2007 University of Szeged, Hungary |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify it |
8 | * under the terms of the GNU General Public License version 2 as published by |
9 | * the Free Software Foundation. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
14 | * more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along with |
17 | * this program; if not, write to the Free Software Foundation, Inc., 51 |
18 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * |
20 | * Authors: Artem Bityutskiy (Битюцкий Артём) |
21 | * Adrian Hunter |
22 | * Zoltan Sogor |
23 | */ |
24 | |
25 | /* |
26 | * This file implements UBIFS I/O subsystem which provides various I/O-related |
27 | * helper functions (reading/writing/checking/validating nodes) and implements |
28 | * write-buffering support. Write buffers help to save space which otherwise |
29 | * would have been wasted for padding to the nearest minimal I/O unit boundary. |
30 | * Instead, data first goes to the write-buffer and is flushed when the |
31 | * buffer is full or when it is not used for some time (by timer). This is |
32 | * similar to the mechanism used by JFFS2. |
33 | * |
34 | * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum |
35 | * write size (@c->max_write_size). The latter is the maximum amount of bytes |
36 | * the underlying flash is able to program at a time, and writing in |
37 | * @c->max_write_size units should presumably be faster. Obviously, |
38 | * @c->min_io_size <= @c->max_write_size. Write-buffers are of |
39 | * @c->max_write_size bytes in size for maximum performance. However, when a |
40 | * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size |
41 | * boundary) which contains data is written, not the whole write-buffer, |
42 | * because this is more space-efficient. |
43 | * |
44 | * This optimization adds a few complications to the code. Indeed, on the one |
45 | * hand, we want to write in optimal @c->max_write_size bytes chunks, which |
46 | * also means aligning writes at the @c->max_write_size bytes offsets. On the |
47 | * other hand, we do not want to waste space when synchronizing the write |
48 | * buffer, so during synchronization we write in smaller chunks. And this makes |
49 | * the next write offset not aligned to @c->max_write_size bytes. So we |
50 | * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned |
51 | * to @c->max_write_size bytes again. We do this by temporarily shrinking |
52 | * write-buffer size (@wbuf->size). |
53 | * |
54 | * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by |
55 | * mutexes defined inside these objects. Since sometimes upper-level code |
56 | * has to lock the write-buffer (e.g. journal space reservation code), many |
57 | * functions related to write-buffers have "nolock" suffix which means that the |
58 | * caller has to lock the write-buffer before calling this function. |
59 | * |
60 | * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not |
61 | * aligned, UBIFS starts the next node from the aligned address, and the padded |
62 | * bytes may contain any rubbish. In other words, UBIFS does not put padding |
63 | * bytes in those small gaps. Common headers of nodes store real node lengths, |
64 | * not aligned lengths. Indexing nodes also store real lengths in branches. |
65 | * |
66 | * UBIFS uses padding when it pads to the next min. I/O unit. In this case it |
67 | * uses padding nodes or padding bytes, if the padding node does not fit. |
68 | * |
69 | * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when |
70 | * they are read from the flash media. |
71 | */ |
72 | |
73 | #include <linux/crc32.h> |
74 | #include <linux/slab.h> |
75 | #include "ubifs.h" |
76 | |
77 | /** |
78 | * ubifs_ro_mode - switch UBIFS to read read-only mode. |
79 | * @c: UBIFS file-system description object |
80 | * @err: error code which is the reason of switching to R/O mode |
81 | */ |
82 | void ubifs_ro_mode(struct ubifs_info *c, int err) |
83 | { |
84 | if (!c->ro_error) { |
85 | c->ro_error = 1; |
86 | c->no_chk_data_crc = 0; |
87 | c->vfs_sb->s_flags |= MS_RDONLY; |
88 | ubifs_warn("switched to read-only mode, error %d", err); |
89 | dbg_dump_stack(); |
90 | } |
91 | } |
92 | |
93 | /** |
94 | * ubifs_check_node - check node. |
95 | * @c: UBIFS file-system description object |
96 | * @buf: node to check |
97 | * @lnum: logical eraseblock number |
98 | * @offs: offset within the logical eraseblock |
99 | * @quiet: print no messages |
100 | * @must_chk_crc: indicates whether to always check the CRC |
101 | * |
102 | * This function checks node magic number and CRC checksum. This function also |
103 | * validates node length to prevent UBIFS from becoming crazy when an attacker |
104 | * feeds it a file-system image with incorrect nodes. For example, too large |
105 | * node length in the common header could cause UBIFS to read memory outside of |
106 | * allocated buffer when checking the CRC checksum. |
107 | * |
108 | * This function may skip data nodes CRC checking if @c->no_chk_data_crc is |
109 | * true, which is controlled by corresponding UBIFS mount option. However, if |
110 | * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is |
111 | * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are |
112 | * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC |
113 | * is checked. This is because during mounting or re-mounting from R/O mode to |
114 | * R/W mode we may read journal nodes (when replying the journal or doing the |
115 | * recovery) and the journal nodes may potentially be corrupted, so checking is |
116 | * required. |
117 | * |
118 | * This function returns zero in case of success and %-EUCLEAN in case of bad |
119 | * CRC or magic. |
120 | */ |
121 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, |
122 | int offs, int quiet, int must_chk_crc) |
123 | { |
124 | int err = -EINVAL, type, node_len; |
125 | uint32_t crc, node_crc, magic; |
126 | const struct ubifs_ch *ch = buf; |
127 | |
128 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
129 | ubifs_assert(!(offs & 7) && offs < c->leb_size); |
130 | |
131 | magic = le32_to_cpu(ch->magic); |
132 | if (magic != UBIFS_NODE_MAGIC) { |
133 | if (!quiet) |
134 | ubifs_err("bad magic %#08x, expected %#08x", |
135 | magic, UBIFS_NODE_MAGIC); |
136 | err = -EUCLEAN; |
137 | goto out; |
138 | } |
139 | |
140 | type = ch->node_type; |
141 | if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { |
142 | if (!quiet) |
143 | ubifs_err("bad node type %d", type); |
144 | goto out; |
145 | } |
146 | |
147 | node_len = le32_to_cpu(ch->len); |
148 | if (node_len + offs > c->leb_size) |
149 | goto out_len; |
150 | |
151 | if (c->ranges[type].max_len == 0) { |
152 | if (node_len != c->ranges[type].len) |
153 | goto out_len; |
154 | } else if (node_len < c->ranges[type].min_len || |
155 | node_len > c->ranges[type].max_len) |
156 | goto out_len; |
157 | |
158 | if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && |
159 | !c->remounting_rw && c->no_chk_data_crc) |
160 | return 0; |
161 | |
162 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); |
163 | node_crc = le32_to_cpu(ch->crc); |
164 | if (crc != node_crc) { |
165 | if (!quiet) |
166 | ubifs_err("bad CRC: calculated %#08x, read %#08x", |
167 | crc, node_crc); |
168 | err = -EUCLEAN; |
169 | goto out; |
170 | } |
171 | |
172 | return 0; |
173 | |
174 | out_len: |
175 | if (!quiet) |
176 | ubifs_err("bad node length %d", node_len); |
177 | out: |
178 | if (!quiet) { |
179 | ubifs_err("bad node at LEB %d:%d", lnum, offs); |
180 | dbg_dump_node(c, buf); |
181 | dbg_dump_stack(); |
182 | } |
183 | return err; |
184 | } |
185 | |
186 | /** |
187 | * ubifs_pad - pad flash space. |
188 | * @c: UBIFS file-system description object |
189 | * @buf: buffer to put padding to |
190 | * @pad: how many bytes to pad |
191 | * |
192 | * The flash media obliges us to write only in chunks of %c->min_io_size and |
193 | * when we have to write less data we add padding node to the write-buffer and |
194 | * pad it to the next minimal I/O unit's boundary. Padding nodes help when the |
195 | * media is being scanned. If the amount of wasted space is not enough to fit a |
196 | * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes |
197 | * pattern (%UBIFS_PADDING_BYTE). |
198 | * |
199 | * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is |
200 | * used. |
201 | */ |
202 | void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) |
203 | { |
204 | uint32_t crc; |
205 | |
206 | ubifs_assert(pad >= 0 && !(pad & 7)); |
207 | |
208 | if (pad >= UBIFS_PAD_NODE_SZ) { |
209 | struct ubifs_ch *ch = buf; |
210 | struct ubifs_pad_node *pad_node = buf; |
211 | |
212 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); |
213 | ch->node_type = UBIFS_PAD_NODE; |
214 | ch->group_type = UBIFS_NO_NODE_GROUP; |
215 | ch->padding[0] = ch->padding[1] = 0; |
216 | ch->sqnum = 0; |
217 | ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); |
218 | pad -= UBIFS_PAD_NODE_SZ; |
219 | pad_node->pad_len = cpu_to_le32(pad); |
220 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); |
221 | ch->crc = cpu_to_le32(crc); |
222 | memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); |
223 | } else if (pad > 0) |
224 | /* Too little space, padding node won't fit */ |
225 | memset(buf, UBIFS_PADDING_BYTE, pad); |
226 | } |
227 | |
228 | /** |
229 | * next_sqnum - get next sequence number. |
230 | * @c: UBIFS file-system description object |
231 | */ |
232 | static unsigned long long next_sqnum(struct ubifs_info *c) |
233 | { |
234 | unsigned long long sqnum; |
235 | |
236 | spin_lock(&c->cnt_lock); |
237 | sqnum = ++c->max_sqnum; |
238 | spin_unlock(&c->cnt_lock); |
239 | |
240 | if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { |
241 | if (sqnum >= SQNUM_WATERMARK) { |
242 | ubifs_err("sequence number overflow %llu, end of life", |
243 | sqnum); |
244 | ubifs_ro_mode(c, -EINVAL); |
245 | } |
246 | ubifs_warn("running out of sequence numbers, end of life soon"); |
247 | } |
248 | |
249 | return sqnum; |
250 | } |
251 | |
252 | /** |
253 | * ubifs_prepare_node - prepare node to be written to flash. |
254 | * @c: UBIFS file-system description object |
255 | * @node: the node to pad |
256 | * @len: node length |
257 | * @pad: if the buffer has to be padded |
258 | * |
259 | * This function prepares node at @node to be written to the media - it |
260 | * calculates node CRC, fills the common header, and adds proper padding up to |
261 | * the next minimum I/O unit if @pad is not zero. |
262 | */ |
263 | void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) |
264 | { |
265 | uint32_t crc; |
266 | struct ubifs_ch *ch = node; |
267 | unsigned long long sqnum = next_sqnum(c); |
268 | |
269 | ubifs_assert(len >= UBIFS_CH_SZ); |
270 | |
271 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); |
272 | ch->len = cpu_to_le32(len); |
273 | ch->group_type = UBIFS_NO_NODE_GROUP; |
274 | ch->sqnum = cpu_to_le64(sqnum); |
275 | ch->padding[0] = ch->padding[1] = 0; |
276 | crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); |
277 | ch->crc = cpu_to_le32(crc); |
278 | |
279 | if (pad) { |
280 | len = ALIGN(len, 8); |
281 | pad = ALIGN(len, c->min_io_size) - len; |
282 | ubifs_pad(c, node + len, pad); |
283 | } |
284 | } |
285 | |
286 | /** |
287 | * ubifs_prep_grp_node - prepare node of a group to be written to flash. |
288 | * @c: UBIFS file-system description object |
289 | * @node: the node to pad |
290 | * @len: node length |
291 | * @last: indicates the last node of the group |
292 | * |
293 | * This function prepares node at @node to be written to the media - it |
294 | * calculates node CRC and fills the common header. |
295 | */ |
296 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) |
297 | { |
298 | uint32_t crc; |
299 | struct ubifs_ch *ch = node; |
300 | unsigned long long sqnum = next_sqnum(c); |
301 | |
302 | ubifs_assert(len >= UBIFS_CH_SZ); |
303 | |
304 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); |
305 | ch->len = cpu_to_le32(len); |
306 | if (last) |
307 | ch->group_type = UBIFS_LAST_OF_NODE_GROUP; |
308 | else |
309 | ch->group_type = UBIFS_IN_NODE_GROUP; |
310 | ch->sqnum = cpu_to_le64(sqnum); |
311 | ch->padding[0] = ch->padding[1] = 0; |
312 | crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); |
313 | ch->crc = cpu_to_le32(crc); |
314 | } |
315 | |
316 | /** |
317 | * wbuf_timer_callback - write-buffer timer callback function. |
318 | * @data: timer data (write-buffer descriptor) |
319 | * |
320 | * This function is called when the write-buffer timer expires. |
321 | */ |
322 | static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) |
323 | { |
324 | struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); |
325 | |
326 | dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); |
327 | wbuf->need_sync = 1; |
328 | wbuf->c->need_wbuf_sync = 1; |
329 | ubifs_wake_up_bgt(wbuf->c); |
330 | return HRTIMER_NORESTART; |
331 | } |
332 | |
333 | /** |
334 | * new_wbuf_timer - start new write-buffer timer. |
335 | * @wbuf: write-buffer descriptor |
336 | */ |
337 | static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) |
338 | { |
339 | ubifs_assert(!hrtimer_active(&wbuf->timer)); |
340 | |
341 | if (wbuf->no_timer) |
342 | return; |
343 | dbg_io("set timer for jhead %s, %llu-%llu millisecs", |
344 | dbg_jhead(wbuf->jhead), |
345 | div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), |
346 | div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, |
347 | USEC_PER_SEC)); |
348 | hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, |
349 | HRTIMER_MODE_REL); |
350 | } |
351 | |
352 | /** |
353 | * cancel_wbuf_timer - cancel write-buffer timer. |
354 | * @wbuf: write-buffer descriptor |
355 | */ |
356 | static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) |
357 | { |
358 | if (wbuf->no_timer) |
359 | return; |
360 | wbuf->need_sync = 0; |
361 | hrtimer_cancel(&wbuf->timer); |
362 | } |
363 | |
364 | /** |
365 | * ubifs_wbuf_sync_nolock - synchronize write-buffer. |
366 | * @wbuf: write-buffer to synchronize |
367 | * |
368 | * This function synchronizes write-buffer @buf and returns zero in case of |
369 | * success or a negative error code in case of failure. |
370 | * |
371 | * Note, although write-buffers are of @c->max_write_size, this function does |
372 | * not necessarily writes all @c->max_write_size bytes to the flash. Instead, |
373 | * if the write-buffer is only partially filled with data, only the used part |
374 | * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. |
375 | * This way we waste less space. |
376 | */ |
377 | int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) |
378 | { |
379 | struct ubifs_info *c = wbuf->c; |
380 | int err, dirt, sync_len; |
381 | |
382 | cancel_wbuf_timer_nolock(wbuf); |
383 | if (!wbuf->used || wbuf->lnum == -1) |
384 | /* Write-buffer is empty or not seeked */ |
385 | return 0; |
386 | |
387 | dbg_io("LEB %d:%d, %d bytes, jhead %s", |
388 | wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); |
389 | ubifs_assert(!(wbuf->avail & 7)); |
390 | ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); |
391 | ubifs_assert(wbuf->size >= c->min_io_size); |
392 | ubifs_assert(wbuf->size <= c->max_write_size); |
393 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
394 | ubifs_assert(!c->ro_media && !c->ro_mount); |
395 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
397 | |
398 | if (c->ro_error) |
399 | return -EROFS; |
400 | |
401 | /* |
402 | * Do not write whole write buffer but write only the minimum necessary |
403 | * amount of min. I/O units. |
404 | */ |
405 | sync_len = ALIGN(wbuf->used, c->min_io_size); |
406 | dirt = sync_len - wbuf->used; |
407 | if (dirt) |
408 | ubifs_pad(c, wbuf->buf + wbuf->used, dirt); |
409 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, |
410 | sync_len, wbuf->dtype); |
411 | if (err) { |
412 | ubifs_err("cannot write %d bytes to LEB %d:%d", |
413 | sync_len, wbuf->lnum, wbuf->offs); |
414 | dbg_dump_stack(); |
415 | return err; |
416 | } |
417 | |
418 | spin_lock(&wbuf->lock); |
419 | wbuf->offs += sync_len; |
420 | /* |
421 | * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. |
422 | * But our goal is to optimize writes and make sure we write in |
423 | * @c->max_write_size chunks and to @c->max_write_size-aligned offset. |
424 | * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make |
425 | * sure that @wbuf->offs + @wbuf->size is aligned to |
426 | * @c->max_write_size. This way we make sure that after next |
427 | * write-buffer flush we are again at the optimal offset (aligned to |
428 | * @c->max_write_size). |
429 | */ |
430 | if (c->leb_size - wbuf->offs < c->max_write_size) |
431 | wbuf->size = c->leb_size - wbuf->offs; |
432 | else if (wbuf->offs & (c->max_write_size - 1)) |
433 | wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; |
434 | else |
435 | wbuf->size = c->max_write_size; |
436 | wbuf->avail = wbuf->size; |
437 | wbuf->used = 0; |
438 | wbuf->next_ino = 0; |
439 | spin_unlock(&wbuf->lock); |
440 | |
441 | if (wbuf->sync_callback) |
442 | err = wbuf->sync_callback(c, wbuf->lnum, |
443 | c->leb_size - wbuf->offs, dirt); |
444 | return err; |
445 | } |
446 | |
447 | /** |
448 | * ubifs_wbuf_seek_nolock - seek write-buffer. |
449 | * @wbuf: write-buffer |
450 | * @lnum: logical eraseblock number to seek to |
451 | * @offs: logical eraseblock offset to seek to |
452 | * @dtype: data type |
453 | * |
454 | * This function targets the write-buffer to logical eraseblock @lnum:@offs. |
455 | * The write-buffer has to be empty. Returns zero in case of success and a |
456 | * negative error code in case of failure. |
457 | */ |
458 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, |
459 | int dtype) |
460 | { |
461 | const struct ubifs_info *c = wbuf->c; |
462 | |
463 | dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); |
464 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); |
465 | ubifs_assert(offs >= 0 && offs <= c->leb_size); |
466 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); |
467 | ubifs_assert(lnum != wbuf->lnum); |
468 | ubifs_assert(wbuf->used == 0); |
469 | |
470 | spin_lock(&wbuf->lock); |
471 | wbuf->lnum = lnum; |
472 | wbuf->offs = offs; |
473 | if (c->leb_size - wbuf->offs < c->max_write_size) |
474 | wbuf->size = c->leb_size - wbuf->offs; |
475 | else if (wbuf->offs & (c->max_write_size - 1)) |
476 | wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; |
477 | else |
478 | wbuf->size = c->max_write_size; |
479 | wbuf->avail = wbuf->size; |
480 | wbuf->used = 0; |
481 | spin_unlock(&wbuf->lock); |
482 | wbuf->dtype = dtype; |
483 | |
484 | return 0; |
485 | } |
486 | |
487 | /** |
488 | * ubifs_bg_wbufs_sync - synchronize write-buffers. |
489 | * @c: UBIFS file-system description object |
490 | * |
491 | * This function is called by background thread to synchronize write-buffers. |
492 | * Returns zero in case of success and a negative error code in case of |
493 | * failure. |
494 | */ |
495 | int ubifs_bg_wbufs_sync(struct ubifs_info *c) |
496 | { |
497 | int err, i; |
498 | |
499 | ubifs_assert(!c->ro_media && !c->ro_mount); |
500 | if (!c->need_wbuf_sync) |
501 | return 0; |
502 | c->need_wbuf_sync = 0; |
503 | |
504 | if (c->ro_error) { |
505 | err = -EROFS; |
506 | goto out_timers; |
507 | } |
508 | |
509 | dbg_io("synchronize"); |
510 | for (i = 0; i < c->jhead_cnt; i++) { |
511 | struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; |
512 | |
513 | cond_resched(); |
514 | |
515 | /* |
516 | * If the mutex is locked then wbuf is being changed, so |
517 | * synchronization is not necessary. |
518 | */ |
519 | if (mutex_is_locked(&wbuf->io_mutex)) |
520 | continue; |
521 | |
522 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
523 | if (!wbuf->need_sync) { |
524 | mutex_unlock(&wbuf->io_mutex); |
525 | continue; |
526 | } |
527 | |
528 | err = ubifs_wbuf_sync_nolock(wbuf); |
529 | mutex_unlock(&wbuf->io_mutex); |
530 | if (err) { |
531 | ubifs_err("cannot sync write-buffer, error %d", err); |
532 | ubifs_ro_mode(c, err); |
533 | goto out_timers; |
534 | } |
535 | } |
536 | |
537 | return 0; |
538 | |
539 | out_timers: |
540 | /* Cancel all timers to prevent repeated errors */ |
541 | for (i = 0; i < c->jhead_cnt; i++) { |
542 | struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; |
543 | |
544 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
545 | cancel_wbuf_timer_nolock(wbuf); |
546 | mutex_unlock(&wbuf->io_mutex); |
547 | } |
548 | return err; |
549 | } |
550 | |
551 | /** |
552 | * ubifs_wbuf_write_nolock - write data to flash via write-buffer. |
553 | * @wbuf: write-buffer |
554 | * @buf: node to write |
555 | * @len: node length |
556 | * |
557 | * This function writes data to flash via write-buffer @wbuf. This means that |
558 | * the last piece of the node won't reach the flash media immediately if it |
559 | * does not take whole max. write unit (@c->max_write_size). Instead, the node |
560 | * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or |
561 | * because more data are appended to the write-buffer). |
562 | * |
563 | * This function returns zero in case of success and a negative error code in |
564 | * case of failure. If the node cannot be written because there is no more |
565 | * space in this logical eraseblock, %-ENOSPC is returned. |
566 | */ |
567 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) |
568 | { |
569 | struct ubifs_info *c = wbuf->c; |
570 | int err, written, n, aligned_len = ALIGN(len, 8); |
571 | |
572 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, |
573 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), |
574 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); |
575 | ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); |
576 | ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); |
577 | ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); |
578 | ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); |
579 | ubifs_assert(wbuf->size >= c->min_io_size); |
580 | ubifs_assert(wbuf->size <= c->max_write_size); |
581 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
583 | ubifs_assert(!c->ro_media && !c->ro_mount); |
584 | ubifs_assert(!c->space_fixup); |
585 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
586 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
587 | |
588 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { |
589 | err = -ENOSPC; |
590 | goto out; |
591 | } |
592 | |
593 | cancel_wbuf_timer_nolock(wbuf); |
594 | |
595 | if (c->ro_error) |
596 | return -EROFS; |
597 | |
598 | if (aligned_len <= wbuf->avail) { |
599 | /* |
600 | * The node is not very large and fits entirely within |
601 | * write-buffer. |
602 | */ |
603 | memcpy(wbuf->buf + wbuf->used, buf, len); |
604 | |
605 | if (aligned_len == wbuf->avail) { |
606 | dbg_io("flush jhead %s wbuf to LEB %d:%d", |
607 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); |
608 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, |
609 | wbuf->offs, wbuf->size, |
610 | wbuf->dtype); |
611 | if (err) |
612 | goto out; |
613 | |
614 | spin_lock(&wbuf->lock); |
615 | wbuf->offs += wbuf->size; |
616 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
617 | wbuf->size = c->max_write_size; |
618 | else |
619 | wbuf->size = c->leb_size - wbuf->offs; |
620 | wbuf->avail = wbuf->size; |
621 | wbuf->used = 0; |
622 | wbuf->next_ino = 0; |
623 | spin_unlock(&wbuf->lock); |
624 | } else { |
625 | spin_lock(&wbuf->lock); |
626 | wbuf->avail -= aligned_len; |
627 | wbuf->used += aligned_len; |
628 | spin_unlock(&wbuf->lock); |
629 | } |
630 | |
631 | goto exit; |
632 | } |
633 | |
634 | written = 0; |
635 | |
636 | if (wbuf->used) { |
637 | /* |
638 | * The node is large enough and does not fit entirely within |
639 | * current available space. We have to fill and flush |
640 | * write-buffer and switch to the next max. write unit. |
641 | */ |
642 | dbg_io("flush jhead %s wbuf to LEB %d:%d", |
643 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); |
644 | memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); |
645 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, |
646 | wbuf->size, wbuf->dtype); |
647 | if (err) |
648 | goto out; |
649 | |
650 | wbuf->offs += wbuf->size; |
651 | len -= wbuf->avail; |
652 | aligned_len -= wbuf->avail; |
653 | written += wbuf->avail; |
654 | } else if (wbuf->offs & (c->max_write_size - 1)) { |
655 | /* |
656 | * The write-buffer offset is not aligned to |
657 | * @c->max_write_size and @wbuf->size is less than |
658 | * @c->max_write_size. Write @wbuf->size bytes to make sure the |
659 | * following writes are done in optimal @c->max_write_size |
660 | * chunks. |
661 | */ |
662 | dbg_io("write %d bytes to LEB %d:%d", |
663 | wbuf->size, wbuf->lnum, wbuf->offs); |
664 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, |
665 | wbuf->size, wbuf->dtype); |
666 | if (err) |
667 | goto out; |
668 | |
669 | wbuf->offs += wbuf->size; |
670 | len -= wbuf->size; |
671 | aligned_len -= wbuf->size; |
672 | written += wbuf->size; |
673 | } |
674 | |
675 | /* |
676 | * The remaining data may take more whole max. write units, so write the |
677 | * remains multiple to max. write unit size directly to the flash media. |
678 | * We align node length to 8-byte boundary because we anyway flash wbuf |
679 | * if the remaining space is less than 8 bytes. |
680 | */ |
681 | n = aligned_len >> c->max_write_shift; |
682 | if (n) { |
683 | n <<= c->max_write_shift; |
684 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, |
685 | wbuf->offs); |
686 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, |
687 | wbuf->offs, n, wbuf->dtype); |
688 | if (err) |
689 | goto out; |
690 | wbuf->offs += n; |
691 | aligned_len -= n; |
692 | len -= n; |
693 | written += n; |
694 | } |
695 | |
696 | spin_lock(&wbuf->lock); |
697 | if (aligned_len) |
698 | /* |
699 | * And now we have what's left and what does not take whole |
700 | * max. write unit, so write it to the write-buffer and we are |
701 | * done. |
702 | */ |
703 | memcpy(wbuf->buf, buf + written, len); |
704 | |
705 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
706 | wbuf->size = c->max_write_size; |
707 | else |
708 | wbuf->size = c->leb_size - wbuf->offs; |
709 | wbuf->avail = wbuf->size - aligned_len; |
710 | wbuf->used = aligned_len; |
711 | wbuf->next_ino = 0; |
712 | spin_unlock(&wbuf->lock); |
713 | |
714 | exit: |
715 | if (wbuf->sync_callback) { |
716 | int free = c->leb_size - wbuf->offs - wbuf->used; |
717 | |
718 | err = wbuf->sync_callback(c, wbuf->lnum, free, 0); |
719 | if (err) |
720 | goto out; |
721 | } |
722 | |
723 | if (wbuf->used) |
724 | new_wbuf_timer_nolock(wbuf); |
725 | |
726 | return 0; |
727 | |
728 | out: |
729 | ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", |
730 | len, wbuf->lnum, wbuf->offs, err); |
731 | dbg_dump_node(c, buf); |
732 | dbg_dump_stack(); |
733 | dbg_dump_leb(c, wbuf->lnum); |
734 | return err; |
735 | } |
736 | |
737 | /** |
738 | * ubifs_write_node - write node to the media. |
739 | * @c: UBIFS file-system description object |
740 | * @buf: the node to write |
741 | * @len: node length |
742 | * @lnum: logical eraseblock number |
743 | * @offs: offset within the logical eraseblock |
744 | * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) |
745 | * |
746 | * This function automatically fills node magic number, assigns sequence |
747 | * number, and calculates node CRC checksum. The length of the @buf buffer has |
748 | * to be aligned to the minimal I/O unit size. This function automatically |
749 | * appends padding node and padding bytes if needed. Returns zero in case of |
750 | * success and a negative error code in case of failure. |
751 | */ |
752 | int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, |
753 | int offs, int dtype) |
754 | { |
755 | int err, buf_len = ALIGN(len, c->min_io_size); |
756 | |
757 | dbg_io("LEB %d:%d, %s, length %d (aligned %d)", |
758 | lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, |
759 | buf_len); |
760 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
761 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); |
762 | ubifs_assert(!c->ro_media && !c->ro_mount); |
763 | ubifs_assert(!c->space_fixup); |
764 | |
765 | if (c->ro_error) |
766 | return -EROFS; |
767 | |
768 | ubifs_prepare_node(c, buf, len, 1); |
769 | err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); |
770 | if (err) { |
771 | ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", |
772 | buf_len, lnum, offs, err); |
773 | dbg_dump_node(c, buf); |
774 | dbg_dump_stack(); |
775 | } |
776 | |
777 | return err; |
778 | } |
779 | |
780 | /** |
781 | * ubifs_read_node_wbuf - read node from the media or write-buffer. |
782 | * @wbuf: wbuf to check for un-written data |
783 | * @buf: buffer to read to |
784 | * @type: node type |
785 | * @len: node length |
786 | * @lnum: logical eraseblock number |
787 | * @offs: offset within the logical eraseblock |
788 | * |
789 | * This function reads a node of known type and length, checks it and stores |
790 | * in @buf. If the node partially or fully sits in the write-buffer, this |
791 | * function takes data from the buffer, otherwise it reads the flash media. |
792 | * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative |
793 | * error code in case of failure. |
794 | */ |
795 | int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, |
796 | int lnum, int offs) |
797 | { |
798 | const struct ubifs_info *c = wbuf->c; |
799 | int err, rlen, overlap; |
800 | struct ubifs_ch *ch = buf; |
801 | |
802 | dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, |
803 | dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); |
804 | ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
805 | ubifs_assert(!(offs & 7) && offs < c->leb_size); |
806 | ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); |
807 | |
808 | spin_lock(&wbuf->lock); |
809 | overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); |
810 | if (!overlap) { |
811 | /* We may safely unlock the write-buffer and read the data */ |
812 | spin_unlock(&wbuf->lock); |
813 | return ubifs_read_node(c, buf, type, len, lnum, offs); |
814 | } |
815 | |
816 | /* Don't read under wbuf */ |
817 | rlen = wbuf->offs - offs; |
818 | if (rlen < 0) |
819 | rlen = 0; |
820 | |
821 | /* Copy the rest from the write-buffer */ |
822 | memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); |
823 | spin_unlock(&wbuf->lock); |
824 | |
825 | if (rlen > 0) { |
826 | /* Read everything that goes before write-buffer */ |
827 | err = ubi_read(c->ubi, lnum, buf, offs, rlen); |
828 | if (err && err != -EBADMSG) { |
829 | ubifs_err("failed to read node %d from LEB %d:%d, " |
830 | "error %d", type, lnum, offs, err); |
831 | dbg_dump_stack(); |
832 | return err; |
833 | } |
834 | } |
835 | |
836 | if (type != ch->node_type) { |
837 | ubifs_err("bad node type (%d but expected %d)", |
838 | ch->node_type, type); |
839 | goto out; |
840 | } |
841 | |
842 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
843 | if (err) { |
844 | ubifs_err("expected node type %d", type); |
845 | return err; |
846 | } |
847 | |
848 | rlen = le32_to_cpu(ch->len); |
849 | if (rlen != len) { |
850 | ubifs_err("bad node length %d, expected %d", rlen, len); |
851 | goto out; |
852 | } |
853 | |
854 | return 0; |
855 | |
856 | out: |
857 | ubifs_err("bad node at LEB %d:%d", lnum, offs); |
858 | dbg_dump_node(c, buf); |
859 | dbg_dump_stack(); |
860 | return -EINVAL; |
861 | } |
862 | |
863 | /** |
864 | * ubifs_read_node - read node. |
865 | * @c: UBIFS file-system description object |
866 | * @buf: buffer to read to |
867 | * @type: node type |
868 | * @len: node length (not aligned) |
869 | * @lnum: logical eraseblock number |
870 | * @offs: offset within the logical eraseblock |
871 | * |
872 | * This function reads a node of known type and and length, checks it and |
873 | * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched |
874 | * and a negative error code in case of failure. |
875 | */ |
876 | int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, |
877 | int lnum, int offs) |
878 | { |
879 | int err, l; |
880 | struct ubifs_ch *ch = buf; |
881 | |
882 | dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); |
883 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
884 | ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); |
885 | ubifs_assert(!(offs & 7) && offs < c->leb_size); |
886 | ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); |
887 | |
888 | err = ubi_read(c->ubi, lnum, buf, offs, len); |
889 | if (err && err != -EBADMSG) { |
890 | ubifs_err("cannot read node %d from LEB %d:%d, error %d", |
891 | type, lnum, offs, err); |
892 | return err; |
893 | } |
894 | |
895 | if (type != ch->node_type) { |
896 | ubifs_err("bad node type (%d but expected %d)", |
897 | ch->node_type, type); |
898 | goto out; |
899 | } |
900 | |
901 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
902 | if (err) { |
903 | ubifs_err("expected node type %d", type); |
904 | return err; |
905 | } |
906 | |
907 | l = le32_to_cpu(ch->len); |
908 | if (l != len) { |
909 | ubifs_err("bad node length %d, expected %d", l, len); |
910 | goto out; |
911 | } |
912 | |
913 | return 0; |
914 | |
915 | out: |
916 | ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, |
917 | ubi_is_mapped(c->ubi, lnum)); |
918 | dbg_dump_node(c, buf); |
919 | dbg_dump_stack(); |
920 | return -EINVAL; |
921 | } |
922 | |
923 | /** |
924 | * ubifs_wbuf_init - initialize write-buffer. |
925 | * @c: UBIFS file-system description object |
926 | * @wbuf: write-buffer to initialize |
927 | * |
928 | * This function initializes write-buffer. Returns zero in case of success |
929 | * %-ENOMEM in case of failure. |
930 | */ |
931 | int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) |
932 | { |
933 | size_t size; |
934 | |
935 | wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); |
936 | if (!wbuf->buf) |
937 | return -ENOMEM; |
938 | |
939 | size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); |
940 | wbuf->inodes = kmalloc(size, GFP_KERNEL); |
941 | if (!wbuf->inodes) { |
942 | kfree(wbuf->buf); |
943 | wbuf->buf = NULL; |
944 | return -ENOMEM; |
945 | } |
946 | |
947 | wbuf->used = 0; |
948 | wbuf->lnum = wbuf->offs = -1; |
949 | /* |
950 | * If the LEB starts at the max. write size aligned address, then |
951 | * write-buffer size has to be set to @c->max_write_size. Otherwise, |
952 | * set it to something smaller so that it ends at the closest max. |
953 | * write size boundary. |
954 | */ |
955 | size = c->max_write_size - (c->leb_start % c->max_write_size); |
956 | wbuf->avail = wbuf->size = size; |
957 | wbuf->dtype = UBI_UNKNOWN; |
958 | wbuf->sync_callback = NULL; |
959 | mutex_init(&wbuf->io_mutex); |
960 | spin_lock_init(&wbuf->lock); |
961 | wbuf->c = c; |
962 | wbuf->next_ino = 0; |
963 | |
964 | hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
965 | wbuf->timer.function = wbuf_timer_callback_nolock; |
966 | wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0); |
967 | wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT; |
968 | wbuf->delta *= 1000000000ULL; |
969 | ubifs_assert(wbuf->delta <= ULONG_MAX); |
970 | return 0; |
971 | } |
972 | |
973 | /** |
974 | * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. |
975 | * @wbuf: the write-buffer where to add |
976 | * @inum: the inode number |
977 | * |
978 | * This function adds an inode number to the inode array of the write-buffer. |
979 | */ |
980 | void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) |
981 | { |
982 | if (!wbuf->buf) |
983 | /* NOR flash or something similar */ |
984 | return; |
985 | |
986 | spin_lock(&wbuf->lock); |
987 | if (wbuf->used) |
988 | wbuf->inodes[wbuf->next_ino++] = inum; |
989 | spin_unlock(&wbuf->lock); |
990 | } |
991 | |
992 | /** |
993 | * wbuf_has_ino - returns if the wbuf contains data from the inode. |
994 | * @wbuf: the write-buffer |
995 | * @inum: the inode number |
996 | * |
997 | * This function returns with %1 if the write-buffer contains some data from the |
998 | * given inode otherwise it returns with %0. |
999 | */ |
1000 | static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) |
1001 | { |
1002 | int i, ret = 0; |
1003 | |
1004 | spin_lock(&wbuf->lock); |
1005 | for (i = 0; i < wbuf->next_ino; i++) |
1006 | if (inum == wbuf->inodes[i]) { |
1007 | ret = 1; |
1008 | break; |
1009 | } |
1010 | spin_unlock(&wbuf->lock); |
1011 | |
1012 | return ret; |
1013 | } |
1014 | |
1015 | /** |
1016 | * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. |
1017 | * @c: UBIFS file-system description object |
1018 | * @inode: inode to synchronize |
1019 | * |
1020 | * This function synchronizes write-buffers which contain nodes belonging to |
1021 | * @inode. Returns zero in case of success and a negative error code in case of |
1022 | * failure. |
1023 | */ |
1024 | int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) |
1025 | { |
1026 | int i, err = 0; |
1027 | |
1028 | for (i = 0; i < c->jhead_cnt; i++) { |
1029 | struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; |
1030 | |
1031 | if (i == GCHD) |
1032 | /* |
1033 | * GC head is special, do not look at it. Even if the |
1034 | * head contains something related to this inode, it is |
1035 | * a _copy_ of corresponding on-flash node which sits |
1036 | * somewhere else. |
1037 | */ |
1038 | continue; |
1039 | |
1040 | if (!wbuf_has_ino(wbuf, inode->i_ino)) |
1041 | continue; |
1042 | |
1043 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
1044 | if (wbuf_has_ino(wbuf, inode->i_ino)) |
1045 | err = ubifs_wbuf_sync_nolock(wbuf); |
1046 | mutex_unlock(&wbuf->io_mutex); |
1047 | |
1048 | if (err) { |
1049 | ubifs_ro_mode(c, err); |
1050 | return err; |
1051 | } |
1052 | } |
1053 | return 0; |
1054 | } |
1055 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9