Root/
1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. |
4 | * |
5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | #include "xfs.h" |
19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" |
26 | #include "xfs_dir2.h" |
27 | #include "xfs_alloc.h" |
28 | #include "xfs_dmapi.h" |
29 | #include "xfs_quota.h" |
30 | #include "xfs_mount.h" |
31 | #include "xfs_bmap_btree.h" |
32 | #include "xfs_alloc_btree.h" |
33 | #include "xfs_ialloc_btree.h" |
34 | #include "xfs_dir2_sf.h" |
35 | #include "xfs_attr_sf.h" |
36 | #include "xfs_dinode.h" |
37 | #include "xfs_inode.h" |
38 | #include "xfs_ialloc.h" |
39 | #include "xfs_btree.h" |
40 | #include "xfs_bmap.h" |
41 | #include "xfs_rtalloc.h" |
42 | #include "xfs_error.h" |
43 | #include "xfs_itable.h" |
44 | #include "xfs_rw.h" |
45 | #include "xfs_attr.h" |
46 | #include "xfs_buf_item.h" |
47 | #include "xfs_trans_space.h" |
48 | #include "xfs_utils.h" |
49 | #include "xfs_iomap.h" |
50 | |
51 | #if defined(XFS_RW_TRACE) |
52 | void |
53 | xfs_iomap_enter_trace( |
54 | int tag, |
55 | xfs_inode_t *ip, |
56 | xfs_off_t offset, |
57 | ssize_t count) |
58 | { |
59 | if (!ip->i_rwtrace) |
60 | return; |
61 | |
62 | ktrace_enter(ip->i_rwtrace, |
63 | (void *)((unsigned long)tag), |
64 | (void *)ip, |
65 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), |
66 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), |
67 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), |
68 | (void *)((unsigned long)(offset & 0xffffffff)), |
69 | (void *)((unsigned long)count), |
70 | (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), |
71 | (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), |
72 | (void *)((unsigned long)current_pid()), |
73 | (void *)NULL, |
74 | (void *)NULL, |
75 | (void *)NULL, |
76 | (void *)NULL, |
77 | (void *)NULL, |
78 | (void *)NULL); |
79 | } |
80 | |
81 | void |
82 | xfs_iomap_map_trace( |
83 | int tag, |
84 | xfs_inode_t *ip, |
85 | xfs_off_t offset, |
86 | ssize_t count, |
87 | xfs_iomap_t *iomapp, |
88 | xfs_bmbt_irec_t *imapp, |
89 | int flags) |
90 | { |
91 | if (!ip->i_rwtrace) |
92 | return; |
93 | |
94 | ktrace_enter(ip->i_rwtrace, |
95 | (void *)((unsigned long)tag), |
96 | (void *)ip, |
97 | (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), |
98 | (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), |
99 | (void *)((unsigned long)((offset >> 32) & 0xffffffff)), |
100 | (void *)((unsigned long)(offset & 0xffffffff)), |
101 | (void *)((unsigned long)count), |
102 | (void *)((unsigned long)flags), |
103 | (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)), |
104 | (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)), |
105 | (void *)((unsigned long)(iomapp->iomap_delta)), |
106 | (void *)((unsigned long)(iomapp->iomap_bsize)), |
107 | (void *)((unsigned long)(iomapp->iomap_bn)), |
108 | (void *)(__psint_t)(imapp->br_startoff), |
109 | (void *)((unsigned long)(imapp->br_blockcount)), |
110 | (void *)(__psint_t)(imapp->br_startblock)); |
111 | } |
112 | #else |
/*
 * Tracing compiled out: the trace hooks become no-ops.  They expand to
 * ((void)0) rather than to nothing so that a call is still a well-formed
 * expression statement wherever it appears (e.g. as the sole body of an
 * if, or inside a comma expression).  The arguments are not evaluated.
 */
#define	xfs_iomap_enter_trace(tag, ip, offset, count)		((void)0)
#define	xfs_iomap_map_trace(tag, ip, offset, count, iomapp, imapp, flags) \
								((void)0)
115 | #endif |
116 | |
/*
 * Clear the low m_writeio_log bits of a byte offset, i.e. round a
 * non-negative offset down to a multiple of (1 << m_writeio_log).
 * Both macro arguments are parenthesised so that any expression may be
 * passed for mp (e.g. &mount or ptr + 1).
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
/* Map-count limits; XFS_WRITE_IMAPS sizes the extent array used for delayed-allocation writes. */
#define XFS_STRAT_WRITE_IMAPS	2
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
121 | |
122 | STATIC int |
123 | xfs_imap_to_bmap( |
124 | xfs_inode_t *ip, |
125 | xfs_off_t offset, |
126 | xfs_bmbt_irec_t *imap, |
127 | xfs_iomap_t *iomapp, |
128 | int imaps, /* Number of imap entries */ |
129 | int iomaps, /* Number of iomap entries */ |
130 | int flags) |
131 | { |
132 | xfs_mount_t *mp = ip->i_mount; |
133 | int pbm; |
134 | xfs_fsblock_t start_block; |
135 | |
136 | |
137 | for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { |
138 | iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); |
139 | iomapp->iomap_delta = offset - iomapp->iomap_offset; |
140 | iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); |
141 | iomapp->iomap_flags = flags; |
142 | |
143 | if (XFS_IS_REALTIME_INODE(ip)) { |
144 | iomapp->iomap_flags |= IOMAP_REALTIME; |
145 | iomapp->iomap_target = mp->m_rtdev_targp; |
146 | } else { |
147 | iomapp->iomap_target = mp->m_ddev_targp; |
148 | } |
149 | start_block = imap->br_startblock; |
150 | if (start_block == HOLESTARTBLOCK) { |
151 | iomapp->iomap_bn = IOMAP_DADDR_NULL; |
152 | iomapp->iomap_flags |= IOMAP_HOLE; |
153 | } else if (start_block == DELAYSTARTBLOCK) { |
154 | iomapp->iomap_bn = IOMAP_DADDR_NULL; |
155 | iomapp->iomap_flags |= IOMAP_DELAY; |
156 | } else { |
157 | iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block); |
158 | if (ISUNWRITTEN(imap)) |
159 | iomapp->iomap_flags |= IOMAP_UNWRITTEN; |
160 | } |
161 | |
162 | offset += iomapp->iomap_bsize - iomapp->iomap_delta; |
163 | } |
164 | return pbm; /* Return the number filled */ |
165 | } |
166 | |
167 | int |
168 | xfs_iomap( |
169 | xfs_inode_t *ip, |
170 | xfs_off_t offset, |
171 | ssize_t count, |
172 | int flags, |
173 | xfs_iomap_t *iomapp, |
174 | int *niomaps) |
175 | { |
176 | xfs_mount_t *mp = ip->i_mount; |
177 | xfs_fileoff_t offset_fsb, end_fsb; |
178 | int error = 0; |
179 | int lockmode = 0; |
180 | xfs_bmbt_irec_t imap; |
181 | int nimaps = 1; |
182 | int bmapi_flags = 0; |
183 | int iomap_flags = 0; |
184 | |
185 | ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); |
186 | |
187 | if (XFS_FORCED_SHUTDOWN(mp)) |
188 | return XFS_ERROR(EIO); |
189 | |
190 | switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { |
191 | case BMAPI_READ: |
192 | xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count); |
193 | lockmode = xfs_ilock_map_shared(ip); |
194 | bmapi_flags = XFS_BMAPI_ENTIRE; |
195 | break; |
196 | case BMAPI_WRITE: |
197 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); |
198 | lockmode = XFS_ILOCK_EXCL; |
199 | if (flags & BMAPI_IGNSTATE) |
200 | bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; |
201 | xfs_ilock(ip, lockmode); |
202 | break; |
203 | case BMAPI_ALLOCATE: |
204 | xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); |
205 | lockmode = XFS_ILOCK_SHARED; |
206 | bmapi_flags = XFS_BMAPI_ENTIRE; |
207 | |
208 | /* Attempt non-blocking lock */ |
209 | if (flags & BMAPI_TRYLOCK) { |
210 | if (!xfs_ilock_nowait(ip, lockmode)) |
211 | return XFS_ERROR(EAGAIN); |
212 | } else { |
213 | xfs_ilock(ip, lockmode); |
214 | } |
215 | break; |
216 | default: |
217 | BUG(); |
218 | } |
219 | |
220 | ASSERT(offset <= mp->m_maxioffset); |
221 | if ((xfs_fsize_t)offset + count > mp->m_maxioffset) |
222 | count = mp->m_maxioffset - offset; |
223 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); |
224 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
225 | |
226 | error = xfs_bmapi(NULL, ip, offset_fsb, |
227 | (xfs_filblks_t)(end_fsb - offset_fsb), |
228 | bmapi_flags, NULL, 0, &imap, |
229 | &nimaps, NULL, NULL); |
230 | |
231 | if (error) |
232 | goto out; |
233 | |
234 | switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { |
235 | case BMAPI_WRITE: |
236 | /* If we found an extent, return it */ |
237 | if (nimaps && |
238 | (imap.br_startblock != HOLESTARTBLOCK) && |
239 | (imap.br_startblock != DELAYSTARTBLOCK)) { |
240 | xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, |
241 | offset, count, iomapp, &imap, flags); |
242 | break; |
243 | } |
244 | |
245 | if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { |
246 | error = xfs_iomap_write_direct(ip, offset, count, flags, |
247 | &imap, &nimaps, nimaps); |
248 | } else { |
249 | error = xfs_iomap_write_delay(ip, offset, count, flags, |
250 | &imap, &nimaps); |
251 | } |
252 | if (!error) { |
253 | xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, |
254 | offset, count, iomapp, &imap, flags); |
255 | } |
256 | iomap_flags = IOMAP_NEW; |
257 | break; |
258 | case BMAPI_ALLOCATE: |
259 | /* If we found an extent, return it */ |
260 | xfs_iunlock(ip, lockmode); |
261 | lockmode = 0; |
262 | |
263 | if (nimaps && !isnullstartblock(imap.br_startblock)) { |
264 | xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, |
265 | offset, count, iomapp, &imap, flags); |
266 | break; |
267 | } |
268 | |
269 | error = xfs_iomap_write_allocate(ip, offset, count, |
270 | &imap, &nimaps); |
271 | break; |
272 | } |
273 | |
274 | if (nimaps) { |
275 | *niomaps = xfs_imap_to_bmap(ip, offset, &imap, |
276 | iomapp, nimaps, *niomaps, iomap_flags); |
277 | } else if (niomaps) { |
278 | *niomaps = 0; |
279 | } |
280 | |
281 | out: |
282 | if (lockmode) |
283 | xfs_iunlock(ip, lockmode); |
284 | return XFS_ERROR(error); |
285 | } |
286 | |
287 | |
288 | STATIC int |
289 | xfs_iomap_eof_align_last_fsb( |
290 | xfs_mount_t *mp, |
291 | xfs_inode_t *ip, |
292 | xfs_extlen_t extsize, |
293 | xfs_fileoff_t *last_fsb) |
294 | { |
295 | xfs_fileoff_t new_last_fsb = 0; |
296 | xfs_extlen_t align; |
297 | int eof, error; |
298 | |
299 | if (XFS_IS_REALTIME_INODE(ip)) |
300 | ; |
301 | /* |
302 | * If mounted with the "-o swalloc" option, roundup the allocation |
303 | * request to a stripe width boundary if the file size is >= |
304 | * stripe width and we are allocating past the allocation eof. |
305 | */ |
306 | else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && |
307 | (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth))) |
308 | new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); |
309 | /* |
310 | * Roundup the allocation request to a stripe unit (m_dalign) boundary |
311 | * if the file size is >= stripe unit size, and we are allocating past |
312 | * the allocation eof. |
313 | */ |
314 | else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign))) |
315 | new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); |
316 | |
317 | /* |
318 | * Always round up the allocation request to an extent boundary |
319 | * (when file on a real-time subvolume or has di_extsize hint). |
320 | */ |
321 | if (extsize) { |
322 | if (new_last_fsb) |
323 | align = roundup_64(new_last_fsb, extsize); |
324 | else |
325 | align = extsize; |
326 | new_last_fsb = roundup_64(*last_fsb, align); |
327 | } |
328 | |
329 | if (new_last_fsb) { |
330 | error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); |
331 | if (error) |
332 | return error; |
333 | if (eof) |
334 | *last_fsb = new_last_fsb; |
335 | } |
336 | return 0; |
337 | } |
338 | |
339 | STATIC int |
340 | xfs_cmn_err_fsblock_zero( |
341 | xfs_inode_t *ip, |
342 | xfs_bmbt_irec_t *imap) |
343 | { |
344 | xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, |
345 | "Access to block zero in inode %llu " |
346 | "start_block: %llx start_off: %llx " |
347 | "blkcnt: %llx extent-state: %x\n", |
348 | (unsigned long long)ip->i_ino, |
349 | (unsigned long long)imap->br_startblock, |
350 | (unsigned long long)imap->br_startoff, |
351 | (unsigned long long)imap->br_blockcount, |
352 | imap->br_state); |
353 | return EFSCORRUPTED; |
354 | } |
355 | |
356 | int |
357 | xfs_iomap_write_direct( |
358 | xfs_inode_t *ip, |
359 | xfs_off_t offset, |
360 | size_t count, |
361 | int flags, |
362 | xfs_bmbt_irec_t *ret_imap, |
363 | int *nmaps, |
364 | int found) |
365 | { |
366 | xfs_mount_t *mp = ip->i_mount; |
367 | xfs_fileoff_t offset_fsb; |
368 | xfs_fileoff_t last_fsb; |
369 | xfs_filblks_t count_fsb, resaligned; |
370 | xfs_fsblock_t firstfsb; |
371 | xfs_extlen_t extsz, temp; |
372 | int nimaps; |
373 | int bmapi_flag; |
374 | int quota_flag; |
375 | int rt; |
376 | xfs_trans_t *tp; |
377 | xfs_bmbt_irec_t imap; |
378 | xfs_bmap_free_t free_list; |
379 | uint qblocks, resblks, resrtextents; |
380 | int committed; |
381 | int error; |
382 | |
383 | /* |
384 | * Make sure that the dquots are there. This doesn't hold |
385 | * the ilock across a disk read. |
386 | */ |
387 | error = xfs_qm_dqattach_locked(ip, 0); |
388 | if (error) |
389 | return XFS_ERROR(error); |
390 | |
391 | rt = XFS_IS_REALTIME_INODE(ip); |
392 | extsz = xfs_get_extsz_hint(ip); |
393 | |
394 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
395 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
396 | if ((offset + count) > ip->i_size) { |
397 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); |
398 | if (error) |
399 | goto error_out; |
400 | } else { |
401 | if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) |
402 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
403 | ret_imap->br_blockcount + |
404 | ret_imap->br_startoff); |
405 | } |
406 | count_fsb = last_fsb - offset_fsb; |
407 | ASSERT(count_fsb > 0); |
408 | |
409 | resaligned = count_fsb; |
410 | if (unlikely(extsz)) { |
411 | if ((temp = do_mod(offset_fsb, extsz))) |
412 | resaligned += temp; |
413 | if ((temp = do_mod(resaligned, extsz))) |
414 | resaligned += extsz - temp; |
415 | } |
416 | |
417 | if (unlikely(rt)) { |
418 | resrtextents = qblocks = resaligned; |
419 | resrtextents /= mp->m_sb.sb_rextsize; |
420 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); |
421 | quota_flag = XFS_QMOPT_RES_RTBLKS; |
422 | } else { |
423 | resrtextents = 0; |
424 | resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); |
425 | quota_flag = XFS_QMOPT_RES_REGBLKS; |
426 | } |
427 | |
428 | /* |
429 | * Allocate and setup the transaction |
430 | */ |
431 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
432 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
433 | error = xfs_trans_reserve(tp, resblks, |
434 | XFS_WRITE_LOG_RES(mp), resrtextents, |
435 | XFS_TRANS_PERM_LOG_RES, |
436 | XFS_WRITE_LOG_COUNT); |
437 | /* |
438 | * Check for running out of space, note: need lock to return |
439 | */ |
440 | if (error) |
441 | xfs_trans_cancel(tp, 0); |
442 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
443 | if (error) |
444 | goto error_out; |
445 | |
446 | error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); |
447 | if (error) |
448 | goto error1; |
449 | |
450 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
451 | xfs_trans_ihold(tp, ip); |
452 | |
453 | bmapi_flag = XFS_BMAPI_WRITE; |
454 | if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) |
455 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
456 | |
457 | /* |
458 | * Issue the xfs_bmapi() call to allocate the blocks |
459 | */ |
460 | xfs_bmap_init(&free_list, &firstfsb); |
461 | nimaps = 1; |
462 | error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, |
463 | &firstfsb, 0, &imap, &nimaps, &free_list, NULL); |
464 | if (error) |
465 | goto error0; |
466 | |
467 | /* |
468 | * Complete the transaction |
469 | */ |
470 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
471 | if (error) |
472 | goto error0; |
473 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
474 | if (error) |
475 | goto error_out; |
476 | |
477 | /* |
478 | * Copy any maps to caller's array and return any error. |
479 | */ |
480 | if (nimaps == 0) { |
481 | error = ENOSPC; |
482 | goto error_out; |
483 | } |
484 | |
485 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { |
486 | error = xfs_cmn_err_fsblock_zero(ip, &imap); |
487 | goto error_out; |
488 | } |
489 | |
490 | *ret_imap = imap; |
491 | *nmaps = 1; |
492 | return 0; |
493 | |
494 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
495 | xfs_bmap_cancel(&free_list); |
496 | xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); |
497 | |
498 | error1: /* Just cancel transaction */ |
499 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
500 | *nmaps = 0; /* nothing set-up here */ |
501 | |
502 | error_out: |
503 | return XFS_ERROR(error); |
504 | } |
505 | |
506 | /* |
507 | * If the caller is doing a write at the end of the file, then extend the |
508 | * allocation out to the file system's write iosize. We clean up any extra |
509 | * space left over when the file is closed in xfs_inactive(). |
510 | */ |
511 | STATIC int |
512 | xfs_iomap_eof_want_preallocate( |
513 | xfs_mount_t *mp, |
514 | xfs_inode_t *ip, |
515 | xfs_off_t offset, |
516 | size_t count, |
517 | int ioflag, |
518 | xfs_bmbt_irec_t *imap, |
519 | int nimaps, |
520 | int *prealloc) |
521 | { |
522 | xfs_fileoff_t start_fsb; |
523 | xfs_filblks_t count_fsb; |
524 | xfs_fsblock_t firstblock; |
525 | int n, error, imaps; |
526 | |
527 | *prealloc = 0; |
528 | if ((offset + count) <= ip->i_size) |
529 | return 0; |
530 | |
531 | /* |
532 | * If there are any real blocks past eof, then don't |
533 | * do any speculative allocation. |
534 | */ |
535 | start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1))); |
536 | count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); |
537 | while (count_fsb > 0) { |
538 | imaps = nimaps; |
539 | firstblock = NULLFSBLOCK; |
540 | error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0, |
541 | &firstblock, 0, imap, &imaps, NULL, NULL); |
542 | if (error) |
543 | return error; |
544 | for (n = 0; n < imaps; n++) { |
545 | if ((imap[n].br_startblock != HOLESTARTBLOCK) && |
546 | (imap[n].br_startblock != DELAYSTARTBLOCK)) |
547 | return 0; |
548 | start_fsb += imap[n].br_blockcount; |
549 | count_fsb -= imap[n].br_blockcount; |
550 | } |
551 | } |
552 | *prealloc = 1; |
553 | return 0; |
554 | } |
555 | |
556 | int |
557 | xfs_iomap_write_delay( |
558 | xfs_inode_t *ip, |
559 | xfs_off_t offset, |
560 | size_t count, |
561 | int ioflag, |
562 | xfs_bmbt_irec_t *ret_imap, |
563 | int *nmaps) |
564 | { |
565 | xfs_mount_t *mp = ip->i_mount; |
566 | xfs_fileoff_t offset_fsb; |
567 | xfs_fileoff_t last_fsb; |
568 | xfs_off_t aligned_offset; |
569 | xfs_fileoff_t ioalign; |
570 | xfs_fsblock_t firstblock; |
571 | xfs_extlen_t extsz; |
572 | int nimaps; |
573 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; |
574 | int prealloc, flushed = 0; |
575 | int error; |
576 | |
577 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
578 | |
579 | /* |
580 | * Make sure that the dquots are there. This doesn't hold |
581 | * the ilock across a disk read. |
582 | */ |
583 | error = xfs_qm_dqattach_locked(ip, 0); |
584 | if (error) |
585 | return XFS_ERROR(error); |
586 | |
587 | extsz = xfs_get_extsz_hint(ip); |
588 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
589 | |
590 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
591 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); |
592 | if (error) |
593 | return error; |
594 | |
595 | retry: |
596 | if (prealloc) { |
597 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
598 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
599 | last_fsb = ioalign + mp->m_writeio_blocks; |
600 | } else { |
601 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
602 | } |
603 | |
604 | if (prealloc || extsz) { |
605 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); |
606 | if (error) |
607 | return error; |
608 | } |
609 | |
610 | nimaps = XFS_WRITE_IMAPS; |
611 | firstblock = NULLFSBLOCK; |
612 | error = xfs_bmapi(NULL, ip, offset_fsb, |
613 | (xfs_filblks_t)(last_fsb - offset_fsb), |
614 | XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | |
615 | XFS_BMAPI_ENTIRE, &firstblock, 1, imap, |
616 | &nimaps, NULL, NULL); |
617 | if (error && (error != ENOSPC)) |
618 | return XFS_ERROR(error); |
619 | |
620 | /* |
621 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, |
622 | * then we must have run out of space - flush all other inodes with |
623 | * delalloc blocks and retry without EOF preallocation. |
624 | */ |
625 | if (nimaps == 0) { |
626 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, |
627 | ip, offset, count); |
628 | if (flushed) |
629 | return XFS_ERROR(ENOSPC); |
630 | |
631 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
632 | xfs_flush_inodes(ip); |
633 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
634 | |
635 | flushed = 1; |
636 | error = 0; |
637 | prealloc = 0; |
638 | goto retry; |
639 | } |
640 | |
641 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) |
642 | return xfs_cmn_err_fsblock_zero(ip, &imap[0]); |
643 | |
644 | *ret_imap = imap[0]; |
645 | *nmaps = 1; |
646 | |
647 | return 0; |
648 | } |
649 | |
650 | /* |
651 | * Pass in a delayed allocate extent, convert it to real extents; |
652 | * return to the caller the extent we create which maps on top of |
653 | * the originating callers request. |
654 | * |
655 | * Called without a lock on the inode. |
656 | * |
657 | * We no longer bother to look at the incoming map - all we have to |
658 | * guarantee is that whatever we allocate fills the required range. |
659 | */ |
660 | int |
661 | xfs_iomap_write_allocate( |
662 | xfs_inode_t *ip, |
663 | xfs_off_t offset, |
664 | size_t count, |
665 | xfs_bmbt_irec_t *map, |
666 | int *retmap) |
667 | { |
668 | xfs_mount_t *mp = ip->i_mount; |
669 | xfs_fileoff_t offset_fsb, last_block; |
670 | xfs_fileoff_t end_fsb, map_start_fsb; |
671 | xfs_fsblock_t first_block; |
672 | xfs_bmap_free_t free_list; |
673 | xfs_filblks_t count_fsb; |
674 | xfs_bmbt_irec_t imap; |
675 | xfs_trans_t *tp; |
676 | int nimaps, committed; |
677 | int error = 0; |
678 | int nres; |
679 | |
680 | *retmap = 0; |
681 | |
682 | /* |
683 | * Make sure that the dquots are there. |
684 | */ |
685 | error = xfs_qm_dqattach(ip, 0); |
686 | if (error) |
687 | return XFS_ERROR(error); |
688 | |
689 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
690 | count_fsb = map->br_blockcount; |
691 | map_start_fsb = map->br_startoff; |
692 | |
693 | XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); |
694 | |
695 | while (count_fsb != 0) { |
696 | /* |
697 | * Set up a transaction with which to allocate the |
698 | * backing store for the file. Do allocations in a |
699 | * loop until we get some space in the range we are |
700 | * interested in. The other space that might be allocated |
701 | * is in the delayed allocation extent on which we sit |
702 | * but before our buffer starts. |
703 | */ |
704 | |
705 | nimaps = 0; |
706 | while (nimaps == 0) { |
707 | tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); |
708 | tp->t_flags |= XFS_TRANS_RESERVE; |
709 | nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); |
710 | error = xfs_trans_reserve(tp, nres, |
711 | XFS_WRITE_LOG_RES(mp), |
712 | 0, XFS_TRANS_PERM_LOG_RES, |
713 | XFS_WRITE_LOG_COUNT); |
714 | if (error) { |
715 | xfs_trans_cancel(tp, 0); |
716 | return XFS_ERROR(error); |
717 | } |
718 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
719 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
720 | xfs_trans_ihold(tp, ip); |
721 | |
722 | xfs_bmap_init(&free_list, &first_block); |
723 | |
724 | /* |
725 | * it is possible that the extents have changed since |
726 | * we did the read call as we dropped the ilock for a |
727 | * while. We have to be careful about truncates or hole |
728 | * punchs here - we are not allowed to allocate |
729 | * non-delalloc blocks here. |
730 | * |
731 | * The only protection against truncation is the pages |
732 | * for the range we are being asked to convert are |
733 | * locked and hence a truncate will block on them |
734 | * first. |
735 | * |
736 | * As a result, if we go beyond the range we really |
737 | * need and hit an delalloc extent boundary followed by |
738 | * a hole while we have excess blocks in the map, we |
739 | * will fill the hole incorrectly and overrun the |
740 | * transaction reservation. |
741 | * |
742 | * Using a single map prevents this as we are forced to |
743 | * check each map we look for overlap with the desired |
744 | * range and abort as soon as we find it. Also, given |
745 | * that we only return a single map, having one beyond |
746 | * what we can return is probably a bit silly. |
747 | * |
748 | * We also need to check that we don't go beyond EOF; |
749 | * this is a truncate optimisation as a truncate sets |
750 | * the new file size before block on the pages we |
751 | * currently have locked under writeback. Because they |
752 | * are about to be tossed, we don't need to write them |
753 | * back.... |
754 | */ |
755 | nimaps = 1; |
756 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); |
757 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
758 | XFS_DATA_FORK); |
759 | if (error) |
760 | goto trans_cancel; |
761 | |
762 | last_block = XFS_FILEOFF_MAX(last_block, end_fsb); |
763 | if ((map_start_fsb + count_fsb) > last_block) { |
764 | count_fsb = last_block - map_start_fsb; |
765 | if (count_fsb == 0) { |
766 | error = EAGAIN; |
767 | goto trans_cancel; |
768 | } |
769 | } |
770 | |
771 | /* Go get the actual blocks */ |
772 | error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, |
773 | XFS_BMAPI_WRITE, &first_block, 1, |
774 | &imap, &nimaps, &free_list, NULL); |
775 | if (error) |
776 | goto trans_cancel; |
777 | |
778 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
779 | if (error) |
780 | goto trans_cancel; |
781 | |
782 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
783 | if (error) |
784 | goto error0; |
785 | |
786 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
787 | } |
788 | |
789 | /* |
790 | * See if we were able to allocate an extent that |
791 | * covers at least part of the callers request |
792 | */ |
793 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) |
794 | return xfs_cmn_err_fsblock_zero(ip, &imap); |
795 | |
796 | if ((offset_fsb >= imap.br_startoff) && |
797 | (offset_fsb < (imap.br_startoff + |
798 | imap.br_blockcount))) { |
799 | *map = imap; |
800 | *retmap = 1; |
801 | XFS_STATS_INC(xs_xstrat_quick); |
802 | return 0; |
803 | } |
804 | |
805 | /* |
806 | * So far we have not mapped the requested part of the |
807 | * file, just surrounding data, try again. |
808 | */ |
809 | count_fsb -= imap.br_blockcount; |
810 | map_start_fsb = imap.br_startoff + imap.br_blockcount; |
811 | } |
812 | |
813 | trans_cancel: |
814 | xfs_bmap_cancel(&free_list); |
815 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
816 | error0: |
817 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
818 | return XFS_ERROR(error); |
819 | } |
820 | |
821 | int |
822 | xfs_iomap_write_unwritten( |
823 | xfs_inode_t *ip, |
824 | xfs_off_t offset, |
825 | size_t count) |
826 | { |
827 | xfs_mount_t *mp = ip->i_mount; |
828 | xfs_fileoff_t offset_fsb; |
829 | xfs_filblks_t count_fsb; |
830 | xfs_filblks_t numblks_fsb; |
831 | xfs_fsblock_t firstfsb; |
832 | int nimaps; |
833 | xfs_trans_t *tp; |
834 | xfs_bmbt_irec_t imap; |
835 | xfs_bmap_free_t free_list; |
836 | uint resblks; |
837 | int committed; |
838 | int error; |
839 | |
840 | xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); |
841 | |
842 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
843 | count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); |
844 | count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); |
845 | |
846 | /* |
847 | * Reserve enough blocks in this transaction for two complete extent |
848 | * btree splits. We may be converting the middle part of an unwritten |
849 | * extent and in this case we will insert two new extents in the btree |
850 | * each of which could cause a full split. |
851 | * |
852 | * This reservation amount will be used in the first call to |
853 | * xfs_bmbt_split() to select an AG with enough space to satisfy the |
854 | * rest of the operation. |
855 | */ |
856 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; |
857 | |
858 | do { |
859 | /* |
860 | * set up a transaction to convert the range of extents |
861 | * from unwritten to real. Do allocations in a loop until |
862 | * we have covered the range passed in. |
863 | */ |
864 | tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); |
865 | tp->t_flags |= XFS_TRANS_RESERVE; |
866 | error = xfs_trans_reserve(tp, resblks, |
867 | XFS_WRITE_LOG_RES(mp), 0, |
868 | XFS_TRANS_PERM_LOG_RES, |
869 | XFS_WRITE_LOG_COUNT); |
870 | if (error) { |
871 | xfs_trans_cancel(tp, 0); |
872 | return XFS_ERROR(error); |
873 | } |
874 | |
875 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
876 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
877 | xfs_trans_ihold(tp, ip); |
878 | |
879 | /* |
880 | * Modify the unwritten extent state of the buffer. |
881 | */ |
882 | xfs_bmap_init(&free_list, &firstfsb); |
883 | nimaps = 1; |
884 | error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, |
885 | XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, |
886 | 1, &imap, &nimaps, &free_list, NULL); |
887 | if (error) |
888 | goto error_on_bmapi_transaction; |
889 | |
890 | error = xfs_bmap_finish(&(tp), &(free_list), &committed); |
891 | if (error) |
892 | goto error_on_bmapi_transaction; |
893 | |
894 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
895 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
896 | if (error) |
897 | return XFS_ERROR(error); |
898 | |
899 | if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) |
900 | return xfs_cmn_err_fsblock_zero(ip, &imap); |
901 | |
902 | if ((numblks_fsb = imap.br_blockcount) == 0) { |
903 | /* |
904 | * The numblks_fsb value should always get |
905 | * smaller, otherwise the loop is stuck. |
906 | */ |
907 | ASSERT(imap.br_blockcount); |
908 | break; |
909 | } |
910 | offset_fsb += numblks_fsb; |
911 | count_fsb -= numblks_fsb; |
912 | } while (count_fsb > 0); |
913 | |
914 | return 0; |
915 | |
916 | error_on_bmapi_transaction: |
917 | xfs_bmap_cancel(&free_list); |
918 | xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); |
919 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
920 | return XFS_ERROR(error); |
921 | } |
922 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9