Root/fs/ocfs2/localalloc.c

1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * localalloc.c
5 *
6 * Node local data allocation
7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/highmem.h>
30#include <linux/bitops.h>
31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h>
34
35#include "ocfs2.h"
36
37#include "alloc.h"
38#include "blockcheck.h"
39#include "dlmglue.h"
40#include "inode.h"
41#include "journal.h"
42#include "localalloc.h"
43#include "suballoc.h"
44#include "super.h"
45#include "sysfile.h"
46
47#include "buffer_head_io.h"
48
49#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
50
51static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54                         struct ocfs2_dinode *alloc,
55                         u32 numbits);
56
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58
59static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
60                    handle_t *handle,
61                    struct ocfs2_dinode *alloc,
62                    struct inode *main_bm_inode,
63                    struct buffer_head *main_bm_bh);
64
65static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
66                        struct ocfs2_alloc_context **ac,
67                        struct inode **bitmap_inode,
68                        struct buffer_head **bitmap_bh);
69
70static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
71                    handle_t *handle,
72                    struct ocfs2_alloc_context *ac);
73
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75                      struct inode *local_alloc_inode);
76
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{
79    return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
80        osb->local_alloc_state == OCFS2_LA_ENABLED);
81}
82
83void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
84                      unsigned int num_clusters)
85{
86    spin_lock(&osb->osb_lock);
87    if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
88        osb->local_alloc_state == OCFS2_LA_THROTTLED)
89        if (num_clusters >= osb->local_alloc_default_bits) {
90            cancel_delayed_work(&osb->la_enable_wq);
91            osb->local_alloc_state = OCFS2_LA_ENABLED;
92        }
93    spin_unlock(&osb->osb_lock);
94}
95
96void ocfs2_la_enable_worker(struct work_struct *work)
97{
98    struct ocfs2_super *osb =
99        container_of(work, struct ocfs2_super,
100                 la_enable_wq.work);
101    spin_lock(&osb->osb_lock);
102    osb->local_alloc_state = OCFS2_LA_ENABLED;
103    spin_unlock(&osb->osb_lock);
104}
105
106/*
107 * Tell us whether a given allocation should use the local alloc
108 * file. Otherwise, it has to go to the main bitmap.
109 *
110 * This function does semi-dirty reads of local alloc size and state!
111 * This is ok however, as the values are re-checked once under mutex.
112 */
113int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
114{
115    int ret = 0;
116    int la_bits;
117
118    spin_lock(&osb->osb_lock);
119    la_bits = osb->local_alloc_bits;
120
121    if (!ocfs2_la_state_enabled(osb))
122        goto bail;
123
124    /* la_bits should be at least twice the size (in clusters) of
125     * a new block group. We want to be sure block group
126     * allocations go through the local alloc, so allow an
127     * allocation to take up to half the bitmap. */
128    if (bits > (la_bits / 2))
129        goto bail;
130
131    ret = 1;
132bail:
133    mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
134         osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
135    spin_unlock(&osb->osb_lock);
136    return ret;
137}
138
139int ocfs2_load_local_alloc(struct ocfs2_super *osb)
140{
141    int status = 0;
142    struct ocfs2_dinode *alloc = NULL;
143    struct buffer_head *alloc_bh = NULL;
144    u32 num_used;
145    struct inode *inode = NULL;
146    struct ocfs2_local_alloc *la;
147
148    mlog_entry_void();
149
150    if (osb->local_alloc_bits == 0)
151        goto bail;
152
153    if (osb->local_alloc_bits >= osb->bitmap_cpg) {
154        mlog(ML_NOTICE, "Requested local alloc window %d is larger "
155             "than max possible %u. Using defaults.\n",
156             osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157        osb->local_alloc_bits =
158            ocfs2_megabytes_to_clusters(osb->sb,
159                            OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
160    }
161
162    /* read the alloc off disk */
163    inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
164                        osb->slot_num);
165    if (!inode) {
166        status = -EINVAL;
167        mlog_errno(status);
168        goto bail;
169    }
170
171    status = ocfs2_read_inode_block_full(inode, &alloc_bh,
172                         OCFS2_BH_IGNORE_CACHE);
173    if (status < 0) {
174        mlog_errno(status);
175        goto bail;
176    }
177
178    alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
179    la = OCFS2_LOCAL_ALLOC(alloc);
180
181    if (!(le32_to_cpu(alloc->i_flags) &
182        (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
183        mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
184             (unsigned long long)OCFS2_I(inode)->ip_blkno);
185        status = -EINVAL;
186        goto bail;
187    }
188
189    if ((la->la_size == 0) ||
190        (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
191        mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
192             le16_to_cpu(la->la_size));
193        status = -EINVAL;
194        goto bail;
195    }
196
197    /* do a little verification. */
198    num_used = ocfs2_local_alloc_count_bits(alloc);
199
200    /* hopefully the local alloc has always been recovered before
201     * we load it. */
202    if (num_used
203        || alloc->id1.bitmap1.i_used
204        || alloc->id1.bitmap1.i_total
205        || la->la_bm_off)
206        mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
207             "found = %u, set = %u, taken = %u, off = %u\n",
208             num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
209             le32_to_cpu(alloc->id1.bitmap1.i_total),
210             OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
211
212    osb->local_alloc_bh = alloc_bh;
213    osb->local_alloc_state = OCFS2_LA_ENABLED;
214
215bail:
216    if (status < 0)
217        brelse(alloc_bh);
218    if (inode)
219        iput(inode);
220
221    mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
222
223    mlog_exit(status);
224    return status;
225}
226
227/*
228 * return any unused bits to the bitmap and write out a clean
229 * local_alloc.
230 *
231 * local_alloc_bh is optional. If not passed, we will simply use the
232 * one off osb. If you do pass it however, be warned that it *will* be
233 * returned brelse'd and NULL'd out.*/
234void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
235{
236    int status;
237    handle_t *handle;
238    struct inode *local_alloc_inode = NULL;
239    struct buffer_head *bh = NULL;
240    struct buffer_head *main_bm_bh = NULL;
241    struct inode *main_bm_inode = NULL;
242    struct ocfs2_dinode *alloc_copy = NULL;
243    struct ocfs2_dinode *alloc = NULL;
244
245    mlog_entry_void();
246
247    cancel_delayed_work(&osb->la_enable_wq);
248    flush_workqueue(ocfs2_wq);
249
250    if (osb->local_alloc_state == OCFS2_LA_UNUSED)
251        goto out;
252
253    local_alloc_inode =
254        ocfs2_get_system_file_inode(osb,
255                        LOCAL_ALLOC_SYSTEM_INODE,
256                        osb->slot_num);
257    if (!local_alloc_inode) {
258        status = -ENOENT;
259        mlog_errno(status);
260        goto out;
261    }
262
263    osb->local_alloc_state = OCFS2_LA_DISABLED;
264
265    main_bm_inode = ocfs2_get_system_file_inode(osb,
266                            GLOBAL_BITMAP_SYSTEM_INODE,
267                            OCFS2_INVALID_SLOT);
268    if (!main_bm_inode) {
269        status = -EINVAL;
270        mlog_errno(status);
271        goto out;
272    }
273
274    mutex_lock(&main_bm_inode->i_mutex);
275
276    status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
277    if (status < 0) {
278        mlog_errno(status);
279        goto out_mutex;
280    }
281
282    /* WINDOW_MOVE_CREDITS is a bit heavy... */
283    handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
284    if (IS_ERR(handle)) {
285        mlog_errno(PTR_ERR(handle));
286        handle = NULL;
287        goto out_unlock;
288    }
289
290    bh = osb->local_alloc_bh;
291    alloc = (struct ocfs2_dinode *) bh->b_data;
292
293    alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
294    if (!alloc_copy) {
295        status = -ENOMEM;
296        goto out_commit;
297    }
298    memcpy(alloc_copy, alloc, bh->b_size);
299
300    status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
301                     bh, OCFS2_JOURNAL_ACCESS_WRITE);
302    if (status < 0) {
303        mlog_errno(status);
304        goto out_commit;
305    }
306
307    ocfs2_clear_local_alloc(alloc);
308
309    status = ocfs2_journal_dirty(handle, bh);
310    if (status < 0) {
311        mlog_errno(status);
312        goto out_commit;
313    }
314
315    brelse(bh);
316    osb->local_alloc_bh = NULL;
317    osb->local_alloc_state = OCFS2_LA_UNUSED;
318
319    status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
320                      main_bm_inode, main_bm_bh);
321    if (status < 0)
322        mlog_errno(status);
323
324out_commit:
325    ocfs2_commit_trans(osb, handle);
326
327out_unlock:
328    brelse(main_bm_bh);
329
330    ocfs2_inode_unlock(main_bm_inode, 1);
331
332out_mutex:
333    mutex_unlock(&main_bm_inode->i_mutex);
334    iput(main_bm_inode);
335
336out:
337    if (local_alloc_inode)
338        iput(local_alloc_inode);
339
340    if (alloc_copy)
341        kfree(alloc_copy);
342
343    mlog_exit_void();
344}
345
346/*
347 * We want to free the bitmap bits outside of any recovery context as
348 * we'll need a cluster lock to do so, but we must clear the local
349 * alloc before giving up the recovered nodes journal. To solve this,
350 * we kmalloc a copy of the local alloc before it's change for the
351 * caller to process with ocfs2_complete_local_alloc_recovery
352 */
353int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
354                     int slot_num,
355                     struct ocfs2_dinode **alloc_copy)
356{
357    int status = 0;
358    struct buffer_head *alloc_bh = NULL;
359    struct inode *inode = NULL;
360    struct ocfs2_dinode *alloc;
361
362    mlog_entry("(slot_num = %d)\n", slot_num);
363
364    *alloc_copy = NULL;
365
366    inode = ocfs2_get_system_file_inode(osb,
367                        LOCAL_ALLOC_SYSTEM_INODE,
368                        slot_num);
369    if (!inode) {
370        status = -EINVAL;
371        mlog_errno(status);
372        goto bail;
373    }
374
375    mutex_lock(&inode->i_mutex);
376
377    status = ocfs2_read_inode_block_full(inode, &alloc_bh,
378                         OCFS2_BH_IGNORE_CACHE);
379    if (status < 0) {
380        mlog_errno(status);
381        goto bail;
382    }
383
384    *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
385    if (!(*alloc_copy)) {
386        status = -ENOMEM;
387        goto bail;
388    }
389    memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
390
391    alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
392    ocfs2_clear_local_alloc(alloc);
393
394    ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
395    status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
396    if (status < 0)
397        mlog_errno(status);
398
399bail:
400    if ((status < 0) && (*alloc_copy)) {
401        kfree(*alloc_copy);
402        *alloc_copy = NULL;
403    }
404
405    brelse(alloc_bh);
406
407    if (inode) {
408        mutex_unlock(&inode->i_mutex);
409        iput(inode);
410    }
411
412    mlog_exit(status);
413    return status;
414}
415
416/*
417 * Step 2: By now, we've completed the journal recovery, we've stamped
418 * a clean local alloc on disk and dropped the node out of the
419 * recovery map. Dlm locks will no longer stall, so lets clear out the
420 * main bitmap.
421 */
422int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
423                    struct ocfs2_dinode *alloc)
424{
425    int status;
426    handle_t *handle;
427    struct buffer_head *main_bm_bh = NULL;
428    struct inode *main_bm_inode;
429
430    mlog_entry_void();
431
432    main_bm_inode = ocfs2_get_system_file_inode(osb,
433                            GLOBAL_BITMAP_SYSTEM_INODE,
434                            OCFS2_INVALID_SLOT);
435    if (!main_bm_inode) {
436        status = -EINVAL;
437        mlog_errno(status);
438        goto out;
439    }
440
441    mutex_lock(&main_bm_inode->i_mutex);
442
443    status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
444    if (status < 0) {
445        mlog_errno(status);
446        goto out_mutex;
447    }
448
449    handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
450    if (IS_ERR(handle)) {
451        status = PTR_ERR(handle);
452        handle = NULL;
453        mlog_errno(status);
454        goto out_unlock;
455    }
456
457    /* we want the bitmap change to be recorded on disk asap */
458    handle->h_sync = 1;
459
460    status = ocfs2_sync_local_to_main(osb, handle, alloc,
461                      main_bm_inode, main_bm_bh);
462    if (status < 0)
463        mlog_errno(status);
464
465    ocfs2_commit_trans(osb, handle);
466
467out_unlock:
468    ocfs2_inode_unlock(main_bm_inode, 1);
469
470out_mutex:
471    mutex_unlock(&main_bm_inode->i_mutex);
472
473    brelse(main_bm_bh);
474
475    iput(main_bm_inode);
476
477out:
478    if (!status)
479        ocfs2_init_steal_slots(osb);
480    mlog_exit(status);
481    return status;
482}
483
484/* Check to see if the local alloc window is within ac->ac_max_block */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486                      struct ocfs2_alloc_context *ac,
487                      u32 bits_wanted)
488{
489    struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490    struct ocfs2_dinode *alloc;
491    struct ocfs2_local_alloc *la;
492    int start;
493    u64 block_off;
494
495    if (!ac->ac_max_block)
496        return 1;
497
498    alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499    la = OCFS2_LOCAL_ALLOC(alloc);
500
501    start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502    if (start == -1) {
503        mlog_errno(-ENOSPC);
504        return 0;
505    }
506
507    /*
508     * Converting (bm_off + start + bits_wanted) to blocks gives us
509     * the blkno just past our actual allocation. This is perfect
510     * to compare with ac_max_block.
511     */
512    block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513                         le32_to_cpu(la->la_bm_off) +
514                         start + bits_wanted);
515    mlog(0, "Checking %llu against %llu\n",
516         (unsigned long long)block_off,
517         (unsigned long long)ac->ac_max_block);
518    if (block_off > ac->ac_max_block)
519        return 0;
520
521    return 1;
522}
523
524/*
525 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex.
527 *
528 * We will add ourselves to the transaction passed in, but may start
529 * our own in order to shift windows.
530 */
531int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
532                   u32 bits_wanted,
533                   struct ocfs2_alloc_context *ac)
534{
535    int status;
536    struct ocfs2_dinode *alloc;
537    struct inode *local_alloc_inode;
538    unsigned int free_bits;
539
540    mlog_entry_void();
541
542    BUG_ON(!ac);
543
544    local_alloc_inode =
545        ocfs2_get_system_file_inode(osb,
546                        LOCAL_ALLOC_SYSTEM_INODE,
547                        osb->slot_num);
548    if (!local_alloc_inode) {
549        status = -ENOENT;
550        mlog_errno(status);
551        goto bail;
552    }
553
554    mutex_lock(&local_alloc_inode->i_mutex);
555
556    /*
557     * We must double check state and allocator bits because
558     * another process may have changed them while holding i_mutex.
559     */
560    spin_lock(&osb->osb_lock);
561    if (!ocfs2_la_state_enabled(osb) ||
562        (bits_wanted > osb->local_alloc_bits)) {
563        spin_unlock(&osb->osb_lock);
564        status = -ENOSPC;
565        goto bail;
566    }
567    spin_unlock(&osb->osb_lock);
568
569    alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
570
571#ifdef CONFIG_OCFS2_DEBUG_FS
572    if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
573        ocfs2_local_alloc_count_bits(alloc)) {
574        ocfs2_error(osb->sb, "local alloc inode %llu says it has "
575                "%u free bits, but a count shows %u",
576                (unsigned long long)le64_to_cpu(alloc->i_blkno),
577                le32_to_cpu(alloc->id1.bitmap1.i_used),
578                ocfs2_local_alloc_count_bits(alloc));
579        status = -EIO;
580        goto bail;
581    }
582#endif
583
584    free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
585        le32_to_cpu(alloc->id1.bitmap1.i_used);
586    if (bits_wanted > free_bits) {
587        /* uhoh, window change time. */
588        status =
589            ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
590        if (status < 0) {
591            if (status != -ENOSPC)
592                mlog_errno(status);
593            goto bail;
594        }
595
596        /*
597         * Under certain conditions, the window slide code
598         * might have reduced the number of bits available or
599         * disabled the the local alloc entirely. Re-check
600         * here and return -ENOSPC if necessary.
601         */
602        status = -ENOSPC;
603        if (!ocfs2_la_state_enabled(osb))
604            goto bail;
605
606        free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
607            le32_to_cpu(alloc->id1.bitmap1.i_used);
608        if (bits_wanted > free_bits)
609            goto bail;
610    }
611
612    if (ac->ac_max_block)
613        mlog(0, "Calling in_range for max block %llu\n",
614             (unsigned long long)ac->ac_max_block);
615
616    if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617                    bits_wanted)) {
618        /*
619         * The window is outside ac->ac_max_block.
620         * This errno tells the caller to keep localalloc enabled
621         * but to get the allocation from the main bitmap.
622         */
623        status = -EFBIG;
624        goto bail;
625    }
626
627    ac->ac_inode = local_alloc_inode;
628    /* We should never use localalloc from another slot */
629    ac->ac_alloc_slot = osb->slot_num;
630    ac->ac_which = OCFS2_AC_USE_LOCAL;
631    get_bh(osb->local_alloc_bh);
632    ac->ac_bh = osb->local_alloc_bh;
633    status = 0;
634bail:
635    if (status < 0 && local_alloc_inode) {
636        mutex_unlock(&local_alloc_inode->i_mutex);
637        iput(local_alloc_inode);
638    }
639
640    mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
641         status);
642
643    mlog_exit(status);
644    return status;
645}
646
647int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
648                 handle_t *handle,
649                 struct ocfs2_alloc_context *ac,
650                 u32 bits_wanted,
651                 u32 *bit_off,
652                 u32 *num_bits)
653{
654    int status, start;
655    struct inode *local_alloc_inode;
656    void *bitmap;
657    struct ocfs2_dinode *alloc;
658    struct ocfs2_local_alloc *la;
659
660    mlog_entry_void();
661    BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
662
663    local_alloc_inode = ac->ac_inode;
664    alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665    la = OCFS2_LOCAL_ALLOC(alloc);
666
667    start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
668    if (start == -1) {
669        /* TODO: Shouldn't we just BUG here? */
670        status = -ENOSPC;
671        mlog_errno(status);
672        goto bail;
673    }
674
675    bitmap = la->la_bitmap;
676    *bit_off = le32_to_cpu(la->la_bm_off) + start;
677    /* local alloc is always contiguous by nature -- we never
678     * delete bits from it! */
679    *num_bits = bits_wanted;
680
681    status = ocfs2_journal_access_di(handle,
682                     INODE_CACHE(local_alloc_inode),
683                     osb->local_alloc_bh,
684                     OCFS2_JOURNAL_ACCESS_WRITE);
685    if (status < 0) {
686        mlog_errno(status);
687        goto bail;
688    }
689
690    while(bits_wanted--)
691        ocfs2_set_bit(start++, bitmap);
692
693    le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
694
695    status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696    if (status < 0) {
697        mlog_errno(status);
698        goto bail;
699    }
700
701    status = 0;
702bail:
703    mlog_exit(status);
704    return status;
705}
706
707static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
708{
709    int i;
710    u8 *buffer;
711    u32 count = 0;
712    struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
713
714    mlog_entry_void();
715
716    buffer = la->la_bitmap;
717    for (i = 0; i < le16_to_cpu(la->la_size); i++)
718        count += hweight8(buffer[i]);
719
720    mlog_exit(count);
721    return count;
722}
723
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725                         struct ocfs2_dinode *alloc,
726                         u32 numbits)
727{
728    int numfound, bitoff, left, startoff, lastzero;
729    void *bitmap = NULL;
730
731    mlog_entry("(numbits wanted = %u)\n", numbits);
732
733    if (!alloc->id1.bitmap1.i_total) {
734        mlog(0, "No bits in my window!\n");
735        bitoff = -1;
736        goto bail;
737    }
738
739    bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740
741    numfound = bitoff = startoff = 0;
742    lastzero = -1;
743    left = le32_to_cpu(alloc->id1.bitmap1.i_total);
744    while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
745        if (bitoff == left) {
746            /* mlog(0, "bitoff (%d) == left", bitoff); */
747            break;
748        }
749        /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
750           "numfound = %d\n", bitoff, startoff, numfound);*/
751
752        /* Ok, we found a zero bit... is it contig. or do we
753         * start over?*/
754        if (bitoff == startoff) {
755            /* we found a zero */
756            numfound++;
757            startoff++;
758        } else {
759            /* got a zero after some ones */
760            numfound = 1;
761            startoff = bitoff+1;
762        }
763        /* we got everything we needed */
764        if (numfound == numbits) {
765            /* mlog(0, "Found it all!\n"); */
766            break;
767        }
768    }
769
770    mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771         numfound);
772
773    if (numfound == numbits)
774        bitoff = startoff - numfound;
775    else
776        bitoff = -1;
777
778bail:
779    mlog_exit(bitoff);
780    return bitoff;
781}
782
783static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
784{
785    struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
786    int i;
787    mlog_entry_void();
788
789    alloc->id1.bitmap1.i_total = 0;
790    alloc->id1.bitmap1.i_used = 0;
791    la->la_bm_off = 0;
792    for(i = 0; i < le16_to_cpu(la->la_size); i++)
793        la->la_bitmap[i] = 0;
794
795    mlog_exit_void();
796}
797
#if 0
/*
 * Debugging aid for window shifts (compiled out; turn this on and
 * uncomment its callers to use it). BUG()s if any of the 'count' bits
 * starting at 'start' is set, after printing the range and the
 * offending bit.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int idx;

	/* Walk from the last bit of the range back to the first. */
	for (idx = count; idx > 0; idx--) {
		unsigned int bit = start + idx - 1;

		if (!ocfs2_test_bit(bit, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = "
		       "%u\n", start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       bit);
		BUG();
	}
}
#endif
816
817/*
818 * sync the local alloc to main bitmap.
819 *
820 * assumes you've already locked the main bitmap -- the bitmap inode
821 * passed is used for caching.
822 */
823static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
824                    handle_t *handle,
825                    struct ocfs2_dinode *alloc,
826                    struct inode *main_bm_inode,
827                    struct buffer_head *main_bm_bh)
828{
829    int status = 0;
830    int bit_off, left, count, start;
831    u64 la_start_blk;
832    u64 blkno;
833    void *bitmap;
834    struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
835
836    mlog_entry("total = %u, used = %u\n",
837           le32_to_cpu(alloc->id1.bitmap1.i_total),
838           le32_to_cpu(alloc->id1.bitmap1.i_used));
839
840    if (!alloc->id1.bitmap1.i_total) {
841        mlog(0, "nothing to sync!\n");
842        goto bail;
843    }
844
845    if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
846        le32_to_cpu(alloc->id1.bitmap1.i_total)) {
847        mlog(0, "all bits were taken!\n");
848        goto bail;
849    }
850
851    la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
852                        le32_to_cpu(la->la_bm_off));
853    bitmap = la->la_bitmap;
854    start = count = bit_off = 0;
855    left = le32_to_cpu(alloc->id1.bitmap1.i_total);
856
857    while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
858           != -1) {
859        if ((bit_off < left) && (bit_off == start)) {
860            count++;
861            start++;
862            continue;
863        }
864        if (count) {
865            blkno = la_start_blk +
866                ocfs2_clusters_to_blocks(osb->sb,
867                             start - count);
868
869            mlog(0, "freeing %u bits starting at local alloc bit "
870                 "%u (la_start_blk = %llu, blkno = %llu)\n",
871                 count, start - count,
872                 (unsigned long long)la_start_blk,
873                 (unsigned long long)blkno);
874
875            status = ocfs2_release_clusters(handle,
876                            main_bm_inode,
877                            main_bm_bh, blkno,
878                            count);
879            if (status < 0) {
880                mlog_errno(status);
881                goto bail;
882            }
883        }
884        if (bit_off >= left)
885            break;
886        count = 1;
887        start = bit_off + 1;
888    }
889
890bail:
891    mlog_exit(status);
892    return status;
893}
894
/* Events that drive the recalculation of the local alloc window size. */
enum ocfs2_la_event {
	/* Normal window slide. */
	OCFS2_LA_EVENT_SLIDE,

	/*
	 * The global bitmap has enough bits theoretically free, but a
	 * contiguous allocation could not be found.
	 */
	OCFS2_LA_EVENT_FRAGMENTED,

	/*
	 * Global bitmap doesn't have enough bits free to satisfy our
	 * request.
	 */
	OCFS2_LA_EVENT_ENOSPC,
};

/*
 * Delay, in jiffies (30 * HZ), used when the la_enable_wq work is
 * queued after the local alloc has been throttled or disabled.
 */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
907/*
908 * Given an event, calculate the size of our next local alloc window.
909 *
910 * This should always be called under i_mutex of the local alloc inode
911 * so that local alloc disabling doesn't race with processes trying to
912 * use the allocator.
913 *
914 * Returns the state which the local alloc was left in. This value can
915 * be ignored by some paths.
916 */
917static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
918                  enum ocfs2_la_event event)
919{
920    unsigned int bits;
921    int state;
922
923    spin_lock(&osb->osb_lock);
924    if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
925        WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
926        goto out_unlock;
927    }
928
929    /*
930     * ENOSPC and fragmentation are treated similarly for now.
931     */
932    if (event == OCFS2_LA_EVENT_ENOSPC ||
933        event == OCFS2_LA_EVENT_FRAGMENTED) {
934        /*
935         * We ran out of contiguous space in the primary
936         * bitmap. Drastically reduce the number of bits used
937         * by local alloc until we have to disable it.
938         */
939        bits = osb->local_alloc_bits >> 1;
940        if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
941            /*
942             * By setting state to THROTTLED, we'll keep
943             * the number of local alloc bits used down
944             * until an event occurs which would give us
945             * reason to assume the bitmap situation might
946             * have changed.
947             */
948            osb->local_alloc_state = OCFS2_LA_THROTTLED;
949            osb->local_alloc_bits = bits;
950        } else {
951            osb->local_alloc_state = OCFS2_LA_DISABLED;
952        }
953        queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
954                   OCFS2_LA_ENABLE_INTERVAL);
955        goto out_unlock;
956    }
957
958    /*
959     * Don't increase the size of the local alloc window until we
960     * know we might be able to fulfill the request. Otherwise, we
961     * risk bouncing around the global bitmap during periods of
962     * low space.
963     */
964    if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
965        osb->local_alloc_bits = osb->local_alloc_default_bits;
966
967out_unlock:
968    state = osb->local_alloc_state;
969    spin_unlock(&osb->osb_lock);
970
971    return state;
972}
973
974static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
975                        struct ocfs2_alloc_context **ac,
976                        struct inode **bitmap_inode,
977                        struct buffer_head **bitmap_bh)
978{
979    int status;
980
981    *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
982    if (!(*ac)) {
983        status = -ENOMEM;
984        mlog_errno(status);
985        goto bail;
986    }
987
988retry_enospc:
989    (*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
990    status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
991    if (status == -ENOSPC) {
992        if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
993            OCFS2_LA_DISABLED)
994            goto bail;
995
996        ocfs2_free_ac_resource(*ac);
997        memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
998        goto retry_enospc;
999    }
1000    if (status < 0) {
1001        mlog_errno(status);
1002        goto bail;
1003    }
1004
1005    *bitmap_inode = (*ac)->ac_inode;
1006    igrab(*bitmap_inode);
1007    *bitmap_bh = (*ac)->ac_bh;
1008    get_bh(*bitmap_bh);
1009    status = 0;
1010bail:
1011    if ((status < 0) && *ac) {
1012        ocfs2_free_alloc_context(*ac);
1013        *ac = NULL;
1014    }
1015
1016    mlog_exit(status);
1017    return status;
1018}
1019
1020/*
1021 * pass it the bitmap lock in lock_bh if you have it.
1022 */
1023static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1024                    handle_t *handle,
1025                    struct ocfs2_alloc_context *ac)
1026{
1027    int status = 0;
1028    u32 cluster_off, cluster_count;
1029    struct ocfs2_dinode *alloc = NULL;
1030    struct ocfs2_local_alloc *la;
1031
1032    mlog_entry_void();
1033
1034    alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1035    la = OCFS2_LOCAL_ALLOC(alloc);
1036
1037    if (alloc->id1.bitmap1.i_total)
1038        mlog(0, "asking me to alloc a new window over a non-empty "
1039             "one\n");
1040
1041    mlog(0, "Allocating %u clusters for a new window.\n",
1042         osb->local_alloc_bits);
1043
1044    /* Instruct the allocation code to try the most recently used
1045     * cluster group. We'll re-record the group used this pass
1046     * below. */
1047    ac->ac_last_group = osb->la_last_gd;
1048
1049    /* we used the generic suballoc reserve function, but we set
1050     * everything up nicely, so there's no reason why we can't use
1051     * the more specific cluster api to claim bits. */
1052    status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
1053                      &cluster_off, &cluster_count);
1054    if (status == -ENOSPC) {
1055retry_enospc:
1056        /*
1057         * Note: We could also try syncing the journal here to
1058         * allow use of any free bits which the current
1059         * transaction can't give us access to. --Mark
1060         */
1061        if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
1062            OCFS2_LA_DISABLED)
1063            goto bail;
1064
1065        ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066        status = ocfs2_claim_clusters(osb, handle, ac,
1067                          osb->local_alloc_bits,
1068                          &cluster_off,
1069                          &cluster_count);
1070        if (status == -ENOSPC)
1071            goto retry_enospc;
1072        /*
1073         * We only shrunk the *minimum* number of in our
1074         * request - it's entirely possible that the allocator
1075         * might give us more than we asked for.
1076         */
1077        if (status == 0) {
1078            spin_lock(&osb->osb_lock);
1079            osb->local_alloc_bits = cluster_count;
1080            spin_unlock(&osb->osb_lock);
1081        }
1082    }
1083    if (status < 0) {
1084        if (status != -ENOSPC)
1085            mlog_errno(status);
1086        goto bail;
1087    }
1088
1089    osb->la_last_gd = ac->ac_last_group;
1090
1091    la->la_bm_off = cpu_to_le32(cluster_off);
1092    alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
1093    /* just in case... In the future when we find space ourselves,
1094     * we don't have to get all contiguous -- but we'll have to
1095     * set all previously used bits in bitmap and update
1096     * la_bits_set before setting the bits in the main bitmap. */
1097    alloc->id1.bitmap1.i_used = 0;
1098    memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099           le16_to_cpu(la->la_size));
1100
1101    mlog(0, "New window allocated:\n");
1102    mlog(0, "window la_bm_off = %u\n",
1103         OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
1104    mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
1105
1106bail:
1107    mlog_exit(status);
1108    return status;
1109}
1110
1111/* Note that we do *NOT* lock the local alloc inode here as
1112 * it's been locked already for us. */
1113static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1114                      struct inode *local_alloc_inode)
1115{
1116    int status = 0;
1117    struct buffer_head *main_bm_bh = NULL;
1118    struct inode *main_bm_inode = NULL;
1119    handle_t *handle = NULL;
1120    struct ocfs2_dinode *alloc;
1121    struct ocfs2_dinode *alloc_copy = NULL;
1122    struct ocfs2_alloc_context *ac = NULL;
1123
1124    mlog_entry_void();
1125
1126    ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1127
1128    /* This will lock the main bitmap for us. */
1129    status = ocfs2_local_alloc_reserve_for_window(osb,
1130                              &ac,
1131                              &main_bm_inode,
1132                              &main_bm_bh);
1133    if (status < 0) {
1134        if (status != -ENOSPC)
1135            mlog_errno(status);
1136        goto bail;
1137    }
1138
1139    handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
1140    if (IS_ERR(handle)) {
1141        status = PTR_ERR(handle);
1142        handle = NULL;
1143        mlog_errno(status);
1144        goto bail;
1145    }
1146
1147    alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1148
1149    /* We want to clear the local alloc before doing anything
1150     * else, so that if we error later during this operation,
1151     * local alloc shutdown won't try to double free main bitmap
1152     * bits. Make a copy so the sync function knows which bits to
1153     * free. */
1154    alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
1155    if (!alloc_copy) {
1156        status = -ENOMEM;
1157        mlog_errno(status);
1158        goto bail;
1159    }
1160    memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1161
1162    status = ocfs2_journal_access_di(handle,
1163                     INODE_CACHE(local_alloc_inode),
1164                     osb->local_alloc_bh,
1165                     OCFS2_JOURNAL_ACCESS_WRITE);
1166    if (status < 0) {
1167        mlog_errno(status);
1168        goto bail;
1169    }
1170
1171    ocfs2_clear_local_alloc(alloc);
1172
1173    status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174    if (status < 0) {
1175        mlog_errno(status);
1176        goto bail;
1177    }
1178
1179    status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180                      main_bm_inode, main_bm_bh);
1181    if (status < 0) {
1182        mlog_errno(status);
1183        goto bail;
1184    }
1185
1186    status = ocfs2_local_alloc_new_window(osb, handle, ac);
1187    if (status < 0) {
1188        if (status != -ENOSPC)
1189            mlog_errno(status);
1190        goto bail;
1191    }
1192
1193    atomic_inc(&osb->alloc_stats.moves);
1194
1195    status = 0;
1196bail:
1197    if (handle)
1198        ocfs2_commit_trans(osb, handle);
1199
1200    brelse(main_bm_bh);
1201
1202    if (main_bm_inode)
1203        iput(main_bm_inode);
1204
1205    if (alloc_copy)
1206        kfree(alloc_copy);
1207
1208    if (ac)
1209        ocfs2_free_alloc_context(ac);
1210
1211    mlog_exit(status);
1212    return status;
1213}
1214
1215

Archive Download this file



interactive