Root/fs/ocfs2/dcache.c

1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dcache.c
5 *
6 * dentry cache handling code
7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26#include <linux/fs.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/namei.h>
30
31#define MLOG_MASK_PREFIX ML_DCACHE
32#include <cluster/masklog.h>
33
34#include "ocfs2.h"
35
36#include "alloc.h"
37#include "dcache.h"
38#include "dlmglue.h"
39#include "file.h"
40#include "inode.h"
41#include "super.h"
42
43
44static int ocfs2_dentry_revalidate(struct dentry *dentry,
45                   struct nameidata *nd)
46{
47    struct inode *inode = dentry->d_inode;
48    int ret = 0; /* if all else fails, just return false */
49    struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
50
51    mlog_entry("(0x%p, '%.*s')\n", dentry,
52           dentry->d_name.len, dentry->d_name.name);
53
54    /* Never trust a negative dentry - force a new lookup. */
55    if (inode == NULL) {
56        mlog(0, "negative dentry: %.*s\n", dentry->d_name.len,
57             dentry->d_name.name);
58        goto bail;
59    }
60
61    BUG_ON(!osb);
62
63    if (inode == osb->root_inode || is_bad_inode(inode))
64        goto bail;
65
66    spin_lock(&OCFS2_I(inode)->ip_lock);
67    /* did we or someone else delete this inode? */
68    if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
69        spin_unlock(&OCFS2_I(inode)->ip_lock);
70        mlog(0, "inode (%llu) deleted, returning false\n",
71             (unsigned long long)OCFS2_I(inode)->ip_blkno);
72        goto bail;
73    }
74    spin_unlock(&OCFS2_I(inode)->ip_lock);
75
76    /*
77     * We don't need a cluster lock to test this because once an
78     * inode nlink hits zero, it never goes back.
79     */
80    if (inode->i_nlink == 0) {
81        mlog(0, "Inode %llu orphaned, returning false "
82             "dir = %d\n",
83             (unsigned long long)OCFS2_I(inode)->ip_blkno,
84             S_ISDIR(inode->i_mode));
85        goto bail;
86    }
87
88    /*
89     * If the last lookup failed to create dentry lock, let us
90     * redo it.
91     */
92    if (!dentry->d_fsdata) {
93        mlog(0, "Inode %llu doesn't have dentry lock, "
94             "returning false\n",
95             (unsigned long long)OCFS2_I(inode)->ip_blkno);
96        goto bail;
97    }
98
99    ret = 1;
100
101bail:
102    mlog_exit(ret);
103
104    return ret;
105}
106
107static int ocfs2_match_dentry(struct dentry *dentry,
108                  u64 parent_blkno,
109                  int skip_unhashed)
110{
111    struct inode *parent;
112
113    /*
114     * ocfs2_lookup() does a d_splice_alias() _before_ attaching
115     * to the lock data, so we skip those here, otherwise
116     * ocfs2_dentry_attach_lock() will get its original dentry
117     * back.
118     */
119    if (!dentry->d_fsdata)
120        return 0;
121
122    if (!dentry->d_parent)
123        return 0;
124
125    if (skip_unhashed && d_unhashed(dentry))
126        return 0;
127
128    parent = dentry->d_parent->d_inode;
129    /* Negative parent dentry? */
130    if (!parent)
131        return 0;
132
133    /* Name is in a different directory. */
134    if (OCFS2_I(parent)->ip_blkno != parent_blkno)
135        return 0;
136
137    return 1;
138}
139
140/*
141 * Walk the inode alias list, and find a dentry which has a given
142 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
143 * is looking for a dentry_lock reference. The downconvert thread is
144 * looking to unhash aliases, so we allow it to skip any that already
145 * have that property.
146 */
147struct dentry *ocfs2_find_local_alias(struct inode *inode,
148                      u64 parent_blkno,
149                      int skip_unhashed)
150{
151    struct list_head *p;
152    struct dentry *dentry = NULL;
153
154    spin_lock(&dcache_lock);
155
156    list_for_each(p, &inode->i_dentry) {
157        dentry = list_entry(p, struct dentry, d_alias);
158
159        if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
160            mlog(0, "dentry found: %.*s\n",
161                 dentry->d_name.len, dentry->d_name.name);
162
163            dget_locked(dentry);
164            break;
165        }
166
167        dentry = NULL;
168    }
169
170    spin_unlock(&dcache_lock);
171
172    return dentry;
173}
174
175DEFINE_SPINLOCK(dentry_attach_lock);
176
177/*
178 * Attach this dentry to a cluster lock.
179 *
180 * Dentry locks cover all links in a given directory to a particular
181 * inode. We do this so that ocfs2 can build a lock name which all
182 * nodes in the cluster can agree on at all times. Shoving full names
183 * in the cluster lock won't work due to size restrictions. Covering
184 * links inside of a directory is a good compromise because it still
185 * allows us to use the parent directory lock to synchronize
186 * operations.
187 *
188 * Call this function with the parent dir semaphore and the parent dir
189 * cluster lock held.
190 *
191 * The dir semaphore will protect us from having to worry about
192 * concurrent processes on our node trying to attach a lock at the
193 * same time.
194 *
195 * The dir cluster lock (held at either PR or EX mode) protects us
196 * from unlink and rename on other nodes.
197 *
198 * A dput() can happen asynchronously due to pruning, so we cover
199 * attaching and detaching the dentry lock with a
200 * dentry_attach_lock.
201 *
202 * A node which has done lookup on a name retains a protected read
203 * lock until final dput. If the user requests and unlink or rename,
204 * the protected read is upgraded to an exclusive lock. Other nodes
205 * who have seen the dentry will then be informed that they need to
206 * downgrade their lock, which will involve d_delete on the
207 * dentry. This happens in ocfs2_dentry_convert_worker().
208 */
209int ocfs2_dentry_attach_lock(struct dentry *dentry,
210                 struct inode *inode,
211                 u64 parent_blkno)
212{
213    int ret;
214    struct dentry *alias;
215    struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
216
217    mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
218         dentry->d_name.len, dentry->d_name.name,
219         (unsigned long long)parent_blkno, dl);
220
221    /*
222     * Negative dentry. We ignore these for now.
223     *
224     * XXX: Could we can improve ocfs2_dentry_revalidate() by
225     * tracking these?
226     */
227    if (!inode)
228        return 0;
229
230    if (dl) {
231        mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
232                " \"%.*s\": old parent: %llu, new: %llu\n",
233                dentry->d_name.len, dentry->d_name.name,
234                (unsigned long long)parent_blkno,
235                (unsigned long long)dl->dl_parent_blkno);
236        return 0;
237    }
238
239    alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
240    if (alias) {
241        /*
242         * Great, an alias exists, which means we must have a
243         * dentry lock already. We can just grab the lock off
244         * the alias and add it to the list.
245         *
246         * We're depending here on the fact that this dentry
247         * was found and exists in the dcache and so must have
248         * a reference to the dentry_lock because we can't
249         * race creates. Final dput() cannot happen on it
250         * since we have it pinned, so our reference is safe.
251         */
252        dl = alias->d_fsdata;
253        mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
254                (unsigned long long)parent_blkno,
255                (unsigned long long)OCFS2_I(inode)->ip_blkno);
256
257        mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
258                " \"%.*s\": old parent: %llu, new: %llu\n",
259                dentry->d_name.len, dentry->d_name.name,
260                (unsigned long long)parent_blkno,
261                (unsigned long long)dl->dl_parent_blkno);
262
263        mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
264
265        goto out_attach;
266    }
267
268    /*
269     * There are no other aliases
270     */
271    dl = kmalloc(sizeof(*dl), GFP_NOFS);
272    if (!dl) {
273        ret = -ENOMEM;
274        mlog_errno(ret);
275        return ret;
276    }
277
278    dl->dl_count = 0;
279    /*
280     * Does this have to happen below, for all attaches, in case
281     * the struct inode gets blown away by the downconvert thread?
282     */
283    dl->dl_inode = igrab(inode);
284    dl->dl_parent_blkno = parent_blkno;
285    ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
286
287out_attach:
288    spin_lock(&dentry_attach_lock);
289    dentry->d_fsdata = dl;
290    dl->dl_count++;
291    spin_unlock(&dentry_attach_lock);
292
293    /*
294     * This actually gets us our PRMODE level lock. From now on,
295     * we'll have a notification if one of these names is
296     * destroyed on another node.
297     */
298    ret = ocfs2_dentry_lock(dentry, 0);
299    if (!ret)
300        ocfs2_dentry_unlock(dentry, 0);
301    else
302        mlog_errno(ret);
303
304    /*
305     * In case of error, manually free the allocation and do the iput().
306     * We need to do this because error here means no d_instantiate(),
307     * which means iput() will not be called during dput(dentry).
308     */
309    if (ret < 0 && !alias) {
310        ocfs2_lock_res_free(&dl->dl_lockres);
311        BUG_ON(dl->dl_count != 1);
312        spin_lock(&dentry_attach_lock);
313        dentry->d_fsdata = NULL;
314        spin_unlock(&dentry_attach_lock);
315        kfree(dl);
316        iput(inode);
317    }
318
319    dput(alias);
320
321    return ret;
322}
323
324DEFINE_SPINLOCK(dentry_list_lock);
325
326/* We limit the number of dentry locks to drop in one go. We have
327 * this limit so that we don't starve other users of ocfs2_wq. */
328#define DL_INODE_DROP_COUNT 64
329
330/* Drop inode references from dentry locks */
331static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
332{
333    struct ocfs2_dentry_lock *dl;
334
335    spin_lock(&dentry_list_lock);
336    while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
337        dl = osb->dentry_lock_list;
338        osb->dentry_lock_list = dl->dl_next;
339        spin_unlock(&dentry_list_lock);
340        iput(dl->dl_inode);
341        kfree(dl);
342        spin_lock(&dentry_list_lock);
343    }
344    spin_unlock(&dentry_list_lock);
345}
346
347void ocfs2_drop_dl_inodes(struct work_struct *work)
348{
349    struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
350                           dentry_lock_work);
351
352    __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
353    /*
354     * Don't queue dropping if umount is in progress. We flush the
355     * list in ocfs2_dismount_volume
356     */
357    spin_lock(&dentry_list_lock);
358    if (osb->dentry_lock_list &&
359        !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
360        queue_work(ocfs2_wq, &osb->dentry_lock_work);
361    spin_unlock(&dentry_list_lock);
362}
363
/*
 * Release every dentry lock still queued on osb->dentry_lock_list.
 * The negative count tells __ocfs2_drop_dl_inodes() not to stop early.
 */
void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
{
    const int no_limit = -1;

    __ocfs2_drop_dl_inodes(osb, no_limit);
}
369
370/*
371 * ocfs2_dentry_iput() and friends.
372 *
373 * At this point, our particular dentry is detached from the inodes
374 * alias list, so there's no way that the locking code can find it.
375 *
376 * The interesting stuff happens when we determine that our lock needs
377 * to go away because this is the last subdir alias in the
378 * system. This function needs to handle a couple things:
379 *
380 * 1) Synchronizing lock shutdown with the downconvert threads. This
381 * is already handled for us via the lockres release drop function
382 * called in ocfs2_release_dentry_lock()
383 *
384 * 2) A race may occur when we're doing our lock shutdown and
385 * another process wants to create a new dentry lock. Right now we
386 * let them race, which means that for a very short while, this
387 * node might have two locks on a lock resource. This should be a
388 * problem though because one of them is in the process of being
389 * thrown out.
390 */
391static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
392                   struct ocfs2_dentry_lock *dl)
393{
394    ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
395    ocfs2_lock_res_free(&dl->dl_lockres);
396
397    /* We leave dropping of inode reference to ocfs2_wq as that can
398     * possibly lead to inode deletion which gets tricky */
399    spin_lock(&dentry_list_lock);
400    if (!osb->dentry_lock_list &&
401        !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
402        queue_work(ocfs2_wq, &osb->dentry_lock_work);
403    dl->dl_next = osb->dentry_lock_list;
404    osb->dentry_lock_list = dl;
405    spin_unlock(&dentry_list_lock);
406}
407
408void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
409               struct ocfs2_dentry_lock *dl)
410{
411    int unlock;
412
413    BUG_ON(dl->dl_count == 0);
414
415    spin_lock(&dentry_attach_lock);
416    dl->dl_count--;
417    unlock = !dl->dl_count;
418    spin_unlock(&dentry_attach_lock);
419
420    if (unlock)
421        ocfs2_drop_dentry_lock(osb, dl);
422}
423
424static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
425{
426    struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
427
428    if (!dl) {
429        /*
430         * No dentry lock is ok if we're disconnected or
431         * unhashed.
432         */
433        if (!(dentry->d_flags & DCACHE_DISCONNECTED) &&
434            !d_unhashed(dentry)) {
435            unsigned long long ino = 0ULL;
436            if (inode)
437                ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
438            mlog(ML_ERROR, "Dentry is missing cluster lock. "
439                 "inode: %llu, d_flags: 0x%x, d_name: %.*s\n",
440                 ino, dentry->d_flags, dentry->d_name.len,
441                 dentry->d_name.name);
442        }
443
444        goto out;
445    }
446
447    mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
448            dentry->d_name.len, dentry->d_name.name,
449            dl->dl_count);
450
451    ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
452
453out:
454    iput(inode);
455}
456
457/*
458 * d_move(), but keep the locks in sync.
459 *
460 * When we are done, "dentry" will have the parent dir and name of
461 * "target", which will be thrown away.
462 *
463 * We manually update the lock of "dentry" if need be.
464 *
465 * "target" doesn't have it's dentry lock touched - we allow the later
466 * dput() to handle this for us.
467 *
468 * This is called during ocfs2_rename(), while holding parent
469 * directory locks. The dentries have already been deleted on other
470 * nodes via ocfs2_remote_dentry_delete().
471 *
472 * Normally, the VFS handles the d_move() for the file system, after
473 * the ->rename() callback. OCFS2 wants to handle this internally, so
474 * the new lock can be created atomically with respect to the cluster.
475 */
476void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
477               struct inode *old_dir, struct inode *new_dir)
478{
479    int ret;
480    struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
481    struct inode *inode = dentry->d_inode;
482
483    /*
484     * Move within the same directory, so the actual lock info won't
485     * change.
486     *
487     * XXX: Is there any advantage to dropping the lock here?
488     */
489    if (old_dir == new_dir)
490        goto out_move;
491
492    ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
493
494    dentry->d_fsdata = NULL;
495    ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
496    if (ret)
497        mlog_errno(ret);
498
499out_move:
500    d_move(dentry, target);
501}
502
503const struct dentry_operations ocfs2_dentry_ops = {
504    .d_revalidate = ocfs2_dentry_revalidate,
505    .d_iput = ocfs2_dentry_iput,
506};
507

Archive Download this file



interactive