/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
#include "internal.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 * - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 * - the dcache hash table
 * s_anon bl list spinlock protects:
 * - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 * - the dcache lru lists and counters
 * d_lock protects:
 * - d_flags
 * - d_name
 * - d_lru
 * - d_count
 * - d_unhashed()
 * - d_parent and d_subdirs
 * - children's d_child and d_parent
 * - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 * dentry->d_lock
 * dcache_lru_lock
 * dcache_hash_bucket lock
 * s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 * ...
 * dentry->d_parent->d_lock
 * dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 * dentry1->d_lock
 * dentry2->d_lock
 */
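
/*
 * Example (illustrative sketch, not part of the original file): a caller
 * that must take d_lock on two dentries with no ancestor relationship
 * follows the address-order rule above, e.g.:
 *
 *    if (dentry1 > dentry2)
 *        swap(dentry1, dentry2);
 *    spin_lock(&dentry1->d_lock);
 *    spin_lock_nested(&dentry2->d_lock, DENTRY_D_LOCK_NESTED);
 */
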
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS d_hash_shift
#define D_HASHMASK d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

static inline struct hlist_bl_head *d_hash(struct dentry *parent,
                    unsigned long hash)
{
    hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
    hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
    return dentry_hashtable + (hash & D_HASHMASK);
}

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
    .age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
    int i;
    int sum = 0;
    for_each_possible_cpu(i)
        sum += per_cpu(nr_dentry, i);
    return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
           size_t *lenp, loff_t *ppos)
{
    dentry_stat.nr_dentry = get_nr_dentry();
    return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void __d_free(struct rcu_head *head)
{
    struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

    WARN_ON(!list_empty(&dentry->d_alias));
    if (dname_external(dentry))
        kfree(dentry->d_name.name);
    kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
    BUG_ON(dentry->d_count);
    this_cpu_dec(nr_dentry);
    if (dentry->d_op && dentry->d_op->d_release)
        dentry->d_op->d_release(dentry);

    /* if dentry was never visible to RCU, immediate free is OK */
    if (!(dentry->d_flags & DCACHE_RCUACCESS))
        __d_free(&dentry->d_u.d_rcu);
    else
        call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
    assert_spin_locked(&dentry->d_lock);
    /* Go through a barrier */
    write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
    __releases(dentry->d_lock)
    __releases(dentry->d_inode->i_lock)
{
    struct inode *inode = dentry->d_inode;
    if (inode) {
        dentry->d_inode = NULL;
        list_del_init(&dentry->d_alias);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
        if (!inode->i_nlink)
            fsnotify_inoderemove(inode);
        if (dentry->d_op && dentry->d_op->d_iput)
            dentry->d_op->d_iput(dentry, inode);
        else
            iput(inode);
    } else {
        spin_unlock(&dentry->d_lock);
    }
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
    __releases(dentry->d_lock)
    __releases(dentry->d_inode->i_lock)
{
    struct inode *inode = dentry->d_inode;
    dentry->d_inode = NULL;
    list_del_init(&dentry->d_alias);
    dentry_rcuwalk_barrier(dentry);
    spin_unlock(&dentry->d_lock);
    spin_unlock(&inode->i_lock);
    if (!inode->i_nlink)
        fsnotify_inoderemove(inode);
    if (dentry->d_op && dentry->d_op->d_iput)
        dentry->d_op->d_iput(dentry, inode);
    else
        iput(inode);
}

/*
 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
    if (list_empty(&dentry->d_lru)) {
        spin_lock(&dcache_lru_lock);
        list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
        dentry->d_sb->s_nr_dentry_unused++;
        dentry_stat.nr_unused++;
        spin_unlock(&dcache_lru_lock);
    }
}

static void __dentry_lru_del(struct dentry *dentry)
{
    list_del_init(&dentry->d_lru);
    dentry->d_sb->s_nr_dentry_unused--;
    dentry_stat.nr_unused--;
}

static void dentry_lru_del(struct dentry *dentry)
{
    if (!list_empty(&dentry->d_lru)) {
        spin_lock(&dcache_lru_lock);
        __dentry_lru_del(dentry);
        spin_unlock(&dcache_lru_lock);
    }
}

static void dentry_lru_move_tail(struct dentry *dentry)
{
    spin_lock(&dcache_lru_lock);
    if (list_empty(&dentry->d_lru)) {
        list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
        dentry->d_sb->s_nr_dentry_unused++;
        dentry_stat.nr_unused++;
    } else {
        list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
    }
    spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
    __releases(dentry->d_lock)
    __releases(parent->d_lock)
    __releases(dentry->d_inode->i_lock)
{
    list_del(&dentry->d_u.d_child);
    /*
     * Inform try_to_ascend() that we are no longer attached to the
     * dentry tree
     */
    dentry->d_flags |= DCACHE_DISCONNECTED;
    if (parent)
        spin_unlock(&parent->d_lock);
    dentry_iput(dentry);
    /*
     * dentry_iput drops the locks, at which point nobody (except
     * transient RCU lookups) can reach this dentry.
     */
    d_free(dentry);
    return parent;
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
    if (!d_unhashed(dentry)) {
        struct hlist_bl_head *b;
        if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
            b = &dentry->d_sb->s_anon;
        else
            b = d_hash(dentry->d_parent, dentry->d_name.hash);

        hlist_bl_lock(b);
        __hlist_bl_del(&dentry->d_hash);
        dentry->d_hash.pprev = NULL;
        hlist_bl_unlock(b);

        dentry_rcuwalk_barrier(dentry);
    }
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
    spin_lock(&dentry->d_lock);
    __d_drop(dentry);
    spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);
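
/*
 * Example (illustrative sketch, not part of the original file): a network
 * filesystem that discovers a name has gone stale on the server can
 * unhash the cached entry so that the next lookup misses and goes back
 * to the filesystem:
 *
 *    void example_invalidate(struct dentry *dentry)
 *    {
 *        d_drop(dentry);
 *    }
 */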

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
    __releases(dentry->d_lock)
{
    struct inode *inode;
    struct dentry *parent;

    inode = dentry->d_inode;
    if (inode && !spin_trylock(&inode->i_lock)) {
relock:
        spin_unlock(&dentry->d_lock);
        cpu_relax();
        return dentry; /* try again with same dentry */
    }
    if (IS_ROOT(dentry))
        parent = NULL;
    else
        parent = dentry->d_parent;
    if (parent && !spin_trylock(&parent->d_lock)) {
        if (inode)
            spin_unlock(&inode->i_lock);
        goto relock;
    }

    if (ref)
        dentry->d_count--;
    /* if dentry was on the d_lru list delete it from there */
    dentry_lru_del(dentry);
    /* if it was on the hash then remove it */
    __d_drop(dentry);
    return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
    if (!dentry)
        return;

repeat:
    if (dentry->d_count == 1)
        might_sleep();
    spin_lock(&dentry->d_lock);
    BUG_ON(!dentry->d_count);
    if (dentry->d_count > 1) {
        dentry->d_count--;
        spin_unlock(&dentry->d_lock);
        return;
    }

    if (dentry->d_flags & DCACHE_OP_DELETE) {
        if (dentry->d_op->d_delete(dentry))
            goto kill_it;
    }

    /* Unreachable? Get rid of it */
    if (d_unhashed(dentry))
        goto kill_it;

    /* Otherwise leave it cached and ensure it's on the LRU */
    dentry->d_flags |= DCACHE_REFERENCED;
    dentry_lru_add(dentry);

    dentry->d_count--;
    spin_unlock(&dentry->d_lock);
    return;

kill_it:
    dentry = dentry_kill(dentry, 1);
    if (dentry)
        goto repeat;
}
EXPORT_SYMBOL(dput);
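
/*
 * Example (illustrative sketch, not part of the original file): every
 * reference obtained with dget() must be balanced by a dput() once the
 * caller is done with the dentry:
 *
 *    struct dentry *d = dget(dentry);
 *    ...d_name and d_inode stay pinned while we use d...
 *    dput(d);
 */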

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
    /*
     * If it's already been dropped, return OK.
     */
    spin_lock(&dentry->d_lock);
    if (d_unhashed(dentry)) {
        spin_unlock(&dentry->d_lock);
        return 0;
    }
    /*
     * Check whether to do a partial shrink_dcache
     * to get rid of unused child entries.
     */
    if (!list_empty(&dentry->d_subdirs)) {
        spin_unlock(&dentry->d_lock);
        shrink_dcache_parent(dentry);
        spin_lock(&dentry->d_lock);
    }

    /*
     * Somebody else still using it?
     *
     * If it's a directory, we can't drop it
     * for fear of somebody re-populating it
     * with children (even though dropping it
     * would make it unreachable from the root,
     * we might still populate it if it was a
     * working directory or similar).
     */
    if (dentry->d_count > 1) {
        if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
            spin_unlock(&dentry->d_lock);
            return -EBUSY;
        }
    }

    __d_drop(dentry);
    spin_unlock(&dentry->d_lock);
    return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
    dentry->d_count++;
}

static inline void __dget(struct dentry *dentry)
{
    spin_lock(&dentry->d_lock);
    __dget_dlock(dentry);
    spin_unlock(&dentry->d_lock);
}

struct dentry *dget_parent(struct dentry *dentry)
{
    struct dentry *ret;

repeat:
    /*
     * Don't need rcu_dereference because we re-check it was correct under
     * the lock.
     */
    rcu_read_lock();
    ret = dentry->d_parent;
    if (!ret) {
        rcu_read_unlock();
        goto out;
    }
    spin_lock(&ret->d_lock);
    if (unlikely(ret != dentry->d_parent)) {
        spin_unlock(&ret->d_lock);
        rcu_read_unlock();
        goto repeat;
    }
    rcu_read_unlock();
    BUG_ON(!ret->d_count);
    ret->d_count++;
    spin_unlock(&ret->d_lock);
out:
    return ret;
}
EXPORT_SYMBOL(dget_parent);
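
/*
 * Example (illustrative sketch, not part of the original file): unlike
 * reading dentry->d_parent directly, dget_parent() is safe against a
 * concurrent rename and hands back a pinned parent:
 *
 *    struct dentry *parent = dget_parent(dentry);
 *    ...parent cannot be freed or renamed away here...
 *    dput(parent);
 */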

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 * that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
    struct dentry *alias, *discon_alias;

again:
    discon_alias = NULL;
    list_for_each_entry(alias, &inode->i_dentry, d_alias) {
        spin_lock(&alias->d_lock);
        if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
            if (IS_ROOT(alias) &&
                (alias->d_flags & DCACHE_DISCONNECTED)) {
                discon_alias = alias;
            } else if (!want_discon) {
                __dget_dlock(alias);
                spin_unlock(&alias->d_lock);
                return alias;
            }
        }
        spin_unlock(&alias->d_lock);
    }
    if (discon_alias) {
        alias = discon_alias;
        spin_lock(&alias->d_lock);
        if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
            if (IS_ROOT(alias) &&
                (alias->d_flags & DCACHE_DISCONNECTED)) {
                __dget_dlock(alias);
                spin_unlock(&alias->d_lock);
                return alias;
            }
        }
        spin_unlock(&alias->d_lock);
        goto again;
    }
    return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
    struct dentry *de = NULL;

    if (!list_empty(&inode->i_dentry)) {
        spin_lock(&inode->i_lock);
        de = __d_find_alias(inode, 0);
        spin_unlock(&inode->i_lock);
    }
    return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
    struct dentry *dentry;
restart:
    spin_lock(&inode->i_lock);
    list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
        spin_lock(&dentry->d_lock);
        if (!dentry->d_count) {
            __dget_dlock(dentry);
            __d_drop(dentry);
            spin_unlock(&dentry->d_lock);
            spin_unlock(&inode->i_lock);
            dput(dentry);
            goto restart;
        }
        spin_unlock(&dentry->d_lock);
    }
    spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);

/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
    __releases(dentry->d_lock)
{
    struct dentry *parent;

    parent = dentry_kill(dentry, 0);
    /*
     * If dentry_kill returns NULL, we have nothing more to do.
     * if it returns the same dentry, trylocks failed. In either
     * case, just loop again.
     *
     * Otherwise, we need to prune ancestors too. This is necessary
     * to prevent quadratic behavior of shrink_dcache_parent(), but
     * is also expected to be beneficial in reducing dentry cache
     * fragmentation.
     */
    if (!parent)
        return;
    if (parent == dentry)
        return;

    /* Prune ancestors. */
    dentry = parent;
    while (dentry) {
        spin_lock(&dentry->d_lock);
        if (dentry->d_count > 1) {
            dentry->d_count--;
            spin_unlock(&dentry->d_lock);
            return;
        }
        dentry = dentry_kill(dentry, 1);
    }
}

static void shrink_dentry_list(struct list_head *list)
{
    struct dentry *dentry;

    rcu_read_lock();
    for (;;) {
        dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
        if (&dentry->d_lru == list)
            break; /* empty */
        spin_lock(&dentry->d_lock);
        if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
            spin_unlock(&dentry->d_lock);
            continue;
        }

        /*
         * We found an inuse dentry which was not removed from
         * the LRU because of laziness during lookup. Do not free
         * it - just keep it off the LRU list.
         */
        if (dentry->d_count) {
            dentry_lru_del(dentry);
            spin_unlock(&dentry->d_lock);
            continue;
        }

        rcu_read_unlock();

        try_prune_one_dentry(dentry);

        rcu_read_lock();
    }
    rcu_read_unlock();
}

/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb: superblock to shrink dentry LRU.
 * @count: number of entries to prune
 * @flags: flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
    /* called from prune_dcache() and shrink_dcache_parent() */
    struct dentry *dentry;
    LIST_HEAD(referenced);
    LIST_HEAD(tmp);
    int cnt = *count;

relock:
    spin_lock(&dcache_lru_lock);
    while (!list_empty(&sb->s_dentry_lru)) {
        dentry = list_entry(sb->s_dentry_lru.prev,
                struct dentry, d_lru);
        BUG_ON(dentry->d_sb != sb);

        if (!spin_trylock(&dentry->d_lock)) {
            spin_unlock(&dcache_lru_lock);
            cpu_relax();
            goto relock;
        }

        /*
         * If we are honouring the DCACHE_REFERENCED flag and the
         * dentry has this flag set, don't free it. Clear the flag
         * and put it back on the LRU.
         */
        if (flags & DCACHE_REFERENCED &&
                dentry->d_flags & DCACHE_REFERENCED) {
            dentry->d_flags &= ~DCACHE_REFERENCED;
            list_move(&dentry->d_lru, &referenced);
            spin_unlock(&dentry->d_lock);
        } else {
            list_move_tail(&dentry->d_lru, &tmp);
            spin_unlock(&dentry->d_lock);
            if (!--cnt)
                break;
        }
        cond_resched_lock(&dcache_lru_lock);
    }
    if (!list_empty(&referenced))
        list_splice(&referenced, &sb->s_dentry_lru);
    spin_unlock(&dcache_lru_lock);

    shrink_dentry_list(&tmp);

    *count = cnt;
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in use.
 */
static void prune_dcache(int count)
{
    struct super_block *sb, *p = NULL;
    int w_count;
    int unused = dentry_stat.nr_unused;
    int prune_ratio;
    int pruned;

    if (unused == 0 || count == 0)
        return;
    if (count >= unused)
        prune_ratio = 1;
    else
        prune_ratio = unused / count;
    spin_lock(&sb_lock);
    list_for_each_entry(sb, &super_blocks, s_list) {
        if (list_empty(&sb->s_instances))
            continue;
        if (sb->s_nr_dentry_unused == 0)
            continue;
        sb->s_count++;
        /* Now, we reclaim unused dentries with fairness.
         * We reclaim the same percentage from each superblock.
         * We calculate number of dentries to scan on this sb
         * as follows, but the implementation is arranged to avoid
         * overflows:
         * number of dentries to scan on this sb =
         * count * (number of dentries on this sb /
         * number of dentries in the machine)
         */
        spin_unlock(&sb_lock);
        if (prune_ratio != 1)
            w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
        else
            w_count = sb->s_nr_dentry_unused;
        pruned = w_count;
        /*
         * We need to be sure this filesystem isn't being unmounted,
         * otherwise we could race with generic_shutdown_super(), and
         * end up holding a reference to an inode while the filesystem
         * is unmounted. So we try to get s_umount, and make sure
         * s_root isn't NULL.
         */
        if (down_read_trylock(&sb->s_umount)) {
            if ((sb->s_root != NULL) &&
                (!list_empty(&sb->s_dentry_lru))) {
                __shrink_dcache_sb(sb, &w_count,
                        DCACHE_REFERENCED);
                pruned -= w_count;
            }
            up_read(&sb->s_umount);
        }
        spin_lock(&sb_lock);
        if (p)
            __put_super(p);
        count -= pruned;
        p = sb;
        /* more work left to do? */
        if (count <= 0)
            break;
    }
    if (p)
        __put_super(p);
    spin_unlock(&sb_lock);
}

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
    LIST_HEAD(tmp);

    spin_lock(&dcache_lru_lock);
    while (!list_empty(&sb->s_dentry_lru)) {
        list_splice_init(&sb->s_dentry_lru, &tmp);
        spin_unlock(&dcache_lru_lock);
        shrink_dentry_list(&tmp);
        spin_lock(&dcache_lru_lock);
    }
    spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 * locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
    struct dentry *parent;
    unsigned detached = 0;

    BUG_ON(!IS_ROOT(dentry));

    /* detach this root from the system */
    spin_lock(&dentry->d_lock);
    dentry_lru_del(dentry);
    __d_drop(dentry);
    spin_unlock(&dentry->d_lock);

    for (;;) {
        /* descend to the first leaf in the current subtree */
        while (!list_empty(&dentry->d_subdirs)) {
            struct dentry *loop;

            /* this is a branch with children - detach all of them
             * from the system in one go */
            spin_lock(&dentry->d_lock);
            list_for_each_entry(loop, &dentry->d_subdirs,
                        d_u.d_child) {
                spin_lock_nested(&loop->d_lock,
                        DENTRY_D_LOCK_NESTED);
                dentry_lru_del(loop);
                __d_drop(loop);
                spin_unlock(&loop->d_lock);
            }
            spin_unlock(&dentry->d_lock);

            /* move to the first child */
            dentry = list_entry(dentry->d_subdirs.next,
                        struct dentry, d_u.d_child);
        }

        /* consume the dentries from this leaf up through its parents
         * until we find one with children or run out altogether */
        do {
            struct inode *inode;

            if (dentry->d_count != 0) {
                printk(KERN_ERR
                       "BUG: Dentry %p{i=%lx,n=%s}"
                       " still in use (%d)"
                       " [unmount of %s %s]\n",
                       dentry,
                       dentry->d_inode ?
                       dentry->d_inode->i_ino : 0UL,
                       dentry->d_name.name,
                       dentry->d_count,
                       dentry->d_sb->s_type->name,
                       dentry->d_sb->s_id);
                BUG();
            }

            if (IS_ROOT(dentry)) {
                parent = NULL;
                list_del(&dentry->d_u.d_child);
            } else {
                parent = dentry->d_parent;
                spin_lock(&parent->d_lock);
                parent->d_count--;
                list_del(&dentry->d_u.d_child);
                spin_unlock(&parent->d_lock);
            }

            detached++;

            inode = dentry->d_inode;
            if (inode) {
                dentry->d_inode = NULL;
                list_del_init(&dentry->d_alias);
                if (dentry->d_op && dentry->d_op->d_iput)
                    dentry->d_op->d_iput(dentry, inode);
                else
                    iput(inode);
            }

            d_free(dentry);

            /* finished when we fall off the top of the tree,
             * otherwise we ascend to the parent and move to the
             * next sibling if there is one */
            if (!parent)
                return;
            dentry = parent;
        } while (list_empty(&dentry->d_subdirs));

        dentry = list_entry(dentry->d_subdirs.next,
                    struct dentry, d_u.d_child);
    }
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 * - the superblock is detached from all mountings and open files, so the
 * dentry trees will not be rearranged by the VFS
 * - s_umount is write-locked, so the memory pressure shrinker will ignore
 * any dentries belonging to this superblock that it comes across
 * - the filesystem itself is no longer permitted to rearrange the dentries
 * in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
    struct dentry *dentry;

    if (down_read_trylock(&sb->s_umount))
        BUG();

    dentry = sb->s_root;
    sb->s_root = NULL;
    spin_lock(&dentry->d_lock);
    dentry->d_count--;
    spin_unlock(&dentry->d_lock);
    shrink_dcache_for_umount_subtree(dentry);

    while (!hlist_bl_empty(&sb->s_anon)) {
        dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
        shrink_dcache_for_umount_subtree(dentry);
    }
}

/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
{
    struct dentry *new = old->d_parent;

    rcu_read_lock();
    spin_unlock(&old->d_lock);
    spin_lock(&new->d_lock);

    /*
     * might go back up the wrong parent if we have had a rename
     * or deletion
     */
    if (new != old->d_parent ||
         (old->d_flags & DCACHE_DISCONNECTED) ||
         (!locked && read_seqretry(&rename_lock, seq))) {
        spin_unlock(&new->d_lock);
        new = NULL;
    }
    rcu_read_unlock();
    return new;
}


/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
    struct dentry *this_parent;
    struct list_head *next;
    unsigned seq;
    int locked = 0;

    seq = read_seqbegin(&rename_lock);
again:
    this_parent = parent;

    if (d_mountpoint(parent))
        goto positive;
    spin_lock(&this_parent->d_lock);
repeat:
    next = this_parent->d_subdirs.next;
resume:
    while (next != &this_parent->d_subdirs) {
        struct list_head *tmp = next;
        struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
        next = tmp->next;

        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
        /* Have we found a mount point ? */
        if (d_mountpoint(dentry)) {
            spin_unlock(&dentry->d_lock);
            spin_unlock(&this_parent->d_lock);
            goto positive;
        }
        if (!list_empty(&dentry->d_subdirs)) {
            spin_unlock(&this_parent->d_lock);
            spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
            this_parent = dentry;
            spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
            goto repeat;
        }
        spin_unlock(&dentry->d_lock);
    }
    /*
     * All done at this level ... ascend and resume the search.
     */
    if (this_parent != parent) {
        struct dentry *child = this_parent;
        this_parent = try_to_ascend(this_parent, locked, seq);
        if (!this_parent)
            goto rename_retry;
        next = child->d_u.d_child.next;
        goto resume;
    }
    spin_unlock(&this_parent->d_lock);
    if (!locked && read_seqretry(&rename_lock, seq))
        goto rename_retry;
    if (locked)
        write_sequnlock(&rename_lock);
    return 0; /* No mount points found in tree */
positive:
    if (!locked && read_seqretry(&rename_lock, seq))
        goto rename_retry;
    if (locked)
        write_sequnlock(&rename_lock);
    return 1;

rename_retry:
    locked = 1;
    write_seqlock(&rename_lock);
    goto again;
}
EXPORT_SYMBOL(have_submounts);
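
/*
 * Example (illustrative sketch, not part of the original file): an
 * automounter's expiry path would typically refuse to tear down a
 * subtree that still hosts mounts:
 *
 *    if (have_submounts(dentry))
 *        return -EBUSY;
 */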

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
    struct dentry *this_parent;
    struct list_head *next;
    unsigned seq;
    int found = 0;
    int locked = 0;

    seq = read_seqbegin(&rename_lock);
again:
    this_parent = parent;
    spin_lock(&this_parent->d_lock);
repeat:
    next = this_parent->d_subdirs.next;
resume:
    while (next != &this_parent->d_subdirs) {
        struct list_head *tmp = next;
        struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
        next = tmp->next;

        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

        /*
         * move only zero ref count dentries to the end
         * of the unused list for prune_dcache
         */
        if (!dentry->d_count) {
            dentry_lru_move_tail(dentry);
            found++;
        } else {
            dentry_lru_del(dentry);
        }

        /*
         * We can return to the caller if we have found some (this
         * ensures forward progress). We'll be coming back to find
         * the rest.
         */
        if (found && need_resched()) {
            spin_unlock(&dentry->d_lock);
            goto out;
        }

        /*
         * Descend a level if the d_subdirs list is non-empty.
         */
        if (!list_empty(&dentry->d_subdirs)) {
            spin_unlock(&this_parent->d_lock);
            spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
            this_parent = dentry;
            spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
            goto repeat;
        }

        spin_unlock(&dentry->d_lock);
    }
    /*
     * All done at this level ... ascend and resume the search.
     */
    if (this_parent != parent) {
        struct dentry *child = this_parent;
        this_parent = try_to_ascend(this_parent, locked, seq);
        if (!this_parent)
            goto rename_retry;
        next = child->d_u.d_child.next;
        goto resume;
    }
out:
    spin_unlock(&this_parent->d_lock);
    if (!locked && read_seqretry(&rename_lock, seq))
        goto rename_retry;
    if (locked)
        write_sequnlock(&rename_lock);
    return found;

rename_retry:
    if (found)
        return found;
    locked = 1;
    write_seqlock(&rename_lock);
    goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
    struct super_block *sb = parent->d_sb;
    int found;

    while ((found = select_parent(parent)) != 0)
        __shrink_dcache_sb(sb, &found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);

/*
 * Scan `sc->nr_to_scan' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(struct shrinker *shrink,
                struct shrink_control *sc)
{
    int nr = sc->nr_to_scan;
    gfp_t gfp_mask = sc->gfp_mask;

    if (nr) {
        if (!(gfp_mask & __GFP_FS))
            return -1;
        prune_dcache(nr);
    }

    return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker dcache_shrinker = {
    .shrink = shrink_dcache_memory,
    .seeks = DEFAULT_SEEKS,
};

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
    struct dentry *dentry;
    char *dname;

    dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
    if (!dentry)
        return NULL;

    if (name->len > DNAME_INLINE_LEN-1) {
        dname = kmalloc(name->len + 1, GFP_KERNEL);
        if (!dname) {
            kmem_cache_free(dentry_cache, dentry);
            return NULL;
        }
    } else {
        dname = dentry->d_iname;
    }
    dentry->d_name.name = dname;

    dentry->d_name.len = name->len;
    dentry->d_name.hash = name->hash;
    memcpy(dname, name->name, name->len);
    dname[name->len] = 0;

    dentry->d_count = 1;
    dentry->d_flags = 0;
    spin_lock_init(&dentry->d_lock);
    seqcount_init(&dentry->d_seq);
    dentry->d_inode = NULL;
    dentry->d_parent = NULL;
    dentry->d_sb = NULL;
    dentry->d_op = NULL;
    dentry->d_fsdata = NULL;
    INIT_HLIST_BL_NODE(&dentry->d_hash);
    INIT_LIST_HEAD(&dentry->d_lru);
    INIT_LIST_HEAD(&dentry->d_subdirs);
    INIT_LIST_HEAD(&dentry->d_alias);
    INIT_LIST_HEAD(&dentry->d_u.d_child);

    if (parent) {
        spin_lock(&parent->d_lock);
        /*
         * don't need child lock because it is not subject
         * to concurrency here
         */
        __dget_dlock(parent);
        dentry->d_parent = parent;
        dentry->d_sb = parent->d_sb;
        d_set_d_op(dentry, dentry->d_sb->s_d_op);
        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
        spin_unlock(&parent->d_lock);
    }

    this_cpu_inc(nr_dentry);

    return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
    struct dentry *dentry = d_alloc(NULL, name);
    if (dentry) {
        dentry->d_sb = sb;
        d_set_d_op(dentry, dentry->d_sb->s_d_op);
        dentry->d_parent = dentry;
        dentry->d_flags |= DCACHE_DISCONNECTED;
    }
    return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
    struct qstr q;

    q.name = name;
    q.len = strlen(name);
    q.hash = full_name_hash(q.name, q.len);
    return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);
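
/*
 * Example (illustrative sketch, not part of the original file): pseudo
 * filesystems commonly build named children under a known parent with
 * d_alloc_name() and then instantiate them:
 *
 *    dentry = d_alloc_name(parent, "status");
 *    if (dentry)
 *        d_instantiate(dentry, inode);
 */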

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
    WARN_ON_ONCE(dentry->d_op);
    WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
                DCACHE_OP_COMPARE |
                DCACHE_OP_REVALIDATE |
                DCACHE_OP_DELETE));
    dentry->d_op = op;
    if (!op)
        return;
    if (op->d_hash)
        dentry->d_flags |= DCACHE_OP_HASH;
    if (op->d_compare)
        dentry->d_flags |= DCACHE_OP_COMPARE;
    if (op->d_revalidate)
        dentry->d_flags |= DCACHE_OP_REVALIDATE;
    if (op->d_delete)
        dentry->d_flags |= DCACHE_OP_DELETE;

}
EXPORT_SYMBOL(d_set_d_op);

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
    spin_lock(&dentry->d_lock);
    if (inode) {
        if (unlikely(IS_AUTOMOUNT(inode)))
            dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
        list_add(&dentry->d_alias, &inode->i_dentry);
    }
    dentry->d_inode = inode;
    dentry_rcuwalk_barrier(dentry);
    spin_unlock(&dentry->d_lock);
    fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
    BUG_ON(!list_empty(&entry->d_alias));
    if (inode)
        spin_lock(&inode->i_lock);
    __d_instantiate(entry, inode);
    if (inode)
        spin_unlock(&inode->i_lock);
    security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
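
/*
 * Example (illustrative sketch, not part of the original file): a typical
 * filesystem ->create() method allocates the inode and then turns the
 * negative dentry positive (example_new_inode is a made-up helper):
 *
 *    static int example_create(struct inode *dir, struct dentry *dentry,
 *                  int mode, struct nameidata *nd)
 *    {
 *        struct inode *inode = example_new_inode(dir->i_sb, mode);
 *        if (!inode)
 *            return -ENOSPC;
 *        d_instantiate(dentry, inode);
 *        return 0;
 *    }
 */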

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
                         struct inode *inode)
{
    struct dentry *alias;
    int len = entry->d_name.len;
    const char *name = entry->d_name.name;
    unsigned int hash = entry->d_name.hash;

    if (!inode) {
        __d_instantiate(entry, NULL);
        return NULL;
    }

    list_for_each_entry(alias, &inode->i_dentry, d_alias) {
        struct qstr *qstr = &alias->d_name;

        /*
         * Don't need alias->d_lock here, because aliases with
         * d_parent == entry->d_parent are not subject to name or
         * parent changes, because the parent inode i_mutex is held.
         */
        if (qstr->hash != hash)
            continue;
        if (alias->d_parent != entry->d_parent)
            continue;
        if (dentry_cmp(qstr->name, qstr->len, name, len))
            continue;
        __dget(alias);
        return alias;
    }

    __d_instantiate(entry, inode);
    return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
    struct dentry *result;

    BUG_ON(!list_empty(&entry->d_alias));

    if (inode)
        spin_lock(&inode->i_lock);
    result = __d_instantiate_unique(entry, inode);
    if (inode)
        spin_unlock(&inode->i_lock);

    if (!result) {
        security_d_instantiate(entry, inode);
        return NULL;
    }

    BUG_ON(!d_unhashed(result));
    iput(inode);
    return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
    struct dentry *res = NULL;

    if (root_inode) {
        static const struct qstr name = { .name = "/", .len = 1 };

        res = d_alloc(NULL, &name);
        if (res) {
            res->d_sb = root_inode->i_sb;
            d_set_d_op(res, res->d_sb->s_d_op);
            res->d_parent = res;
            d_instantiate(res, root_inode);
        }
    }
    return res;
}
EXPORT_SYMBOL(d_alloc_root);
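
/*
 * Example (illustrative sketch, not part of the original file): a
 * fill_super implementation pairs d_alloc_root() with iput() on failure
 * so the root inode reference is not leaked:
 *
 *    sb->s_root = d_alloc_root(root_inode);
 *    if (!sb->s_root) {
 *        iput(root_inode);
 *        return -ENOMEM;
 *    }
 */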

static struct dentry * __d_find_any_alias(struct inode *inode)
{
    struct dentry *alias;

    if (list_empty(&inode->i_dentry))
        return NULL;
    alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
    __dget(alias);
    return alias;
}

static struct dentry * d_find_any_alias(struct inode *inode)
{
    struct dentry *de;

    spin_lock(&inode->i_lock);
    de = __d_find_any_alias(inode);
    spin_unlock(&inode->i_lock);
    return de;
}


/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
    static const struct qstr anonstring = { .name = "" };
    struct dentry *tmp;
    struct dentry *res;

    if (!inode)
        return ERR_PTR(-ESTALE);
    if (IS_ERR(inode))
        return ERR_CAST(inode);

    res = d_find_any_alias(inode);
    if (res)
        goto out_iput;

    tmp = d_alloc(NULL, &anonstring);
    if (!tmp) {
        res = ERR_PTR(-ENOMEM);
        goto out_iput;
    }
    tmp->d_parent = tmp; /* make sure dput doesn't croak */


    spin_lock(&inode->i_lock);
    res = __d_find_any_alias(inode);
    if (res) {
        spin_unlock(&inode->i_lock);
        dput(tmp);
        goto out_iput;
    }

    /* attach a disconnected dentry */
    spin_lock(&tmp->d_lock);
    tmp->d_sb = inode->i_sb;
    d_set_d_op(tmp, tmp->d_sb->s_d_op);
    tmp->d_inode = inode;
    tmp->d_flags |= DCACHE_DISCONNECTED;
    list_add(&tmp->d_alias, &inode->i_dentry);
    hlist_bl_lock(&tmp->d_sb->s_anon);
    hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
    hlist_bl_unlock(&tmp->d_sb->s_anon);
    spin_unlock(&tmp->d_lock);
    spin_unlock(&inode->i_lock);
    security_d_instantiate(tmp, inode);

    return tmp;

 out_iput:
    if (res && !IS_ERR(res))
        security_d_instantiate(res, inode);
    iput(inode);
    return res;
}
EXPORT_SYMBOL(d_obtain_alias);
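
/*
 * Example (illustrative sketch, not part of the original file): export
 * operations lean on the NULL/IS_ERR pass-through documented above, so a
 * ->fh_to_dentry() method can be a one-liner (example_iget is a made-up
 * helper returning an inode or an ERR_PTR):
 *
 *    static struct dentry *example_fh_to_dentry(struct super_block *sb,
 *                    struct fid *fid, int fh_len, int fh_type)
 *    {
 *        return d_obtain_alias(example_iget(sb, fid->i32.ino));
 *    }
 */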

/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode: the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
    struct dentry *new = NULL;

    if (inode && S_ISDIR(inode->i_mode)) {
        spin_lock(&inode->i_lock);
        new = __d_find_alias(inode, 1);
        if (new) {
            BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
            spin_unlock(&inode->i_lock);
            security_d_instantiate(new, inode);
            d_move(new, dentry);
            iput(inode);
        } else {
            /* already taking inode->i_lock, so d_add() by hand */
            __d_instantiate(dentry, inode);
            spin_unlock(&inode->i_lock);
            security_d_instantiate(dentry, inode);
            d_rehash(dentry);
        }
    } else
        d_add(dentry, inode);
    return new;
}
EXPORT_SYMBOL(d_splice_alias);
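
/*
 * Example (illustrative sketch, not part of the original file): an
 * exportable filesystem's ->lookup() returns d_splice_alias() directly;
 * a NULL inode simply leaves the dentry negative (example_inode_by_name
 * is a made-up helper):
 *
 *    static struct dentry *example_lookup(struct inode *dir,
 *                    struct dentry *dentry, struct nameidata *nd)
 *    {
 *        struct inode *inode = example_inode_by_name(dir, &dentry->d_name);
 *        if (IS_ERR(inode))
 *            return ERR_CAST(inode);
 *        return d_splice_alias(inode, dentry);
 *    }
 */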

/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode: the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name: the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
            struct qstr *name)
{
    int error;
    struct dentry *found;
    struct dentry *new;

    /*
     * First check if a dentry matching the name already exists,
     * if not go ahead and create it now.
     */
    found = d_hash_and_lookup(dentry->d_parent, name);
    if (!found) {
        new = d_alloc(dentry->d_parent, name);
        if (!new) {
            error = -ENOMEM;
            goto err_out;
        }

        found = d_splice_alias(inode, new);
        if (found) {
            dput(new);
            return found;
        }
        return new;
    }

    /*
     * If a matching dentry exists, and it's not negative use it.
     *
     * Decrement the reference count to balance the iget() done
     * earlier on.
     */
    if (found->d_inode) {
        if (unlikely(found->d_inode != inode)) {
            /* This can't happen because bad inodes are unhashed. */
            BUG_ON(!is_bad_inode(inode));
            BUG_ON(!is_bad_inode(found->d_inode));
        }
        iput(inode);
        return found;
    }

    /*
     * Negative dentry: instantiate it unless the inode is a directory and
     * already has a dentry.
     */
    spin_lock(&inode->i_lock);
    if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
        __d_instantiate(found, inode);
        spin_unlock(&inode->i_lock);
        security_d_instantiate(found, inode);
        return found;
    }

    /*
     * In case a directory already has a (disconnected) entry grab a
     * reference to it, move it in place and use it.
     */
    new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
    __dget(new);
    spin_unlock(&inode->i_lock);
    security_d_instantiate(found, inode);
    d_move(new, found);
    iput(inode);
    dput(found);
    return new;

err_out:
    iput(inode);
    return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);

/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seq: returns d_seq value at the point where the dentry was found
 * @inode: returns dentry->d_inode when the inode was found valid.
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 * held, and rcu_read_lock held. The returned dentry must not be stored into
 * without taking d_lock and checking d_seq sequence count against @seq
 * returned here.
 *
 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
                unsigned *seq, struct inode **inode)
{
    unsigned int len = name->len;
    unsigned int hash = name->hash;
    const unsigned char *str = name->name;
    struct hlist_bl_head *b = d_hash(parent, hash);
    struct hlist_bl_node *node;
    struct dentry *dentry;

    /*
1779     * Note: There is significant duplication with __d_lookup() which is
1780     * required to prevent single threaded performance regressions
1781     * especially on architectures where smp_rmb (in seqcounts) is costly.
1782     * Keep the two functions in sync.
1783     */
1784
1785    /*
1786     * The hash list is protected using RCU.
1787     *
1788     * Carefully use d_seq when comparing a candidate dentry, to avoid
1789     * races with d_move().
1790     *
1791     * It is possible that concurrent renames can mess up our list
1792     * walk here and cause us to miss our dentry, producing a
1793     * false-negative result. d_lookup() protects against concurrent
1794     * renames using the rename_lock seqlock.
1795     *
1796     * See Documentation/filesystems/path-lookup.txt for more details.
1797     */
1798    hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1799        struct inode *i;
1800        const char *tname;
1801        int tlen;
1802
1803        if (dentry->d_name.hash != hash)
1804            continue;
1805
1806seqretry:
1807        *seq = read_seqcount_begin(&dentry->d_seq);
1808        if (dentry->d_parent != parent)
1809            continue;
1810        if (d_unhashed(dentry))
1811            continue;
1812        tlen = dentry->d_name.len;
1813        tname = dentry->d_name.name;
1814        i = dentry->d_inode;
1815        prefetch(tname);
1816        /*
1817         * This seqcount check is required to ensure name and
1818         * len are loaded atomically, so as not to walk off the
1819         * edge of memory when walking. If we could load this
1820         * atomically some other way, we could drop this check.
1821         */
1822        if (read_seqcount_retry(&dentry->d_seq, *seq))
1823            goto seqretry;
1824        if (parent->d_flags & DCACHE_OP_COMPARE) {
1825            if (parent->d_op->d_compare(parent, *inode,
1826                        dentry, i,
1827                        tlen, tname, name))
1828                continue;
1829        } else {
1830            if (dentry_cmp(tname, tlen, str, len))
1831                continue;
1832        }
1833        /*
1834         * No extra seqcount check is required after the name
1835         * compare. The caller must perform a seqcount check in
1836         * order to do anything useful with the returned dentry
1837         * anyway.
1838         */
1839        *inode = i;
1840        return dentry;
1841    }
1842    return NULL;
1843}
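
/*
 * Illustrative calling pattern (a sketch, not the actual fs/namei.c
 * code). The caller is assumed to hold rcu_read_lock(); *inode is
 * preloaded with the parent's inode because ->d_compare() may consult
 * it, and __d_lookup_rcu() overwrites it with the child's inode.
 */
static struct dentry *example_rcu_lookup(struct dentry *parent,
                                         struct qstr *name, unsigned *seq)
{
    struct inode *inode = parent->d_inode;    /* in: parent, out: child */
    struct dentry *dentry;

    dentry = __d_lookup_rcu(parent, name, seq, &inode);
    if (dentry && read_seqcount_retry(&dentry->d_seq, *seq))
        return NULL;    /* raced with d_move(): fall back to ref-walk */
    return dentry;      /* no reference is held: rcu-walk only */
}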
1844
1845/**
1846 * d_lookup - search for a dentry
1847 * @parent: parent dentry
1848 * @name: qstr of name we wish to find
1849 * Returns: dentry, or NULL
1850 *
1851 * d_lookup searches the children of the parent dentry for the name in
1852 * question. If the dentry is found its reference count is incremented and the
1853 * dentry is returned. The caller must use dput to free the entry when it has
1854 * finished using it. %NULL is returned if the dentry does not exist.
1855 */
1856struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1857{
1858    struct dentry *dentry;
1859    unsigned seq;
1860
1861    do {
1862        seq = read_seqbegin(&rename_lock);
1863        dentry = __d_lookup(parent, name);
1864        if (dentry)
1865            break;
1866    } while (read_seqretry(&rename_lock, seq));
1867    return dentry;
1868}
1869EXPORT_SYMBOL(d_lookup);
1870
1871/**
1872 * __d_lookup - search for a dentry (racy)
1873 * @parent: parent dentry
1874 * @name: qstr of name we wish to find
1875 * Returns: dentry, or NULL
1876 *
1877 * __d_lookup is like d_lookup, however it may (rarely) return a
1878 * false-negative result due to unrelated rename activity.
1879 *
1880 * __d_lookup is slightly faster because it avoids the rename_lock read
1881 * seqlock; however, it must be used carefully, e.g. by falling back to
1882 * d_lookup() on failure (see the sketch after this function).
1883 *
1884 * __d_lookup callers must be commented.
1885 */
1886struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1887{
1888    unsigned int len = name->len;
1889    unsigned int hash = name->hash;
1890    const unsigned char *str = name->name;
1891    struct hlist_bl_head *b = d_hash(parent, hash);
1892    struct hlist_bl_node *node;
1893    struct dentry *found = NULL;
1894    struct dentry *dentry;
1895
1896    /*
1897     * Note: There is significant duplication with __d_lookup_rcu which is
1898     * required to prevent single threaded performance regressions
1899     * especially on architectures where smp_rmb (in seqcounts) is costly.
1900     * Keep the two functions in sync.
1901     */
1902
1903    /*
1904     * The hash list is protected using RCU.
1905     *
1906     * Take d_lock when comparing a candidate dentry, to avoid races
1907     * with d_move().
1908     *
1909     * It is possible that concurrent renames can mess up our list
1910     * walk here and cause us to miss our dentry, producing a
1911     * false-negative result. d_lookup() protects against concurrent
1912     * renames using the rename_lock seqlock.
1913     *
1914     * See Documentation/filesystems/path-lookup.txt for more details.
1915     */
1916    rcu_read_lock();
1917    
1918    hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1919        const char *tname;
1920        int tlen;
1921
1922        if (dentry->d_name.hash != hash)
1923            continue;
1924
1925        spin_lock(&dentry->d_lock);
1926        if (dentry->d_parent != parent)
1927            goto next;
1928        if (d_unhashed(dentry))
1929            goto next;
1930
1931        /*
1932         * It is safe to compare names since d_move() cannot
1933         * change the qstr (protected by d_lock).
1934         */
1935        tlen = dentry->d_name.len;
1936        tname = dentry->d_name.name;
1937        if (parent->d_flags & DCACHE_OP_COMPARE) {
1938            if (parent->d_op->d_compare(parent, parent->d_inode,
1939                        dentry, dentry->d_inode,
1940                        tlen, tname, name))
1941                goto next;
1942        } else {
1943            if (dentry_cmp(tname, tlen, str, len))
1944                goto next;
1945        }
1946
1947        dentry->d_count++;
1948        found = dentry;
1949        spin_unlock(&dentry->d_lock);
1950        break;
1951next:
1952        spin_unlock(&dentry->d_lock);
1953    }
1954    rcu_read_unlock();
1955
1956    return found;
1957}
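
/*
 * Sketch of the "careful caller" pattern referred to above: try the
 * lockless __d_lookup() first and fall back to d_lookup(), which takes
 * the rename_lock seqlock, only when the fast path finds nothing.
 */
static struct dentry *example_lookup_fast_then_slow(struct dentry *parent,
                                                    struct qstr *name)
{
    struct dentry *dentry = __d_lookup(parent, name);

    if (!dentry)    /* may be a false negative due to a rename */
        dentry = d_lookup(parent, name);
    return dentry;
}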
1958
1959/**
1960 * d_hash_and_lookup - hash the qstr then search for a dentry
1961 * @dir: Directory to search in
1962 * @name: qstr of name we wish to find
1963 *
1964 * On hash failure or on lookup failure, NULL is returned.
1965 */
1966struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1967{
1968    struct dentry *dentry = NULL;
1969
1970    /*
1971     * Check for a fs-specific hash function. Note that we must
1972     * calculate the standard hash first, as the d_op->d_hash()
1973     * routine may choose to leave the hash value unchanged.
1974     */
1975    name->hash = full_name_hash(name->name, name->len);
1976    if (dir->d_flags & DCACHE_OP_HASH) {
1977        if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1978            goto out;
1979    }
1980    dentry = d_lookup(dir, name);
1981out:
1982    return dentry;
1983}
1984
1985/**
1986 * d_validate - verify dentry provided from insecure source (deprecated)
1987 * @dentry: The dentry alleged to be valid child of @dparent
1988 * @dparent: The parent dentry (known to be valid)
1989 *
1990 * An insecure source has sent us a dentry; here we verify it and dget() it.
1991 * This is used by ncpfs in its readdir implementation.
1992 * Zero is returned if the dentry is invalid.
1993 *
1994 * This function is slow for big directories, and deprecated, do not use it.
1995 */
1996int d_validate(struct dentry *dentry, struct dentry *dparent)
1997{
1998    struct dentry *child;
1999
2000    spin_lock(&dparent->d_lock);
2001    list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2002        if (dentry == child) {
2003            spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2004            __dget_dlock(dentry);
2005            spin_unlock(&dentry->d_lock);
2006            spin_unlock(&dparent->d_lock);
2007            return 1;
2008        }
2009    }
2010    spin_unlock(&dparent->d_lock);
2011
2012    return 0;
2013}
2014EXPORT_SYMBOL(d_validate);
2015
2016/*
2017 * When a file is deleted, we have two options:
2018 * - turn this dentry into a negative dentry
2019 * - unhash this dentry and free it.
2020 *
2021 * Usually, we want to just turn this into
2022 * a negative dentry, but if anybody else is
2023 * currently using the dentry or the inode,
2024 * we can't do that; instead we fall back on removing
2025 * it from the hash queues and waiting for
2026 * it to be deleted later when it has no users.
2027 */
2028 
2029/**
2030 * d_delete - delete a dentry
2031 * @dentry: The dentry to delete
2032 *
2033 * Turn the dentry into a negative dentry if possible, otherwise
2034 * remove it from the hash queues so it can be deleted later
2035 */
2036 
2037void d_delete(struct dentry * dentry)
2038{
2039    struct inode *inode;
2040    int isdir = 0;
2041    /*
2042     * Are we the only user?
2043     */
2044again:
2045    spin_lock(&dentry->d_lock);
2046    inode = dentry->d_inode;
2047    isdir = inode && S_ISDIR(inode->i_mode);
2048    if (dentry->d_count == 1) {
2049        if (inode && !spin_trylock(&inode->i_lock)) {
2050            spin_unlock(&dentry->d_lock);
2051            cpu_relax();
2052            goto again;
2053        }
2054        dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2055        dentry_unlink_inode(dentry);
2056        fsnotify_nameremove(dentry, isdir);
2057        return;
2058    }
2059
2060    if (!d_unhashed(dentry))
2061        __d_drop(dentry);
2062
2063    spin_unlock(&dentry->d_lock);
2064
2065    fsnotify_nameremove(dentry, isdir);
2066}
2067EXPORT_SYMBOL(d_delete);
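
/*
 * Illustrative sketch of the typical caller (cf. vfs_unlink(); details
 * such as i_mutex handling are elided, and example_fs_remove() is a
 * hypothetical fs-specific hook):
 */
extern int example_fs_remove(struct inode *dir, struct dentry *dentry);

static int example_unlink(struct inode *dir, struct dentry *dentry)
{
    int error = example_fs_remove(dir, dentry);

    if (!error)
        d_delete(dentry);    /* make negative, or unhash if busy */
    return error;
}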
2068
2069static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2070{
2071    BUG_ON(!d_unhashed(entry));
2072    hlist_bl_lock(b);
2073    entry->d_flags |= DCACHE_RCUACCESS;
2074    hlist_bl_add_head_rcu(&entry->d_hash, b);
2075    hlist_bl_unlock(b);
2076}
2077
2078static void _d_rehash(struct dentry * entry)
2079{
2080    __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2081}
2082
2083/**
2084 * d_rehash - add an entry back to the hash
2085 * @entry: dentry to add to the hash
2086 *
2087 * Adds a dentry to the hash according to its name.
2088 */
2089 
2090void d_rehash(struct dentry * entry)
2091{
2092    spin_lock(&entry->d_lock);
2093    _d_rehash(entry);
2094    spin_unlock(&entry->d_lock);
2095}
2096EXPORT_SYMBOL(d_rehash);
2097
2098/**
2099 * dentry_update_name_case - update case insensitive dentry with a new name
2100 * @dentry: dentry to be updated
2101 * @name: new name
2102 *
2103 * Update a case insensitive dentry with new case of name.
2104 *
2105 * dentry must have been returned by d_lookup with name @name. Old and new
2106 * name lengths must match (ie. no d_compare which allows mismatched name
2107 * lengths).
2108 *
2109 * Parent inode i_mutex must be held over d_lookup and into this call (to
2110 * keep renames and concurrent inserts, and readdir(2) away).
2111 */
2112void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2113{
2114    BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2115    BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2116
2117    spin_lock(&dentry->d_lock);
2118    write_seqcount_begin(&dentry->d_seq);
2119    memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2120    write_seqcount_end(&dentry->d_seq);
2121    spin_unlock(&dentry->d_lock);
2122}
2123EXPORT_SYMBOL(dentry_update_name_case);
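
/*
 * Sketch of the intended use (hypothetical filesystem code): with the
 * parent's i_mutex held over both calls, refresh the cached case of a
 * name found by d_lookup() to the case the caller supplied.
 */
static void example_update_case(struct dentry *parent, struct qstr *name)
{
    struct dentry *dentry;

    mutex_lock(&parent->d_inode->i_mutex);
    dentry = d_lookup(parent, name);
    if (dentry) {
        dentry_update_name_case(dentry, name);
        dput(dentry);
    }
    mutex_unlock(&parent->d_inode->i_mutex);
}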
2124
2125static void switch_names(struct dentry *dentry, struct dentry *target)
2126{
2127    if (dname_external(target)) {
2128        if (dname_external(dentry)) {
2129            /*
2130             * Both external: swap the pointers
2131             */
2132            swap(target->d_name.name, dentry->d_name.name);
2133        } else {
2134            /*
2135             * dentry:internal, target:external. Steal target's
2136             * storage and make target internal.
2137             */
2138            memcpy(target->d_iname, dentry->d_name.name,
2139                    dentry->d_name.len + 1);
2140            dentry->d_name.name = target->d_name.name;
2141            target->d_name.name = target->d_iname;
2142        }
2143    } else {
2144        if (dname_external(dentry)) {
2145            /*
2146             * dentry:external, target:internal. Give dentry's
2147             * storage to target and make dentry internal
2148             */
2149            memcpy(dentry->d_iname, target->d_name.name,
2150                    target->d_name.len + 1);
2151            target->d_name.name = dentry->d_name.name;
2152            dentry->d_name.name = dentry->d_iname;
2153        } else {
2154            /*
2155             * Both are internal. Just copy target to dentry
2156             */
2157            memcpy(dentry->d_iname, target->d_name.name,
2158                    target->d_name.len + 1);
2159            dentry->d_name.len = target->d_name.len;
2160            return;
2161        }
2162    }
2163    swap(dentry->d_name.len, target->d_name.len);
2164}
2165
2166static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2167{
2168    /*
2169     * XXXX: do we really need to take target->d_lock?
2170     */
2171    if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2172        spin_lock(&target->d_parent->d_lock);
2173    else {
2174        if (d_ancestor(dentry->d_parent, target->d_parent)) {
2175            spin_lock(&dentry->d_parent->d_lock);
2176            spin_lock_nested(&target->d_parent->d_lock,
2177                        DENTRY_D_LOCK_NESTED);
2178        } else {
2179            spin_lock(&target->d_parent->d_lock);
2180            spin_lock_nested(&dentry->d_parent->d_lock,
2181                        DENTRY_D_LOCK_NESTED);
2182        }
2183    }
2184    if (target < dentry) {
2185        spin_lock_nested(&target->d_lock, 2);
2186        spin_lock_nested(&dentry->d_lock, 3);
2187    } else {
2188        spin_lock_nested(&dentry->d_lock, 2);
2189        spin_lock_nested(&target->d_lock, 3);
2190    }
2191}
2192
2193static void dentry_unlock_parents_for_move(struct dentry *dentry,
2194                    struct dentry *target)
2195{
2196    if (target->d_parent != dentry->d_parent)
2197        spin_unlock(&dentry->d_parent->d_lock);
2198    if (target->d_parent != target)
2199        spin_unlock(&target->d_parent->d_lock);
2200}
2201
2202/*
2203 * When switching names, the actual string doesn't strictly have to
2204 * be preserved in the target - because we're dropping the target
2205 * anyway. As such, we can just do a simple memcpy() to copy over
2206 * the new name before we switch.
2207 *
2208 * Note that we have to be a lot more careful about getting the hash
2209 * switched - we have to switch the hash value properly even if it
2210 * then no longer matches the actual (corrupted) string of the target.
2211 * The hash value has to match the hash queue that the dentry is on.
2212 */
2213/*
2214 * __d_move - move a dentry
2215 * @dentry: entry to move
2216 * @target: new dentry
2217 *
2218 * Update the dcache to reflect the move of a file name. Negative
2219 * dcache entries should not be moved in this way. The caller must hold
2220 * rename_lock.
2221 */
2222static void __d_move(struct dentry * dentry, struct dentry * target)
2223{
2224    if (!dentry->d_inode)
2225        printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2226
2227    BUG_ON(d_ancestor(dentry, target));
2228    BUG_ON(d_ancestor(target, dentry));
2229
2230    dentry_lock_for_move(dentry, target);
2231
2232    write_seqcount_begin(&dentry->d_seq);
2233    write_seqcount_begin(&target->d_seq);
2234
2235    /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2236
2237    /*
2238     * Move the dentry to the target hash queue. Don't bother checking
2239     * for the same hash queue because of how unlikely it is.
2240     */
2241    __d_drop(dentry);
2242    __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2243
2244    /* Unhash the target: dput() will then get rid of it */
2245    __d_drop(target);
2246
2247    list_del(&dentry->d_u.d_child);
2248    list_del(&target->d_u.d_child);
2249
2250    /* Switch the names.. */
2251    switch_names(dentry, target);
2252    swap(dentry->d_name.hash, target->d_name.hash);
2253
2254    /* ... and switch the parents */
2255    if (IS_ROOT(dentry)) {
2256        dentry->d_parent = target->d_parent;
2257        target->d_parent = target;
2258        INIT_LIST_HEAD(&target->d_u.d_child);
2259    } else {
2260        swap(dentry->d_parent, target->d_parent);
2261
2262        /* And add them back to the (new) parent lists */
2263        list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2264    }
2265
2266    list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2267
2268    write_seqcount_end(&target->d_seq);
2269    write_seqcount_end(&dentry->d_seq);
2270
2271    dentry_unlock_parents_for_move(dentry, target);
2272    spin_unlock(&target->d_lock);
2273    fsnotify_d_move(dentry);
2274    spin_unlock(&dentry->d_lock);
2275}
2276
2277/*
2278 * d_move - move a dentry
2279 * @dentry: entry to move
2280 * @target: new dentry
2281 *
2282 * Update the dcache to reflect the move of a file name. Negative
2283 * dcache entries should not be moved in this way.
2284 */
2285void d_move(struct dentry *dentry, struct dentry *target)
2286{
2287    write_seqlock(&rename_lock);
2288    __d_move(dentry, target);
2289    write_sequnlock(&rename_lock);
2290}
2291EXPORT_SYMBOL(d_move);
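
/*
 * Illustrative sketch: after the filesystem has renamed the object on
 * disk, the dcache entry is moved under its new name (this is roughly
 * what vfs_rename() does; example_fs_rename() is hypothetical).
 */
extern int example_fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry);

static int example_rename(struct inode *old_dir, struct dentry *old_dentry,
                          struct inode *new_dir, struct dentry *new_dentry)
{
    int error = example_fs_rename(old_dir, old_dentry,
                                  new_dir, new_dentry);

    if (!error)
        d_move(old_dentry, new_dentry);
    return error;
}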
2292
2293/**
2294 * d_ancestor - search for an ancestor
2295 * @p1: ancestor dentry
2296 * @p2: child dentry
2297 *
2298 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2299 * an ancestor of p2, else NULL.
2300 */
2301struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2302{
2303    struct dentry *p;
2304
2305    for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2306        if (p->d_parent == p1)
2307            return p;
2308    }
2309    return NULL;
2310}
2311
2312/*
2313 * This helper attempts to cope with remotely renamed directories
2314 *
2315 * It assumes that the caller is already holding
2316 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2317 *
2318 * Note: If ever the locking in lock_rename() changes, then please
2319 * remember to update this too...
2320 */
2321static struct dentry *__d_unalias(struct inode *inode,
2322        struct dentry *dentry, struct dentry *alias)
2323{
2324    struct mutex *m1 = NULL, *m2 = NULL;
2325    struct dentry *ret;
2326
2327    /* If alias and dentry share a parent, then no extra locks required */
2328    if (alias->d_parent == dentry->d_parent)
2329        goto out_unalias;
2330
2331    /* See lock_rename() */
2332    ret = ERR_PTR(-EBUSY);
2333    if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2334        goto out_err;
2335    m1 = &dentry->d_sb->s_vfs_rename_mutex;
2336    if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2337        goto out_err;
2338    m2 = &alias->d_parent->d_inode->i_mutex;
2339out_unalias:
2340    __d_move(alias, dentry);
2341    ret = alias;
2342out_err:
2343    spin_unlock(&inode->i_lock);
2344    if (m2)
2345        mutex_unlock(m2);
2346    if (m1)
2347        mutex_unlock(m1);
2348    return ret;
2349}
2350
2351/*
2352 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2353 * named dentry in place of the dentry to be replaced.
2354 * returns with anon->d_lock held!
2355 */
2356static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2357{
2358    struct dentry *dparent, *aparent;
2359
2360    dentry_lock_for_move(anon, dentry);
2361
2362    write_seqcount_begin(&dentry->d_seq);
2363    write_seqcount_begin(&anon->d_seq);
2364
2365    dparent = dentry->d_parent;
2366    aparent = anon->d_parent;
2367
2368    switch_names(dentry, anon);
2369    swap(dentry->d_name.hash, anon->d_name.hash);
2370
2371    dentry->d_parent = (aparent == anon) ? dentry : aparent;
2372    list_del(&dentry->d_u.d_child);
2373    if (!IS_ROOT(dentry))
2374        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2375    else
2376        INIT_LIST_HEAD(&dentry->d_u.d_child);
2377
2378    anon->d_parent = (dparent == dentry) ? anon : dparent;
2379    list_del(&anon->d_u.d_child);
2380    if (!IS_ROOT(anon))
2381        list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2382    else
2383        INIT_LIST_HEAD(&anon->d_u.d_child);
2384
2385    write_seqcount_end(&dentry->d_seq);
2386    write_seqcount_end(&anon->d_seq);
2387
2388    dentry_unlock_parents_for_move(anon, dentry);
2389    spin_unlock(&dentry->d_lock);
2390
2391    /* anon->d_lock still locked, returns locked */
2392    anon->d_flags &= ~DCACHE_DISCONNECTED;
2393}
2394
2395/**
2396 * d_materialise_unique - introduce an inode into the tree
2397 * @dentry: candidate dentry
2398 * @inode: inode to bind to the dentry, to which aliases may be attached
2399 *
2400 * Introduces a dentry into the tree, substituting an extant disconnected
2401 * root directory alias in its place if there is one.
2402 */
2403struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2404{
2405    struct dentry *actual;
2406
2407    BUG_ON(!d_unhashed(dentry));
2408
2409    if (!inode) {
2410        actual = dentry;
2411        __d_instantiate(dentry, NULL);
2412        d_rehash(actual);
2413        goto out_nolock;
2414    }
2415
2416    spin_lock(&inode->i_lock);
2417
2418    if (S_ISDIR(inode->i_mode)) {
2419        struct dentry *alias;
2420
2421        /* Does an aliased dentry already exist? */
2422        alias = __d_find_alias(inode, 0);
2423        if (alias) {
2424            actual = alias;
2425            write_seqlock(&rename_lock);
2426
2427            if (d_ancestor(alias, dentry)) {
2428                /* Check for loops */
2429                actual = ERR_PTR(-ELOOP);
2430            } else if (IS_ROOT(alias)) {
2431                /* Is this an anonymous mountpoint that we
2432                 * could splice into our tree? */
2433                __d_materialise_dentry(dentry, alias);
2434                write_sequnlock(&rename_lock);
2435                __d_drop(alias);
2436                goto found;
2437            } else {
2438                /* Nope, but we must(!) avoid directory
2439                 * aliasing */
2440                actual = __d_unalias(inode, dentry, alias);
2441            }
2442            write_sequnlock(&rename_lock);
2443            if (IS_ERR(actual))
2444                dput(alias);
2445            goto out_nolock;
2446        }
2447    }
2448
2449    /* Add a unique reference */
2450    actual = __d_instantiate_unique(dentry, inode);
2451    if (!actual)
2452        actual = dentry;
2453    else
2454        BUG_ON(!d_unhashed(actual));
2455
2456    spin_lock(&actual->d_lock);
2457found:
2458    _d_rehash(actual);
2459    spin_unlock(&actual->d_lock);
2460    spin_unlock(&inode->i_lock);
2461out_nolock:
2462    if (actual == dentry) {
2463        security_d_instantiate(dentry, inode);
2464        return NULL;
2465    }
2466
2467    iput(inode);
2468    return actual;
2469}
2470EXPORT_SYMBOL_GPL(d_materialise_unique);
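
/*
 * Sketch of the intended calling pattern, modelled loosely on what
 * network filesystems such as NFS do in ->lookup() (simplified;
 * example_remote_iget() is hypothetical):
 */
extern struct inode *example_remote_iget(struct inode *dir,
                                         const struct qstr *name);

static struct dentry *example_remote_lookup(struct inode *dir,
                                            struct dentry *dentry)
{
    struct inode *inode = example_remote_iget(dir, &dentry->d_name);

    if (IS_ERR(inode))
        return ERR_CAST(inode);
    /* may return a pre-existing alias instead of @dentry */
    return d_materialise_unique(dentry, inode);
}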
2471
2472static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2473{
2474    *buflen -= namelen;
2475    if (*buflen < 0)
2476        return -ENAMETOOLONG;
2477    *buffer -= namelen;
2478    memcpy(*buffer, str, namelen);
2479    return 0;
2480}
2481
2482static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2483{
2484    return prepend(buffer, buflen, name->name, name->len);
2485}
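
/*
 * A tiny worked example of the right-to-left construction used by the
 * path-building code below (illustrative only): components are pushed
 * in front of an initially empty, NUL-terminated string.
 */
static int example_build_path(char *buf, int buflen, char **out)
{
    char *p = buf + buflen;
    int error;

    error = prepend(&p, &buflen, "\0", 1);        /* terminator first */
    if (!error)
        error = prepend(&p, &buflen, "bc", 2);
    if (!error)
        error = prepend(&p, &buflen, "/", 1);
    if (!error)
        error = prepend(&p, &buflen, "a", 1);
    if (!error)
        error = prepend(&p, &buflen, "/", 1);
    *out = p;    /* on success, p points at "/a/bc" inside buf */
    return error;
}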
2486
2487/**
2488 * prepend_path - Prepend path string to a buffer
2489 * @path: the dentry/vfsmount to report
2490 * @root: root vfsmnt/dentry (may be modified by this function)
2491 * @buffer: pointer to the end of the buffer
2492 * @buflen: pointer to buffer length
2493 *
2494 * Caller holds the rename_lock.
2495 *
2496 * If path is not reachable from the supplied root, then the value of
2497 * root is changed (without modifying refcounts).
2498 */
2499static int prepend_path(const struct path *path, struct path *root,
2500            char **buffer, int *buflen)
2501{
2502    struct dentry *dentry = path->dentry;
2503    struct vfsmount *vfsmnt = path->mnt;
2504    bool slash = false;
2505    int error = 0;
2506
2507    br_read_lock(vfsmount_lock);
2508    while (dentry != root->dentry || vfsmnt != root->mnt) {
2509        struct dentry * parent;
2510
2511        if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2512            /* Global root? */
2513            if (vfsmnt->mnt_parent == vfsmnt) {
2514                goto global_root;
2515            }
2516            dentry = vfsmnt->mnt_mountpoint;
2517            vfsmnt = vfsmnt->mnt_parent;
2518            continue;
2519        }
2520        parent = dentry->d_parent;
2521        prefetch(parent);
2522        spin_lock(&dentry->d_lock);
2523        error = prepend_name(buffer, buflen, &dentry->d_name);
2524        spin_unlock(&dentry->d_lock);
2525        if (!error)
2526            error = prepend(buffer, buflen, "/", 1);
2527        if (error)
2528            break;
2529
2530        slash = true;
2531        dentry = parent;
2532    }
2533
2534out:
2535    if (!error && !slash)
2536        error = prepend(buffer, buflen, "/", 1);
2537
2538    br_read_unlock(vfsmount_lock);
2539    return error;
2540
2541global_root:
2542    /*
2543     * Filesystems needing to implement special "root names"
2544     * should do so with ->d_dname()
2545     */
2546    if (IS_ROOT(dentry) &&
2547        (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2548        WARN(1, "Root dentry has weird name <%.*s>\n",
2549             (int) dentry->d_name.len, dentry->d_name.name);
2550    }
2551    root->mnt = vfsmnt;
2552    root->dentry = dentry;
2553    goto out;
2554}
2555
2556/**
2557 * __d_path - return the path of a dentry
2558 * @path: the dentry/vfsmount to report
2559 * @root: root vfsmnt/dentry (may be modified by this function)
2560 * @buf: buffer to return value in
2561 * @buflen: buffer length
2562 *
2563 * Convert a dentry into an ASCII path name.
2564 *
2565 * Returns a pointer into the buffer or an error code if the
2566 * path was too long.
2567 *
2568 * "buflen" should be positive.
2569 *
2570 * If path is not reachable from the supplied root, then the value of
2571 * root is changed (without modifying refcounts).
2572 */
2573char *__d_path(const struct path *path, struct path *root,
2574           char *buf, int buflen)
2575{
2576    char *res = buf + buflen;
2577    int error;
2578
2579    prepend(&res, &buflen, "\0", 1);
2580    write_seqlock(&rename_lock);
2581    error = prepend_path(path, root, &res, &buflen);
2582    write_sequnlock(&rename_lock);
2583
2584    if (error)
2585        return ERR_PTR(error);
2586    return res;
2587}
2588
2589/*
2590 * same as __d_path but appends "(deleted)" for unlinked files.
2591 */
2592static int path_with_deleted(const struct path *path, struct path *root,
2593                 char **buf, int *buflen)
2594{
2595    prepend(buf, buflen, "\0", 1);
2596    if (d_unlinked(path->dentry)) {
2597        int error = prepend(buf, buflen, " (deleted)", 10);
2598        if (error)
2599            return error;
2600    }
2601
2602    return prepend_path(path, root, buf, buflen);
2603}
2604
2605static int prepend_unreachable(char **buffer, int *buflen)
2606{
2607    return prepend(buffer, buflen, "(unreachable)", 13);
2608}
2609
2610/**
2611 * d_path - return the path of a dentry
2612 * @path: path to report
2613 * @buf: buffer to return value in
2614 * @buflen: buffer length
2615 *
2616 * Convert a dentry into an ASCII path name. If the entry has been deleted,
2617 * the string " (deleted)" is appended. Note that this is ambiguous.
2618 *
2619 * Returns a pointer into the buffer or an error code if the path was
2620 * too long. Note: Callers should use the returned pointer, not the passed
2621 * in buffer, to use the name! The implementation often starts at an offset
2622 * into the buffer, and may leave 0 bytes at the start.
2623 *
2624 * "buflen" should be positive.
2625 */
2626char *d_path(const struct path *path, char *buf, int buflen)
2627{
2628    char *res = buf + buflen;
2629    struct path root;
2630    struct path tmp;
2631    int error;
2632
2633    /*
2634     * We have various synthetic filesystems that never get mounted. On
2635     * these filesystems dentries are never used for lookup purposes, and
2636     * thus don't need to be hashed. They also don't need a name until a
2637     * user wants to identify the object in /proc/pid/fd/. The little hack
2638     * below allows us to generate a name for these objects on demand:
2639     */
2640    if (path->dentry->d_op && path->dentry->d_op->d_dname)
2641        return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2642
2643    get_fs_root(current->fs, &root);
2644    write_seqlock(&rename_lock);
2645    tmp = root;
2646    error = path_with_deleted(path, &tmp, &res, &buflen);
2647    if (error)
2648        res = ERR_PTR(error);
2649    write_sequnlock(&rename_lock);
2650    path_put(&root);
2651    return res;
2652}
2653EXPORT_SYMBOL(d_path);
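
/*
 * Typical usage (a sketch): render the path of an open file into a
 * temporary page. Note that the result points into the buffer, not at
 * its start.
 */
static void example_print_file_path(struct file *file)
{
    char *page = (char *)__get_free_page(GFP_KERNEL);
    char *path;

    if (!page)
        return;
    path = d_path(&file->f_path, page, PAGE_SIZE);
    if (!IS_ERR(path))
        printk(KERN_INFO "file path: %s\n", path);
    free_page((unsigned long)page);
}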
2654
2655/**
2656 * d_path_with_unreachable - return the path of a dentry
2657 * @path: path to report
2658 * @buf: buffer to return value in
2659 * @buflen: buffer length
2660 *
2661 * The difference from d_path() is that this prepends "(unreachable)"
2662 * to paths which are unreachable from the current process' root.
2663 */
2664char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2665{
2666    char *res = buf + buflen;
2667    struct path root;
2668    struct path tmp;
2669    int error;
2670
2671    if (path->dentry->d_op && path->dentry->d_op->d_dname)
2672        return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2673
2674    get_fs_root(current->fs, &root);
2675    write_seqlock(&rename_lock);
2676    tmp = root;
2677    error = path_with_deleted(path, &tmp, &res, &buflen);
2678    if (!error && !path_equal(&tmp, &root))
2679        error = prepend_unreachable(&res, &buflen);
2680    write_sequnlock(&rename_lock);
2681    path_put(&root);
2682    if (error)
2683        res = ERR_PTR(error);
2684
2685    return res;
2686}
2687
2688/*
2689 * Helper function for dentry_operations.d_dname() members
2690 */
2691char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2692            const char *fmt, ...)
2693{
2694    va_list args;
2695    char temp[64];
2696    int sz;
2697
2698    va_start(args, fmt);
2699    sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2700    va_end(args);
2701
2702    if (sz > sizeof(temp) || sz > buflen)
2703        return ERR_PTR(-ENAMETOOLONG);
2704
2705    buffer += buflen - sz;
2706    return memcpy(buffer, temp, sz);
2707}
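
/*
 * Example in the style of existing users (pipefs and friends name their
 * objects this way); a sketch for a hypothetical synthetic filesystem:
 */
static char *example_dname(struct dentry *dentry, char *buffer, int buflen)
{
    return dynamic_dname(dentry, buffer, buflen, "example:[%lu]",
                dentry->d_inode->i_ino);
}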
2708
2709/*
2710 * Write full pathname from the root of the filesystem into the buffer.
2711 */
2712static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2713{
2714    char *end = buf + buflen;
2715    char *retval;
2716
2717    prepend(&end, &buflen, "\0", 1);
2718    if (buflen < 1)
2719        goto Elong;
2720    /* Get '/' right */
2721    retval = end-1;
2722    *retval = '/';
2723
2724    while (!IS_ROOT(dentry)) {
2725        struct dentry *parent = dentry->d_parent;
2726        int error;
2727
2728        prefetch(parent);
2729        spin_lock(&dentry->d_lock);
2730        error = prepend_name(&end, &buflen, &dentry->d_name);
2731        spin_unlock(&dentry->d_lock);
2732        if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2733            goto Elong;
2734
2735        retval = end;
2736        dentry = parent;
2737    }
2738    return retval;
2739Elong:
2740    return ERR_PTR(-ENAMETOOLONG);
2741}
2742
2743char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2744{
2745    char *retval;
2746
2747    write_seqlock(&rename_lock);
2748    retval = __dentry_path(dentry, buf, buflen);
2749    write_sequnlock(&rename_lock);
2750
2751    return retval;
2752}
2753EXPORT_SYMBOL(dentry_path_raw);
2754
2755char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2756{
2757    char *p = NULL;
2758    char *retval;
2759
2760    write_seqlock(&rename_lock);
2761    if (d_unlinked(dentry)) {
2762        p = buf + buflen;
2763        if (prepend(&p, &buflen, "//deleted", 10) != 0)
2764            goto Elong;
2765        buflen++;
2766    }
2767    retval = __dentry_path(dentry, buf, buflen);
2768    write_sequnlock(&rename_lock);
2769    if (!IS_ERR(retval) && p)
2770        *p = '/'; /* restore '/' overridden with '\0' */
2771    return retval;
2772Elong:
2773    return ERR_PTR(-ENAMETOOLONG);
2774}
2775
2776/*
2777 * NOTE! The user-level library version returns a
2778 * character pointer. The kernel system call just
2779 * returns the length of the buffer filled (which
2780 * includes the ending '\0' character), or a negative
2781 * error value. So libc would do something like
2782 *
2783 *    char *getcwd(char *buf, size_t size)
2784 *    {
2785 *        int retval;
2786 *
2787 *        retval = sys_getcwd(buf, size);
2788 *        if (retval >= 0)
2789 *            return buf;
2790 *        errno = -retval;
2791 *        return NULL;
2792 *    }
2793 */
2794SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2795{
2796    int error;
2797    struct path pwd, root;
2798    char *page = (char *) __get_free_page(GFP_USER);
2799
2800    if (!page)
2801        return -ENOMEM;
2802
2803    get_fs_root_and_pwd(current->fs, &root, &pwd);
2804
2805    error = -ENOENT;
2806    write_seqlock(&rename_lock);
2807    if (!d_unlinked(pwd.dentry)) {
2808        unsigned long len;
2809        struct path tmp = root;
2810        char *cwd = page + PAGE_SIZE;
2811        int buflen = PAGE_SIZE;
2812
2813        prepend(&cwd, &buflen, "\0", 1);
2814        error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2815        write_sequnlock(&rename_lock);
2816
2817        if (error)
2818            goto out;
2819
2820        /* Unreachable from current root */
2821        if (!path_equal(&tmp, &root)) {
2822            error = prepend_unreachable(&cwd, &buflen);
2823            if (error)
2824                goto out;
2825        }
2826
2827        error = -ERANGE;
2828        len = PAGE_SIZE + page - cwd;
2829        if (len <= size) {
2830            error = len;
2831            if (copy_to_user(buf, cwd, len))
2832                error = -EFAULT;
2833        }
2834    } else {
2835        write_sequnlock(&rename_lock);
2836    }
2837
2838out:
2839    path_put(&pwd);
2840    path_put(&root);
2841    free_page((unsigned long) page);
2842    return error;
2843}
2844
2845/*
2846 * Test whether new_dentry is a subdirectory of old_dentry.
2847 *
2848 * Trivially implemented using the dcache structure
2849 */
2850
2851/**
2852 * is_subdir - is new dentry a subdirectory of old_dentry
2853 * @new_dentry: new dentry
2854 * @old_dentry: old dentry
2855 *
2856 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2857 * Returns 0 otherwise.
2858 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2859 */
2860  
2861int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2862{
2863    int result;
2864    unsigned seq;
2865
2866    if (new_dentry == old_dentry)
2867        return 1;
2868
2869    do {
2870        /* restart the whole check if we race with a rename */
2871        seq = read_seqbegin(&rename_lock);
2872        /*
2873         * Need rcu_read_lock() to protect against d_parent changing
2874         * under us due to d_move().
2875         */
2876        rcu_read_lock();
2877        if (d_ancestor(old_dentry, new_dentry))
2878            result = 1;
2879        else
2880            result = 0;
2881        rcu_read_unlock();
2882    } while (read_seqretry(&rename_lock, seq));
2883
2884    return result;
2885}
2886
2887int path_is_under(struct path *path1, struct path *path2)
2888{
2889    struct vfsmount *mnt = path1->mnt;
2890    struct dentry *dentry = path1->dentry;
2891    int res;
2892
2893    br_read_lock(vfsmount_lock);
2894    if (mnt != path2->mnt) {
2895        for (;;) {
2896            if (mnt->mnt_parent == mnt) {
2897                br_read_unlock(vfsmount_lock);
2898                return 0;
2899            }
2900            if (mnt->mnt_parent == path2->mnt)
2901                break;
2902            mnt = mnt->mnt_parent;
2903        }
2904        dentry = mnt->mnt_mountpoint;
2905    }
2906    res = is_subdir(dentry, path2->dentry);
2907    br_read_unlock(vfsmount_lock);
2908    return res;
2909}
2910EXPORT_SYMBOL(path_is_under);
2911
2912void d_genocide(struct dentry *root)
2913{
2914    struct dentry *this_parent;
2915    struct list_head *next;
2916    unsigned seq;
2917    int locked = 0;
2918
2919    seq = read_seqbegin(&rename_lock);
2920again:
2921    this_parent = root;
2922    spin_lock(&this_parent->d_lock);
2923repeat:
2924    next = this_parent->d_subdirs.next;
2925resume:
2926    while (next != &this_parent->d_subdirs) {
2927        struct list_head *tmp = next;
2928        struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2929        next = tmp->next;
2930
2931        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2932        if (d_unhashed(dentry) || !dentry->d_inode) {
2933            spin_unlock(&dentry->d_lock);
2934            continue;
2935        }
2936        if (!list_empty(&dentry->d_subdirs)) {
2937            spin_unlock(&this_parent->d_lock);
2938            spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2939            this_parent = dentry;
2940            spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2941            goto repeat;
2942        }
2943        if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2944            dentry->d_flags |= DCACHE_GENOCIDE;
2945            dentry->d_count--;
2946        }
2947        spin_unlock(&dentry->d_lock);
2948    }
2949    if (this_parent != root) {
2950        struct dentry *child = this_parent;
2951        if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2952            this_parent->d_flags |= DCACHE_GENOCIDE;
2953            this_parent->d_count--;
2954        }
2955        this_parent = try_to_ascend(this_parent, locked, seq);
2956        if (!this_parent)
2957            goto rename_retry;
2958        next = child->d_u.d_child.next;
2959        goto resume;
2960    }
2961    spin_unlock(&this_parent->d_lock);
2962    if (!locked && read_seqretry(&rename_lock, seq))
2963        goto rename_retry;
2964    if (locked)
2965        write_sequnlock(&rename_lock);
2966    return;
2967
2968rename_retry:
2969    locked = 1;
2970    write_seqlock(&rename_lock);
2971    goto again;
2972}
2973
2974/**
2975 * find_inode_number - check for dentry with name
2976 * @dir: directory to check
2977 * @name: Name to find.
2978 *
2979 * Check whether a dentry already exists for the given name,
2980 * and return the inode number if it has an inode. Otherwise
2981 * 0 is returned.
2982 *
2983 * This routine is used to post-process directory listings for
2984 * filesystems using synthetic inode numbers, and is necessary
2985 * to keep getcwd() working.
2986 */
2987 
2988ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2989{
2990    struct dentry * dentry;
2991    ino_t ino = 0;
2992
2993    dentry = d_hash_and_lookup(dir, name);
2994    if (dentry) {
2995        if (dentry->d_inode)
2996            ino = dentry->d_inode->i_ino;
2997        dput(dentry);
2998    }
2999    return ino;
3000}
3001EXPORT_SYMBOL(find_inode_number);
3002
3003static __initdata unsigned long dhash_entries;
3004static int __init set_dhash_entries(char *str)
3005{
3006    if (!str)
3007        return 0;
3008    dhash_entries = simple_strtoul(str, &str, 0);
3009    return 1;
3010}
3011__setup("dhash_entries=", set_dhash_entries);
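
/*
 * For example, booting with "dhash_entries=65536" on the kernel command
 * line overrides the memory-size-based sizing of the hash table below.
 */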
3012
3013static void __init dcache_init_early(void)
3014{
3015    int loop;
3016
3017    /* If hashes are distributed across NUMA nodes, defer
3018     * hash allocation until vmalloc space is available.
3019     */
3020    if (hashdist)
3021        return;
3022
3023    dentry_hashtable =
3024        alloc_large_system_hash("Dentry cache",
3025                    sizeof(struct hlist_bl_head),
3026                    dhash_entries,
3027                    13,
3028                    HASH_EARLY,
3029                    &d_hash_shift,
3030                    &d_hash_mask,
3031                    0);
3032
3033    for (loop = 0; loop < (1 << d_hash_shift); loop++)
3034        INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3035}
3036
3037static void __init dcache_init(void)
3038{
3039    int loop;
3040
3041    /*
3042     * A constructor could be added for stable state like the lists,
3043     * but it is probably not worth it because of the cache nature
3044     * of the dcache.
3045     */
3046    dentry_cache = KMEM_CACHE(dentry,
3047        SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3048    
3049    register_shrinker(&dcache_shrinker);
3050
3051    /* Hash may have been set up in dcache_init_early */
3052    if (!hashdist)
3053        return;
3054
3055    dentry_hashtable =
3056        alloc_large_system_hash("Dentry cache",
3057                    sizeof(struct hlist_bl_head),
3058                    dhash_entries,
3059                    13,
3060                    0,
3061                    &d_hash_shift,
3062                    &d_hash_mask,
3063                    0);
3064
3065    for (loop = 0; loop < (1 << d_hash_shift); loop++)
3066        INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3067}
3068
3069/* SLAB cache for __getname() consumers */
3070struct kmem_cache *names_cachep __read_mostly;
3071EXPORT_SYMBOL(names_cachep);
3072
3073EXPORT_SYMBOL(d_genocide);
3074
3075void __init vfs_caches_init_early(void)
3076{
3077    dcache_init_early();
3078    inode_init_early();
3079}
3080
3081void __init vfs_caches_init(unsigned long mempages)
3082{
3083    unsigned long reserve;
3084
3085    /* Base hash sizes on available memory, with a reserve equal to
3086       150% of current kernel size */
3087
3088    reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3089    mempages -= reserve;
3090
3091    names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3092            SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3093
3094    dcache_init();
3095    inode_init();
3096    files_init(mempages);
3097    mnt_init();
3098    bdev_cache_init();
3099    chrdev_init();
3100}
3101
