Root/lib/lru_cache.c

1/*
2   lru_cache.c
3
4   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
7   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10   drbd is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 2, or (at your option)
13   any later version.
14
15   drbd is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with drbd; see the file COPYING. If not, write to
22   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26#include <linux/module.h>
27#include <linux/bitops.h>
28#include <linux/slab.h>
29#include <linux/string.h> /* for memset */
30#include <linux/seq_file.h> /* for seq_printf */
31#include <linux/lru_cache.h>
32
33MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
34          "Lars Ellenberg <lars@linbit.com>");
35MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
36MODULE_LICENSE("GPL");
37
/*
 * Developer aid only: catches concurrent access, i.e. missing locking on
 * the user's side.  Both macros expect a variable named "lc" in scope.
 *
 * PARANOIA_ENTRY() sets the __LC_PARANOIA bit in lc->flags and BUG()s if
 * it was already set; RETURN() clears that bit again before returning.
 */
#define PARANOIA_ENTRY() \
    do { \
        BUG_ON(!lc); \
        BUG_ON(!lc->nr_elements); \
        BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
    } while (0)

#define RETURN(x...) \
    do { \
        clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
        return x; \
    } while (0)
49
/*
 * BUG() unless e is one of the elements tracked by lc: the index recorded
 * in e must be in range, and that slot of lc must point back at e.
 */
#define PARANOIA_LC_ELEMENT(lc, e) \
    do { \
        struct lru_cache *cache_ = (lc); \
        struct lc_element *elem_ = (e); \
        unsigned idx_ = elem_->lc_index; \
        BUG_ON(idx_ >= cache_->nr_elements); \
        BUG_ON(cache_->lc_element[idx_] != elem_); \
    } while (0)
57
58
59/* We need to atomically
60 * - try to grab the lock (set LC_LOCKED)
61 * - only if there is no pending transaction
62 * (neither LC_DIRTY nor LC_STARVING is set)
63 * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
64 * it is not sufficient to just say
65 * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
66 */
67int lc_try_lock(struct lru_cache *lc)
68{
69    unsigned long val;
70    do {
71        val = cmpxchg(&lc->flags, 0, LC_LOCKED);
72    } while (unlikely (val == LC_PARANOIA));
73    /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
74    return 0 == val;
75#if 0
76    /* Alternative approach, spin in case someone enters or leaves a
77     * PARANOIA_ENTRY()/RETURN() section. */
78    unsigned long old, new, val;
79    do {
80        old = lc->flags & LC_PARANOIA;
81        new = old | LC_LOCKED;
82        val = cmpxchg(&lc->flags, old, new);
83    } while (unlikely (val == (old ^ LC_PARANOIA)));
84    return old == val;
85#endif
86}
87
88/**
89 * lc_create - prepares to track objects in an active set
90 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
91 * @max_pending_changes: maximum changes to accumulate until a transaction is required
92 * @e_count: number of elements allowed to be active simultaneously
93 * @e_size: size of the tracked objects
94 * @e_off: offset to the &struct lc_element member in a tracked object
95 *
96 * Returns a pointer to a newly initialized struct lru_cache on success,
97 * or NULL on (allocation) failure.
98 */
99struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
100        unsigned max_pending_changes,
101        unsigned e_count, size_t e_size, size_t e_off)
102{
103    struct hlist_head *slot = NULL;
104    struct lc_element **element = NULL;
105    struct lru_cache *lc;
106    struct lc_element *e;
107    unsigned cache_obj_size = kmem_cache_size(cache);
108    unsigned i;
109
110    WARN_ON(cache_obj_size < e_size);
111    if (cache_obj_size < e_size)
112        return NULL;
113
114    /* e_count too big; would probably fail the allocation below anyways.
115     * for typical use cases, e_count should be few thousand at most. */
116    if (e_count > LC_MAX_ACTIVE)
117        return NULL;
118
119    slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
120    if (!slot)
121        goto out_fail;
122    element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
123    if (!element)
124        goto out_fail;
125
126    lc = kzalloc(sizeof(*lc), GFP_KERNEL);
127    if (!lc)
128        goto out_fail;
129
130    INIT_LIST_HEAD(&lc->in_use);
131    INIT_LIST_HEAD(&lc->lru);
132    INIT_LIST_HEAD(&lc->free);
133    INIT_LIST_HEAD(&lc->to_be_changed);
134
135    lc->name = name;
136    lc->element_size = e_size;
137    lc->element_off = e_off;
138    lc->nr_elements = e_count;
139    lc->max_pending_changes = max_pending_changes;
140    lc->lc_cache = cache;
141    lc->lc_element = element;
142    lc->lc_slot = slot;
143
144    /* preallocate all objects */
145    for (i = 0; i < e_count; i++) {
146        void *p = kmem_cache_alloc(cache, GFP_KERNEL);
147        if (!p)
148            break;
149        memset(p, 0, lc->element_size);
150        e = p + e_off;
151        e->lc_index = i;
152        e->lc_number = LC_FREE;
153        e->lc_new_number = LC_FREE;
154        list_add(&e->list, &lc->free);
155        element[i] = e;
156    }
157    if (i == e_count)
158        return lc;
159
160    /* else: could not allocate all elements, give up */
161    for (i--; i; i--) {
162        void *p = element[i];
163        kmem_cache_free(cache, p - e_off);
164    }
165    kfree(lc);
166out_fail:
167    kfree(element);
168    kfree(slot);
169    return NULL;
170}
171
172void lc_free_by_index(struct lru_cache *lc, unsigned i)
173{
174    void *p = lc->lc_element[i];
175    WARN_ON(!p);
176    if (p) {
177        p -= lc->element_off;
178        kmem_cache_free(lc->lc_cache, p);
179    }
180}
181
182/**
183 * lc_destroy - frees memory allocated by lc_create()
184 * @lc: the lru cache to destroy
185 */
186void lc_destroy(struct lru_cache *lc)
187{
188    unsigned i;
189    if (!lc)
190        return;
191    for (i = 0; i < lc->nr_elements; i++)
192        lc_free_by_index(lc, i);
193    kfree(lc->lc_element);
194    kfree(lc->lc_slot);
195    kfree(lc);
196}
197
198/**
199 * lc_reset - does a full reset for @lc and the hash table slots.
200 * @lc: the lru cache to operate on
201 *
202 * It is roughly the equivalent of re-allocating a fresh lru_cache object,
203 * basically a short cut to lc_destroy(lc); lc = lc_create(...);
204 */
205void lc_reset(struct lru_cache *lc)
206{
207    unsigned i;
208
209    INIT_LIST_HEAD(&lc->in_use);
210    INIT_LIST_HEAD(&lc->lru);
211    INIT_LIST_HEAD(&lc->free);
212    INIT_LIST_HEAD(&lc->to_be_changed);
213    lc->used = 0;
214    lc->hits = 0;
215    lc->misses = 0;
216    lc->starving = 0;
217    lc->locked = 0;
218    lc->changed = 0;
219    lc->pending_changes = 0;
220    lc->flags = 0;
221    memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
222
223    for (i = 0; i < lc->nr_elements; i++) {
224        struct lc_element *e = lc->lc_element[i];
225        void *p = e;
226        p -= lc->element_off;
227        memset(p, 0, lc->element_size);
228        /* re-init it */
229        e->lc_index = i;
230        e->lc_number = LC_FREE;
231        e->lc_new_number = LC_FREE;
232        list_add(&e->list, &lc->free);
233    }
234}
235
236/**
237 * lc_seq_printf_stats - print stats about @lc into @seq
238 * @seq: the seq_file to print into
239 * @lc: the lru cache to print statistics of
240 */
241size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
242{
243    /* NOTE:
244     * total calls to lc_get are
245     * (starving + hits + misses)
246     * misses include "locked" count (update from an other thread in
247     * progress) and "changed", when this in fact lead to an successful
248     * update of the cache.
249     */
250    return seq_printf(seq, "\t%s: used:%u/%u "
251        "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
252        lc->name, lc->used, lc->nr_elements,
253        lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
254}
255
256static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
257{
258    return lc->lc_slot + (enr % lc->nr_elements);
259}
260
261
262static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
263        bool include_changing)
264{
265    struct lc_element *e;
266
267    BUG_ON(!lc);
268    BUG_ON(!lc->nr_elements);
269    hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) {
270        /* "about to be changed" elements, pending transaction commit,
271         * are hashed by their "new number". "Normal" elements have
272         * lc_number == lc_new_number. */
273        if (e->lc_new_number != enr)
274            continue;
275        if (e->lc_new_number == e->lc_number || include_changing)
276            return e;
277        break;
278    }
279    return NULL;
280}
281
282/**
283 * lc_find - find element by label, if present in the hash table
284 * @lc: The lru_cache object
285 * @enr: element number
286 *
287 * Returns the pointer to an element, if the element with the requested
288 * "label" or element number is present in the hash table,
289 * or NULL if not found. Does not change the refcnt.
290 * Ignores elements that are "about to be used", i.e. not yet in the active
291 * set, but still pending transaction commit.
292 */
293struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
294{
295    return __lc_find(lc, enr, 0);
296}
297
298/**
299 * lc_is_used - find element by label
300 * @lc: The lru_cache object
301 * @enr: element number
302 *
303 * Returns true, if the element with the requested "label" or element number is
304 * present in the hash table, and is used (refcnt > 0).
305 * Also finds elements that are not _currently_ used but only "about to be
306 * used", i.e. on the "to_be_changed" list, pending transaction commit.
307 */
308bool lc_is_used(struct lru_cache *lc, unsigned int enr)
309{
310    struct lc_element *e = __lc_find(lc, enr, 1);
311    return e && e->refcnt;
312}
313
314/**
315 * lc_del - removes an element from the cache
316 * @lc: The lru_cache object
317 * @e: The element to remove
318 *
319 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
320 * sets @e->enr to %LC_FREE.
321 */
322void lc_del(struct lru_cache *lc, struct lc_element *e)
323{
324    PARANOIA_ENTRY();
325    PARANOIA_LC_ELEMENT(lc, e);
326    BUG_ON(e->refcnt);
327
328    e->lc_number = e->lc_new_number = LC_FREE;
329    hlist_del_init(&e->colision);
330    list_move(&e->list, &lc->free);
331    RETURN();
332}
333
334static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
335{
336    struct list_head *n;
337    struct lc_element *e;
338
339    if (!list_empty(&lc->free))
340        n = lc->free.next;
341    else if (!list_empty(&lc->lru))
342        n = lc->lru.prev;
343    else
344        return NULL;
345
346    e = list_entry(n, struct lc_element, list);
347    PARANOIA_LC_ELEMENT(lc, e);
348
349    e->lc_new_number = new_number;
350    if (!hlist_unhashed(&e->colision))
351        __hlist_del(&e->colision);
352    hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
353    list_move(&e->list, &lc->to_be_changed);
354
355    return e;
356}
357
358static int lc_unused_element_available(struct lru_cache *lc)
359{
360    if (!list_empty(&lc->free))
361        return 1; /* something on the free list */
362    if (!list_empty(&lc->lru))
363        return 1; /* something to evict */
364
365    return 0;
366}
367
/* Internal flags for __lc_get(); its callers OR them together. */
enum {
    /* a miss may start a change of the active set */
    LC_GET_MAY_CHANGE = 1 << 0,
    /* also hand out elements still pending transaction commit */
    LC_GET_MAY_USE_UNCOMMITTED = 1 << 1,
};
373
374static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags)
375{
376    struct lc_element *e;
377
378    PARANOIA_ENTRY();
379    if (lc->flags & LC_STARVING) {
380        ++lc->starving;
381        RETURN(NULL);
382    }
383
384    e = __lc_find(lc, enr, 1);
385    /* if lc_new_number != lc_number,
386     * this enr is currently being pulled in already,
387     * and will be available once the pending transaction
388     * has been committed. */
389    if (e) {
390        if (e->lc_new_number != e->lc_number) {
391            /* It has been found above, but on the "to_be_changed"
392             * list, not yet committed. Don't pull it in twice,
393             * wait for the transaction, then try again...
394             */
395            if (!(flags & LC_GET_MAY_USE_UNCOMMITTED))
396                RETURN(NULL);
397            /* ... unless the caller is aware of the implications,
398             * probably preparing a cumulative transaction. */
399            ++e->refcnt;
400            ++lc->hits;
401            RETURN(e);
402        }
403        /* else: lc_new_number == lc_number; a real hit. */
404        ++lc->hits;
405        if (e->refcnt++ == 0)
406            lc->used++;
407        list_move(&e->list, &lc->in_use); /* Not evictable... */
408        RETURN(e);
409    }
410    /* e == NULL */
411
412    ++lc->misses;
413    if (!(flags & LC_GET_MAY_CHANGE))
414        RETURN(NULL);
415
416    /* To avoid races with lc_try_lock(), first, mark us dirty
417     * (using test_and_set_bit, as it implies memory barriers), ... */
418    test_and_set_bit(__LC_DIRTY, &lc->flags);
419
420    /* ... only then check if it is locked anyways. If lc_unlock clears
421     * the dirty bit again, that's not a problem, we will come here again.
422     */
423    if (test_bit(__LC_LOCKED, &lc->flags)) {
424        ++lc->locked;
425        RETURN(NULL);
426    }
427
428    /* In case there is nothing available and we can not kick out
429     * the LRU element, we have to wait ...
430     */
431    if (!lc_unused_element_available(lc)) {
432        __set_bit(__LC_STARVING, &lc->flags);
433        RETURN(NULL);
434    }
435
436    /* It was not present in the active set. We are going to recycle an
437     * unused (or even "free") element, but we won't accumulate more than
438     * max_pending_changes changes. */
439    if (lc->pending_changes >= lc->max_pending_changes)
440        RETURN(NULL);
441
442    e = lc_prepare_for_change(lc, enr);
443    BUG_ON(!e);
444
445    clear_bit(__LC_STARVING, &lc->flags);
446    BUG_ON(++e->refcnt != 1);
447    lc->used++;
448    lc->pending_changes++;
449
450    RETURN(e);
451}
452
453/**
454 * lc_get - get element by label, maybe change the active set
455 * @lc: the lru cache to operate on
456 * @enr: the label to look up
457 *
458 * Finds an element in the cache, increases its usage count,
459 * "touches" and returns it.
460 *
461 * In case the requested number is not present, it needs to be added to the
462 * cache. Therefore it is possible that an other element becomes evicted from
463 * the cache. In either case, the user is notified so he is able to e.g. keep
464 * a persistent log of the cache changes, and therefore the objects in use.
465 *
466 * Return values:
467 * NULL
468 * The cache was marked %LC_STARVING,
469 * or the requested label was not in the active set
470 * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
471 * Or no unused or free element could be recycled (@lc will be marked as
472 * %LC_STARVING, blocking further lc_get() operations).
473 *
474 * pointer to the element with the REQUESTED element number.
475 * In this case, it can be used right away
476 *
477 * pointer to an UNUSED element with some different element number,
478 * where that different number may also be %LC_FREE.
479 *
480 * In this case, the cache is marked %LC_DIRTY,
481 * so lc_try_lock() will no longer succeed.
482 * The returned element pointer is moved to the "to_be_changed" list,
483 * and registered with the new element number on the hash collision chains,
484 * so it is possible to pick it up from lc_is_used().
485 * Up to "max_pending_changes" (see lc_create()) can be accumulated.
486 * The user now should do whatever housekeeping is necessary,
487 * typically serialize on lc_try_lock_for_transaction(), then call
488 * lc_committed(lc) and lc_unlock(), to finish the change.
489 *
490 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
491 * any cache set change.
492 */
493struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
494{
495    return __lc_get(lc, enr, LC_GET_MAY_CHANGE);
496}
497
498/**
499 * lc_get_cumulative - like lc_get; also finds to-be-changed elements
500 * @lc: the lru cache to operate on
501 * @enr: the label to look up
502 *
503 * Unlike lc_get this also returns the element for @enr, if it is belonging to
504 * a pending transaction, so the return values are like for lc_get(),
505 * plus:
506 *
507 * pointer to an element already on the "to_be_changed" list.
508 * In this case, the cache was already marked %LC_DIRTY.
509 *
510 * Caller needs to make sure that the pending transaction is completed,
511 * before proceeding to actually use this element.
512 */
513struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr)
514{
515    return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED);
516}
517
518/**
519 * lc_try_get - get element by label, if present; do not change the active set
520 * @lc: the lru cache to operate on
521 * @enr: the label to look up
522 *
523 * Finds an element in the cache, increases its usage count,
524 * "touches" and returns it.
525 *
526 * Return values:
527 * NULL
528 * The cache was marked %LC_STARVING,
529 * or the requested label was not in the active set
530 *
531 * pointer to the element with the REQUESTED element number.
532 * In this case, it can be used right away
533 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
    /* no flags: pure lookup, the active set is never changed */
    const unsigned int get_flags = 0;

    return __lc_get(lc, enr, get_flags);
}
538
539/**
540 * lc_committed - tell @lc that pending changes have been recorded
541 * @lc: the lru cache to operate on
542 *
543 * User is expected to serialize on explicit lc_try_lock_for_transaction()
544 * before the transaction is started, and later needs to lc_unlock() explicitly
545 * as well.
546 */
547void lc_committed(struct lru_cache *lc)
548{
549    struct lc_element *e, *tmp;
550
551    PARANOIA_ENTRY();
552    list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
553        /* count number of changes, not number of transactions */
554        ++lc->changed;
555        e->lc_number = e->lc_new_number;
556        list_move(&e->list, &lc->in_use);
557    }
558    lc->pending_changes = 0;
559    RETURN();
560}
561
562
563/**
564 * lc_put - give up refcnt of @e
565 * @lc: the lru cache to operate on
566 * @e: the element to put
567 *
568 * If refcnt reaches zero, the element is moved to the lru list,
569 * and a %LC_STARVING (if set) is cleared.
570 * Returns the new (post-decrement) refcnt.
571 */
572unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
573{
574    PARANOIA_ENTRY();
575    PARANOIA_LC_ELEMENT(lc, e);
576    BUG_ON(e->refcnt == 0);
577    BUG_ON(e->lc_number != e->lc_new_number);
578    if (--e->refcnt == 0) {
579        /* move it to the front of LRU. */
580        list_move(&e->list, &lc->lru);
581        lc->used--;
582        clear_bit_unlock(__LC_STARVING, &lc->flags);
583    }
584    RETURN(e->refcnt);
585}
586
587/**
588 * lc_element_by_index
589 * @lc: the lru cache to operate on
590 * @i: the index of the element to return
591 */
592struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
593{
594    BUG_ON(i >= lc->nr_elements);
595    BUG_ON(lc->lc_element[i] == NULL);
596    BUG_ON(lc->lc_element[i]->lc_index != i);
597    return lc->lc_element[i];
598}
599
600/**
601 * lc_index_of
602 * @lc: the lru cache to operate on
603 * @e: the element to query for its index position in lc->element
604 */
605unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
606{
607    PARANOIA_LC_ELEMENT(lc, e);
608    return e->lc_index;
609}
610
611/**
612 * lc_set - associate index with label
613 * @lc: the lru cache to operate on
614 * @enr: the label to set
615 * @index: the element index to associate label with.
616 *
617 * Used to initialize the active set to some previously recorded state.
618 */
619void lc_set(struct lru_cache *lc, unsigned int enr, int index)
620{
621    struct lc_element *e;
622    struct list_head *lh;
623
624    if (index < 0 || index >= lc->nr_elements)
625        return;
626
627    e = lc_element_by_index(lc, index);
628    BUG_ON(e->lc_number != e->lc_new_number);
629    BUG_ON(e->refcnt != 0);
630
631    e->lc_number = e->lc_new_number = enr;
632    hlist_del_init(&e->colision);
633    if (enr == LC_FREE)
634        lh = &lc->free;
635    else {
636        hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
637        lh = &lc->lru;
638    }
639    list_move(&e->list, lh);
640}
641
642/**
643 * lc_dump - Dump a complete LRU cache to seq in textual form.
644 * @lc: the lru cache to operate on
645 * @seq: the &struct seq_file pointer to seq_printf into
646 * @utext: user supplied "heading" or other info
647 * @detail: function pointer the user may provide to dump further details
648 * of the object the lc_element is embedded in.
649 */
650void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
651         void (*detail) (struct seq_file *, struct lc_element *))
652{
653    unsigned int nr_elements = lc->nr_elements;
654    struct lc_element *e;
655    int i;
656
657    seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
658    for (i = 0; i < nr_elements; i++) {
659        e = lc_element_by_index(lc, i);
660        if (e->lc_number == LC_FREE) {
661            seq_printf(seq, "\t%2d: FREE\n", i);
662        } else {
663            seq_printf(seq, "\t%2d: %4u %4u ", i,
664                   e->lc_number, e->refcnt);
665            detail(seq, e);
666        }
667    }
668}
669
/* Exported interface, listed in the order of definition in this file. */
EXPORT_SYMBOL(lc_try_lock);
EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_is_used);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_get_cumulative);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_committed);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_seq_dump_details);
687

Archive Download this file



interactive