/*
   lru_cache.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h> /* for seq_printf */
#include <linux/lru_cache.h>

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
MODULE_LICENSE("GPL");

/* this is a developer's aid only!
 * it catches concurrent access (lack of locking on the user's part) */
#define PARANOIA_ENTRY() do {		\
	BUG_ON(!lc);			\
	BUG_ON(!lc->nr_elements);	\
	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
} while (0)

#define RETURN(x...)     do { \
	clear_bit(__LC_PARANOIA, &lc->flags); \
	smp_mb__after_clear_bit(); return x; } while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do {	\
	struct lru_cache *lc_ = (lc);	\
	struct lc_element *e_ = (e);	\
	unsigned i = e_->lc_index;	\
	BUG_ON(i >= lc_->nr_elements);	\
	BUG_ON(lc_->lc_element[i] != e_); } while (0)
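
/*
 * Illustrative pattern only (lc_example_op is hypothetical, not part of
 * this API): every operation that modifies the cache brackets its body
 * with PARANOIA_ENTRY()/RETURN(), so a second concurrent caller trips
 * the BUG_ON() instead of silently corrupting the lists:
 *
 *	unsigned int lc_example_op(struct lru_cache *lc)
 *	{
 *		PARANOIA_ENTRY();
 *		... manipulate lc->lru, lc->free, lc->in_use ...
 *		RETURN(0);
 *	}
 */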

/**
 * lc_create - prepares to track objects in an active set
 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
 * @cache: kmem_cache to allocate the tracked objects from
 * @e_count: number of elements allowed to be active simultaneously
 * @e_size: size of the tracked objects
 * @e_off: offset to the &struct lc_element member in a tracked object
 *
 * Returns a pointer to a newly initialized struct lru_cache on success,
 * or NULL on (allocation) failure.
 */
struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
		unsigned e_count, size_t e_size, size_t e_off)
{
	struct hlist_head *slot = NULL;
	struct lc_element **element = NULL;
	struct lru_cache *lc;
	struct lc_element *e;
	unsigned cache_obj_size = kmem_cache_size(cache);
	unsigned i;

	WARN_ON(cache_obj_size < e_size);
	if (cache_obj_size < e_size)
		return NULL;

	/* e_count too big; the allocations below would probably fail anyway.
	 * for typical use cases, e_count should be a few thousand at most. */
	if (e_count > LC_MAX_ACTIVE)
		return NULL;

	slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
	if (!slot)
		goto out_fail;
	element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL);
	if (!element)
		goto out_fail;

	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc)
		goto out_fail;

	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);

	lc->name = name;
	lc->element_size = e_size;
	lc->element_off = e_off;
	lc->nr_elements = e_count;
	lc->new_number = LC_FREE;
	lc->lc_cache = cache;
	lc->lc_element = element;
	lc->lc_slot = slot;

	/* preallocate all objects */
	for (i = 0; i < e_count; i++) {
		void *p = kmem_cache_alloc(cache, GFP_KERNEL);
		if (!p)
			break;
		memset(p, 0, lc->element_size);
		e = p + e_off;
		e->lc_index = i;
		e->lc_number = LC_FREE;
		list_add(&e->list, &lc->free);
		element[i] = e;
	}
	if (i == e_count)
		return lc;

	/* else: could not allocate all elements, give up;
	 * free every element allocated so far (including element[0]) */
	while (i) {
		void *p = element[--i];
		kmem_cache_free(cache, p - e_off);
	}
	kfree(lc);
out_fail:
	kfree(element);
	kfree(slot);
	return NULL;
}

/* free the object a tracked element is embedded in, if it was allocated */
static void lc_free_by_index(struct lru_cache *lc, unsigned i)
{
	void *p = lc->lc_element[i];
	WARN_ON(!p);
	if (p) {
		p -= lc->element_off;
		kmem_cache_free(lc->lc_cache, p);
	}
}

/**
 * lc_destroy - frees memory allocated by lc_create()
 * @lc: the lru cache to destroy
 */
void lc_destroy(struct lru_cache *lc)
{
	unsigned i;
	if (!lc)
		return;
	for (i = 0; i < lc->nr_elements; i++)
		lc_free_by_index(lc, i);
	kfree(lc->lc_element);
	kfree(lc->lc_slot);
	kfree(lc);
}
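
/*
 * Usage sketch for lc_create()/lc_destroy(). Illustrative only:
 * struct my_extent and the cache name "my_lc_cache" are hypothetical,
 * not part of this API.
 *
 *	struct my_extent {
 *		struct lc_element lce;
 *		unsigned long private_data;
 *	};
 *
 *	struct kmem_cache *cache = kmem_cache_create("my_lc_cache",
 *			sizeof(struct my_extent), 0, 0, NULL);
 *	struct lru_cache *lc = lc_create("my-set", cache, 61,
 *			sizeof(struct my_extent),
 *			offsetof(struct my_extent, lce));
 *	...
 *	lc_destroy(lc);
 *	kmem_cache_destroy(cache);
 */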

/**
 * lc_reset - does a full reset for @lc and the hash table slots.
 * @lc: the lru cache to operate on
 *
 * It is roughly the equivalent of re-allocating a fresh lru_cache object,
 * basically a shortcut for lc_destroy(lc); lc = lc_create(...);
 */
void lc_reset(struct lru_cache *lc)
{
	unsigned i;

	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);
	lc->used = 0;
	lc->hits = 0;
	lc->misses = 0;
	lc->starving = 0;
	lc->dirty = 0;
	lc->changed = 0;
	lc->flags = 0;
	lc->changing_element = NULL;
	lc->new_number = LC_FREE;
	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);

	for (i = 0; i < lc->nr_elements; i++) {
		struct lc_element *e = lc->lc_element[i];
		void *p = e;
		p -= lc->element_off;
		memset(p, 0, lc->element_size);
		/* re-init it */
		e->lc_index = i;
		e->lc_number = LC_FREE;
		list_add(&e->list, &lc->free);
	}
}

/**
 * lc_seq_printf_stats - print stats about @lc into @seq
 * @seq: the seq_file to print into
 * @lc: the lru cache to print statistics of
 */
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
	/* NOTE:
	 * total calls to lc_get are
	 * (starving + hits + misses)
	 * misses include "dirty" count (update from another thread in
	 * progress) and "changed", when this in fact led to a successful
	 * update of the cache.
	 */
	return seq_printf(seq, "\t%s: used:%u/%u "
		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
		lc->name, lc->used, lc->nr_elements,
		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
}

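/* The hash table has exactly nr_elements slots: a label is hashed to
 * slot (enr % nr_elements), so distinct labels may share one
 * collision chain. */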
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
{
	return lc->lc_slot + (enr % lc->nr_elements);
}


/**
 * lc_find - find element by label, if present in the hash table
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns the pointer to an element, if the element with the requested
 * "label" or element number is present in the hash table,
 * or NULL if not found. Does not change the refcnt.
 */
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
	struct hlist_node *n;
	struct lc_element *e;

	BUG_ON(!lc);
	BUG_ON(!lc->nr_elements);
	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
		if (e->lc_number == enr)
			return e;
	}
	return NULL;
}
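
/*
 * Sketch: peek whether label 17 is in the active set without taking a
 * reference (purely illustrative):
 *
 *	struct lc_element *e = lc_find(lc, 17);
 *	if (e)
 *		pr_info("label %u at index %u, refcnt %u\n",
 *			e->lc_number, e->lc_index, e->refcnt);
 */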

/* returned element will be "recycled" immediately */
static struct lc_element *lc_evict(struct lru_cache *lc)
{
	struct list_head *n;
	struct lc_element *e;

	if (list_empty(&lc->lru))
		return NULL;

	n = lc->lru.prev;
	e = list_entry(n, struct lc_element, list);

	PARANOIA_LC_ELEMENT(lc, e);

	list_del(&e->list);
	hlist_del(&e->colision);
	return e;
}

/**
 * lc_del - removes an element from the cache
 * @lc: The lru_cache object
 * @e: The element to remove
 *
 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
 * sets @e->lc_number to %LC_FREE.
 */
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt);

	e->lc_number = LC_FREE;
	hlist_del_init(&e->colision);
	list_move(&e->list, &lc->free);
	RETURN();
}

static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
{
	struct list_head *n;

	if (list_empty(&lc->free))
		return lc_evict(lc);

	n = lc->free.next;
	list_del(n);
	return list_entry(n, struct lc_element, list);
}

static int lc_unused_element_available(struct lru_cache *lc)
{
	if (!list_empty(&lc->free))
		return 1; /* something on the free list */
	if (!list_empty(&lc->lru))
		return 1; /* something to evict */

	return 0;
}


/**
 * lc_get - get element by label, maybe change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that another element is evicted from
 * the cache. In either case, the user is notified, so they can e.g. keep
 * a persistent log of the cache changes, and therefore of the objects in use.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
 *     Or no unused or free element could be recycled (@lc will be marked as
 *     %LC_STARVING, blocking further lc_get() operations).
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away.
 *
 *  pointer to an UNUSED element with some different element number,
 *     where that different number may also be %LC_FREE.
 *
 *     In this case, the cache is marked %LC_DIRTY (blocking further changes),
 *     and the returned element pointer is removed from the lru list and
 *     hash collision chains. The user now should do whatever housekeeping
 *     is necessary. Then they must call lc_changed(lc, element_pointer)
 *     to finish the change.
 *
 * NOTE: The user needs to check the lc_number on EACH use, so they recognize
 *       any cache set change. A usage sketch follows the function body below.
 */
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	if (lc->flags & LC_STARVING) {
		++lc->starving;
		RETURN(NULL);
	}

	e = lc_find(lc, enr);
	if (e) {
		++lc->hits;
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
		RETURN(e);
	}

	++lc->misses;

	/* In case there is nothing available and we can not kick out
	 * the LRU element, we have to wait ...
	 */
	if (!lc_unused_element_available(lc)) {
		__set_bit(__LC_STARVING, &lc->flags);
		RETURN(NULL);
	}

	/* it was not present in the active set.
	 * we are going to recycle an unused (or even "free") element.
	 * user may need to commit a transaction to record that change.
	 * we serialize on flags & LC_DIRTY */
	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
		++lc->dirty;
		RETURN(NULL);
	}

	e = lc_get_unused_element(lc);
	BUG_ON(!e);

	clear_bit(__LC_STARVING, &lc->flags);
	BUG_ON(++e->refcnt != 1);
	lc->used++;

	lc->changing_element = e;
	lc->new_number = enr;

	RETURN(e);
}
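
/*
 * Typical lc_get() cycle, as a sketch (the "record new label" step
 * stands in for whatever transaction the caller uses, e.g. writing the
 * label to stable storage; error handling is elided):
 *
 *	struct lc_element *e = lc_get(lc, enr);
 *	if (!e)
 *		return -EBUSY;	(starving, or a change is pending)
 *	if (e->lc_number != enr) {
 *		(the active set changes: record the new label, then
 *		 finish the transaction)
 *		... record new label enr for index e->lc_index ...
 *		lc_changed(lc, e);
 *	}
 *	... use the object that embeds e ...
 *	lc_put(lc, e);
 */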

/* similar to lc_get,
 * but only gets a new reference on an existing element.
 * you either get the requested element, or NULL.
 * will be consolidated into one function.
 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	if (lc->flags & LC_STARVING) {
		++lc->starving;
		RETURN(NULL);
	}

	e = lc_find(lc, enr);
	if (e) {
		++lc->hits;
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
	}
	RETURN(e);
}

/**
 * lc_changed - tell @lc that the change has been recorded
 * @lc: the lru cache to operate on
 * @e: the element pending label change
 */
void lc_changed(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	BUG_ON(e != lc->changing_element);
	PARANOIA_LC_ELEMENT(lc, e);
	++lc->changed;
	e->lc_number = lc->new_number;
	list_add(&e->list, &lc->in_use);
	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
	lc->changing_element = NULL;
	lc->new_number = LC_FREE;
	clear_bit(__LC_DIRTY, &lc->flags);
	smp_mb__after_clear_bit();
	RETURN();
}


/**
 * lc_put - give up refcnt of @e
 * @lc: the lru cache to operate on
 * @e: the element to put
 *
 * If refcnt reaches zero, the element is moved to the lru list,
 * and the %LC_STARVING flag (if set) is cleared.
 * Returns the new (post-decrement) refcnt.
 */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt == 0);
	BUG_ON(e == lc->changing_element);
	if (--e->refcnt == 0) {
		/* move it to the front of LRU. */
		list_move(&e->list, &lc->lru);
		lc->used--;
		clear_bit(__LC_STARVING, &lc->flags);
		smp_mb__after_clear_bit();
	}
	RETURN(e->refcnt);
}

/**
 * lc_element_by_index
 * @lc: the lru cache to operate on
 * @i: the index of the element to return
 */
struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
{
	BUG_ON(i >= lc->nr_elements);
	BUG_ON(lc->lc_element[i] == NULL);
	BUG_ON(lc->lc_element[i]->lc_index != i);
	return lc->lc_element[i];
}

/**
 * lc_index_of
 * @lc: the lru cache to operate on
 * @e: the element to query for its index position in lc->element
 */
unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_LC_ELEMENT(lc, e);
	return e->lc_index;
}

/**
 * lc_set - associate index with label
 * @lc: the lru cache to operate on
 * @enr: the label to set
 * @index: the element index to associate label with.
 *
 * Used to initialize the active set to some previously recorded state.
 */
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
	struct lc_element *e;

	if (index < 0 || index >= lc->nr_elements)
		return;

	e = lc_element_by_index(lc, index);
	e->lc_number = enr;

	hlist_del_init(&e->colision);
	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
}
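
/*
 * Sketch: re-populate the active set from a previously recorded state
 * (the recorded[] array of labels is hypothetical; use %LC_FREE for
 * slots that were unused):
 *
 *	int i;
 *	for (i = 0; i < lc->nr_elements; i++)
 *		lc_set(lc, recorded[i], i);
 */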

/**
 * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
 * @seq: the &struct seq_file pointer to seq_printf into
 * @lc: the lru cache to operate on
 * @utext: user supplied "heading" or other info
 * @detail: function pointer the user may provide to dump further details
 * of the object the lc_element is embedded in.
 */
void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
	     void (*detail) (struct seq_file *, struct lc_element *))
{
	unsigned int nr_elements = lc->nr_elements;
	struct lc_element *e;
	unsigned int i;

	seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
	for (i = 0; i < nr_elements; i++) {
		e = lc_element_by_index(lc, i);
		if (e->lc_number == LC_FREE) {
			seq_printf(seq, "\t%2d: FREE\n", i);
		} else {
			seq_printf(seq, "\t%2d: %4u %4u ", i,
				e->lc_number, e->refcnt);
			detail(seq, e);
		}
	}
}
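
/*
 * Example @detail callback, as a sketch (struct my_extent as in the
 * lc_create() sketch above; both are hypothetical):
 *
 *	static void my_detail(struct seq_file *seq, struct lc_element *e)
 *	{
 *		struct my_extent *x =
 *			container_of(e, struct my_extent, lce);
 *		seq_printf(seq, "%lu\n", x->private_data);
 *	}
 */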

EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_changed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);