Root/drivers/staging/zcache/tmem.c

1/*
2 * In-kernel transcendent memory (generic implementation)
3 *
4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5 *
 * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
 * "handles" (triples containing a pool id, an object id, and an index), to
8 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
9 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10 * set of functions (pamops). Each pampd contains some representation of
11 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
12 * pages and must be able to insert, find, and delete these pages at a
13 * potential frequency of thousands per second concurrently across many CPUs,
14 * (and, if used with KVM, across many vcpus across many guests).
15 * Tmem is tracked with a hierarchy of data structures, organized by
16 * the elements in a handle-tuple: pool_id, object_id, and page index.
17 * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
18 * Each pool, contains a hash table of rb_trees of tmem_objs. Each
19 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
20 * nodes called tmem_objnodes. Each leaf pointer in this tree points to
21 * a pampd, which is accessible only through a small set of callbacks
22 * registered by the PAM implementation (see tmem_register_pamops). Tmem
23 * does all memory allocation via a set of callbacks registered by the tmem
24 * host implementation (e.g. see tmem_register_hostops).
25 */
26
27#include <linux/list.h>
28#include <linux/spinlock.h>
29#include <linux/atomic.h>
30
31#include "tmem.h"
32
33/* data structure sentinels used for debugging... see tmem.h */
34#define POOL_SENTINEL 0x87658765
35#define OBJ_SENTINEL 0x12345678
36#define OBJNODE_SENTINEL 0xfedcba09
37
38/*
39 * A tmem host implementation must use this function to register callbacks
40 * for memory allocation.
41 */
/* copy of the host's callback table; populated by tmem_register_hostops() */
static struct tmem_hostops tmem_hostops;

static void tmem_objnode_tree_init(void);

void tmem_register_hostops(struct tmem_hostops *m)
{
    /* one-time setup of the height->max-index table used by objnode trees */
    tmem_objnode_tree_init();
    tmem_hostops = *m;
}
51
52/*
53 * A tmem host implementation must use this function to register
54 * callbacks for a page-accessible memory (PAM) implementation
55 */
/* copy of the PAM implementation's callbacks; see tmem_register_pamops() */
static struct tmem_pamops tmem_pamops;

void tmem_register_pamops(struct tmem_pamops *m)
{
    tmem_pamops = *m;
}
62
63/*
64 * Oid's are potentially very sparse and tmem_objs may have an indeterminately
65 * short life, being added and deleted at a relatively high frequency.
66 * So an rb_tree is an ideal data structure to manage tmem_objs. But because
67 * of the potentially huge number of tmem_objs, each pool manages a hashtable
68 * of rb_trees to reduce search, insert, delete, and rebalancing time.
69 * Each hashbucket also has a lock to manage concurrent access.
70 *
71 * The following routines manage tmem_objs. When any tmem_obj is accessed,
72 * the hashbucket lock must be held.
73 */
74
75static struct tmem_obj
76*__tmem_obj_find(struct tmem_hashbucket*hb, struct tmem_oid *oidp,
77         struct rb_node **parent, struct rb_node ***link)
78{
79    struct rb_node *_parent = NULL, **rbnode;
80    struct tmem_obj *obj = NULL;
81
82    rbnode = &hb->obj_rb_root.rb_node;
83    while (*rbnode) {
84        BUG_ON(RB_EMPTY_NODE(*rbnode));
85        _parent = *rbnode;
86        obj = rb_entry(*rbnode, struct tmem_obj,
87                   rb_tree_node);
88        switch (tmem_oid_compare(oidp, &obj->oid)) {
89        case 0: /* equal */
90            goto out;
91        case -1:
92            rbnode = &(*rbnode)->rb_left;
93            break;
94        case 1:
95            rbnode = &(*rbnode)->rb_right;
96            break;
97        }
98    }
99
100    if (parent)
101        *parent = _parent;
102    if (link)
103        *link = rbnode;
104
105    obj = NULL;
106out:
107    return obj;
108}
109
110
/*
 * Search the hashbucket's rb_tree for the object matching *oidp; returns
 * the object or NULL.  Caller must hold the hashbucket lock.
 */
static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
                    struct tmem_oid *oidp)
{
    return __tmem_obj_find(hb, oidp, NULL, NULL);
}
117
118static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
119
/*
 * Free an object that has no more pampds in it: tear down any leftover
 * objnode "stump", invalidate the sentinels, and unlink the object from
 * the hashbucket's rb_tree.  Caller must hold the hashbucket lock and is
 * responsible for releasing the obj memory itself (via hostops).
 */
static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
{
    struct tmem_pool *pool;

    BUG_ON(obj == NULL);
    ASSERT_SENTINEL(obj, OBJ);
    BUG_ON(obj->pampd_count > 0);   /* all pampds must already be gone */
    pool = obj->pool;
    BUG_ON(pool == NULL);
    if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
        tmem_pampd_destroy_all_in_obj(obj);
    BUG_ON(obj->objnode_tree_root != NULL);
    BUG_ON((long)obj->objnode_count != 0);
    atomic_dec(&pool->obj_count);
    BUG_ON(atomic_read(&pool->obj_count) < 0);
    INVERT_SENTINEL(obj, OBJ);      /* mark the object dead for debugging */
    obj->pool = NULL;
    tmem_oid_set_invalid(&obj->oid);
    rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
}
141
142/*
143 * initialize, and insert an tmem_object_root (called only if find failed)
144 */
145static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
146                    struct tmem_pool *pool,
147                    struct tmem_oid *oidp)
148{
149    struct rb_root *root = &hb->obj_rb_root;
150    struct rb_node **new = NULL, *parent = NULL;
151
152    BUG_ON(pool == NULL);
153    atomic_inc(&pool->obj_count);
154    obj->objnode_tree_height = 0;
155    obj->objnode_tree_root = NULL;
156    obj->pool = pool;
157    obj->oid = *oidp;
158    obj->objnode_count = 0;
159    obj->pampd_count = 0;
160    (*tmem_pamops.new_obj)(obj);
161    SET_SENTINEL(obj, OBJ);
162
163    if (__tmem_obj_find(hb, oidp, &parent, &new))
164        BUG();
165
166    rb_link_node(&obj->rb_tree_node, parent, new);
167    rb_insert_color(&obj->rb_tree_node, root);
168}
169
170/*
171 * Tmem is managed as a set of tmem_pools with certain attributes, such as
172 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
173 * and all pampds that belong to a tmem_pool. A tmem_pool is created
174 * or deleted relatively rarely (for example, when a filesystem is
 * mounted or unmounted).
176 */
177
178/* flush all data from a pool and, optionally, free it */
179static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
180{
181    struct rb_node *rbnode;
182    struct tmem_obj *obj;
183    struct tmem_hashbucket *hb = &pool->hashbucket[0];
184    int i;
185
186    BUG_ON(pool == NULL);
187    for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
188        spin_lock(&hb->lock);
189        rbnode = rb_first(&hb->obj_rb_root);
190        while (rbnode != NULL) {
191            obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
192            rbnode = rb_next(rbnode);
193            tmem_pampd_destroy_all_in_obj(obj);
194            tmem_obj_free(obj, hb);
195            (*tmem_hostops.obj_free)(obj, pool);
196        }
197        spin_unlock(&hb->lock);
198    }
199    if (destroy)
200        list_del(&pool->pool_list);
201}
202
203/*
204 * A tmem_obj contains a radix-tree-like tree in which the intermediate
205 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
206 * is very specialized and tuned for specific uses and is not particularly
207 * suited for use from this code, though some code from the core algorithms has
208 * been reused, thus the copyright notices below). Each tmem_objnode contains
209 * a set of pointers which point to either a set of intermediate tmem_objnodes
210 * or a set of of pampds.
211 *
212 * Portions Copyright (C) 2001 Momchil Velikov
213 * Portions Copyright (C) 2001 Christoph Hellwig
214 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
215 */
216
/* one level of a root-to-leaf descent, recorded so delete can unwind */
struct tmem_objnode_tree_path {
    struct tmem_objnode *objnode;   /* node visited at this level */
    int offset;                     /* slot index taken within it */
};

/* objnode height_to_maxindex translation */
static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
224
225static void tmem_objnode_tree_init(void)
226{
227    unsigned int ht, tmp;
228
229    for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
230        tmp = ht * OBJNODE_TREE_MAP_SHIFT;
231        if (tmp >= OBJNODE_TREE_INDEX_BITS)
232            tmem_objnode_tree_h2max[ht] = ~0UL;
233        else
234            tmem_objnode_tree_h2max[ht] =
235                (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
236    }
237}
238
239static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
240{
241    struct tmem_objnode *objnode;
242
243    ASSERT_SENTINEL(obj, OBJ);
244    BUG_ON(obj->pool == NULL);
245    ASSERT_SENTINEL(obj->pool, POOL);
246    objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
247    if (unlikely(objnode == NULL))
248        goto out;
249    objnode->obj = obj;
250    SET_SENTINEL(objnode, OBJNODE);
251    memset(&objnode->slots, 0, sizeof(objnode->slots));
252    objnode->slots_in_use = 0;
253    obj->objnode_count++;
254out:
255    return objnode;
256}
257
/*
 * Return an objnode to the host allocator.  All slots must already be
 * empty; the node is detached from its object and its sentinel is
 * inverted before the hostops free callback runs.
 */
static void tmem_objnode_free(struct tmem_objnode *objnode)
{
    struct tmem_pool *pool;
    int i;

    BUG_ON(objnode == NULL);
    for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
        BUG_ON(objnode->slots[i] != NULL);
    ASSERT_SENTINEL(objnode, OBJNODE);
    INVERT_SENTINEL(objnode, OBJNODE);
    BUG_ON(objnode->obj == NULL);
    ASSERT_SENTINEL(objnode->obj, OBJ);
    pool = objnode->obj->pool;
    BUG_ON(pool == NULL);
    ASSERT_SENTINEL(pool, POOL);
    objnode->obj->objnode_count--;
    objnode->obj = NULL;
    (*tmem_hostops.objnode_free)(objnode, pool);
}
277
278/*
279 * lookup index in object and return associated pampd (or NULL if not found)
280 */
281static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
282{
283    unsigned int height, shift;
284    struct tmem_objnode **slot = NULL;
285
286    BUG_ON(obj == NULL);
287    ASSERT_SENTINEL(obj, OBJ);
288    BUG_ON(obj->pool == NULL);
289    ASSERT_SENTINEL(obj->pool, POOL);
290
291    height = obj->objnode_tree_height;
292    if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
293        goto out;
294    if (height == 0 && obj->objnode_tree_root) {
295        slot = &obj->objnode_tree_root;
296        goto out;
297    }
298    shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
299    slot = &obj->objnode_tree_root;
300    while (height > 0) {
301        if (*slot == NULL)
302            goto out;
303        slot = (struct tmem_objnode **)
304            ((*slot)->slots +
305             ((index >> shift) & OBJNODE_TREE_MAP_MASK));
306        shift -= OBJNODE_TREE_MAP_SHIFT;
307        height--;
308    }
309out:
310    return slot != NULL ? (void **)slot : NULL;
311}
312
/* lookup index in object; returns the pampd stored there, or NULL */
static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
    struct tmem_objnode **slot;

    slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
    return slot != NULL ? *slot : NULL;
}
320
321static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
322                    void *new_pampd)
323{
324    struct tmem_objnode **slot;
325    void *ret = NULL;
326
327    slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
328    if ((slot != NULL) && (*slot != NULL)) {
329        void *old_pampd = *(void **)slot;
330        *(void **)slot = new_pampd;
331        (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
332        ret = new_pampd;
333    }
334    return ret;
335}
336
/*
 * Insert @pampd at @index in the object's objnode tree, growing the
 * tree in height and allocating interior objnodes as needed.  Returns
 * 0 on success or -ENOMEM if an objnode allocation fails -- in which
 * case a partially built "stump" may be left behind for the caller to
 * clean up (see tmem_put's delete_and_free path).
 */
static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
                    void *pampd)
{
    int ret = 0;
    struct tmem_objnode *objnode = NULL, *newnode, *slot;
    unsigned int height, shift;
    int offset = 0;

    /* if necessary, extend the tree to be higher */
    if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
        height = obj->objnode_tree_height + 1;
        if (index > tmem_objnode_tree_h2max[height])
            while (index > tmem_objnode_tree_h2max[height])
                height++;
        if (obj->objnode_tree_root == NULL) {
            /* empty tree: just record the new height */
            obj->objnode_tree_height = height;
            goto insert;
        }
        /* push new root nodes on top until the tree is tall enough */
        do {
            newnode = tmem_objnode_alloc(obj);
            if (!newnode) {
                ret = -ENOMEM;
                goto out;
            }
            newnode->slots[0] = obj->objnode_tree_root;
            newnode->slots_in_use = 1;
            obj->objnode_tree_root = newnode;
            obj->objnode_tree_height++;
        } while (height > obj->objnode_tree_height);
    }
insert:
    /* walk down from the root, materializing missing interior nodes */
    slot = obj->objnode_tree_root;
    height = obj->objnode_tree_height;
    shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
    while (height > 0) {
        if (slot == NULL) {
            /* add a child objnode. */
            slot = tmem_objnode_alloc(obj);
            if (!slot) {
                ret = -ENOMEM;
                goto out;
            }
            if (objnode) {

                objnode->slots[offset] = slot;
                objnode->slots_in_use++;
            } else
                obj->objnode_tree_root = slot;
        }
        /* go down a level */
        offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
        objnode = slot;
        slot = objnode->slots[offset];
        shift -= OBJNODE_TREE_MAP_SHIFT;
        height--;
    }
    BUG_ON(slot != NULL);   /* the target slot must be empty */
    if (objnode) {
        objnode->slots_in_use++;
        objnode->slots[offset] = pampd;
    } else
        obj->objnode_tree_root = pampd; /* height-0 tree: root IS the pampd */
    obj->pampd_count++;
out:
    return ret;
}
403
/*
 * Remove and return the pampd at @index (NULL if none).  The
 * root-to-leaf path is recorded so that objnodes left empty by the
 * removal can be freed while unwinding; the tree is also shrunk in
 * height while the root holds only a single leftmost child.
 */
static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
{
    struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
    struct tmem_objnode_tree_path *pathp = path;
    struct tmem_objnode *slot = NULL;
    unsigned int height, shift;
    int offset;

    BUG_ON(obj == NULL);
    ASSERT_SENTINEL(obj, OBJ);
    BUG_ON(obj->pool == NULL);
    ASSERT_SENTINEL(obj->pool, POOL);
    height = obj->objnode_tree_height;
    if (index > tmem_objnode_tree_h2max[height])
        goto out;
    slot = obj->objnode_tree_root;
    if (height == 0 && obj->objnode_tree_root) {
        /* height-0 tree: the root pointer itself is the pampd */
        obj->objnode_tree_root = NULL;
        goto out;
    }
    shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
    pathp->objnode = NULL;  /* sentinel entry terminates the unwind loop */
    do {
        if (slot == NULL)
            goto out;
        pathp++;
        offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
        pathp->offset = offset;
        pathp->objnode = slot;
        slot = slot->slots[offset];
        shift -= OBJNODE_TREE_MAP_SHIFT;
        height--;
    } while (height > 0);
    if (slot == NULL)
        goto out;
    /* clear the leaf slot, then free ancestors that became empty */
    while (pathp->objnode) {
        pathp->objnode->slots[pathp->offset] = NULL;
        pathp->objnode->slots_in_use--;
        if (pathp->objnode->slots_in_use) {
            if (pathp->objnode == obj->objnode_tree_root) {
                /* collapse single-leftmost-child root chains */
                while (obj->objnode_tree_height > 0 &&
                  obj->objnode_tree_root->slots_in_use == 1 &&
                  obj->objnode_tree_root->slots[0]) {
                    struct tmem_objnode *to_free =
                        obj->objnode_tree_root;

                    obj->objnode_tree_root =
                            to_free->slots[0];
                    obj->objnode_tree_height--;
                    to_free->slots[0] = NULL;
                    to_free->slots_in_use = 0;
                    tmem_objnode_free(to_free);
                }
            }
            goto out;
        }
        tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
        pathp--;
    }
    /* every node on the path was freed: the tree is now empty */
    obj->objnode_tree_height = 0;
    obj->objnode_tree_root = NULL;

out:
    if (slot != NULL)
        obj->pampd_count--;
    BUG_ON(obj->pampd_count < 0);
    return slot;
}
472
473/* recursively walk the objnode_tree destroying pampds and objnodes */
474static void tmem_objnode_node_destroy(struct tmem_obj *obj,
475                    struct tmem_objnode *objnode,
476                    unsigned int ht)
477{
478    int i;
479
480    if (ht == 0)
481        return;
482    for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
483        if (objnode->slots[i]) {
484            if (ht == 1) {
485                obj->pampd_count--;
486                (*tmem_pamops.free)(objnode->slots[i],
487                        obj->pool, NULL, 0);
488                objnode->slots[i] = NULL;
489                continue;
490            }
491            tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
492            tmem_objnode_free(objnode->slots[i]);
493            objnode->slots[i] = NULL;
494        }
495    }
496}
497
/*
 * Destroy every pampd (and every objnode) in the object's tree, then
 * let the PAM implementation clean up via free_obj.  No-op when the
 * tree is already empty.
 */
static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
{
    if (obj->objnode_tree_root == NULL)
        return;
    if (obj->objnode_tree_height == 0) {
        /* height-0 tree: the root pointer is the lone pampd */
        obj->pampd_count--;
        (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
    } else {
        tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
                    obj->objnode_tree_height);
        tmem_objnode_free(obj->objnode_tree_root);
        obj->objnode_tree_height = 0;
    }
    obj->objnode_tree_root = NULL;
    (*tmem_pamops.free_obj)(obj->pool, obj);
}
514
515/*
516 * Tmem is operated on by a set of well-defined actions:
517 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
518 * (The tmem ABI allows for subpages and exchanges but these operations
519 * are not included in this implementation.)
520 *
521 * These "tmem core" operations are implemented in the following functions.
522 */
523
524/*
525 * "Put" a page, e.g. copy a page from the kernel into newly allocated
526 * PAM space (if such space is available). Tmem_put is complicated by
527 * a corner case: What if a page with matching handle already exists in
528 * tmem? To guarantee coherency, one of two actions is necessary: Either
529 * the data for the page must be overwritten, or the page must be
530 * "flushed" so that the data is not accessible to a subsequent "get".
531 * Since these "duplicate puts" are relatively rare, this implementation
532 * always flushes for simplicity.
533 */
/*
 * Store @data at handle (pool, oidp, index).  See the block comment
 * above for the duplicate-put policy.  Returns 0 on success, -ENOMEM
 * when the object or pampd cannot be allocated.
 */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
        char *data, size_t size, bool raw, bool ephemeral)
{
    struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
    void *pampd = NULL, *pampd_del = NULL;
    int ret = -ENOMEM;
    struct tmem_hashbucket *hb;

    hb = &pool->hashbucket[tmem_oid_hash(oidp)];
    spin_lock(&hb->lock);
    obj = objfound = tmem_obj_find(hb, oidp);
    if (obj != NULL) {
        pampd = tmem_pampd_lookup_in_obj(objfound, index);
        if (pampd != NULL) {
            /* if found, is a dup put, flush the old one */
            pampd_del = tmem_pampd_delete_from_obj(obj, index);
            BUG_ON(pampd_del != pampd);
            (*tmem_pamops.free)(pampd, pool, oidp, index);
            if (obj->pampd_count == 0) {
                /* object is now empty: reclassify it as "new" so
                 * the error path below will release it */
                objnew = obj;
                objfound = NULL;
            }
            pampd = NULL;
        }
    } else {
        obj = objnew = (*tmem_hostops.obj_alloc)(pool);
        if (unlikely(obj == NULL)) {
            ret = -ENOMEM;
            goto out;
        }
        tmem_obj_init(obj, hb, pool, oidp);
    }
    BUG_ON(obj == NULL);
    /* exactly one of objnew/objfound must alias obj */
    BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
    pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
                    obj->pool, &obj->oid, index);
    if (unlikely(pampd == NULL))
        goto free;
    ret = tmem_pampd_add_to_obj(obj, index, pampd);
    if (unlikely(ret == -ENOMEM))
        /* may have partially built objnode tree ("stump") */
        goto delete_and_free;
    goto out;

delete_and_free:
    (void)tmem_pampd_delete_from_obj(obj, index);
free:
    if (pampd)
        (*tmem_pamops.free)(pampd, pool, NULL, 0);
    if (objnew) {
        /* don't leave behind an object this call created/emptied */
        tmem_obj_free(objnew, hb);
        (*tmem_hostops.obj_free)(objnew, pool);
    }
out:
    spin_unlock(&hb->lock);
    return ret;
}
591
592/*
593 * "Get" a page, e.g. if one can be found, copy the tmem page with the
594 * matching handle from PAM space to the kernel. By tmem definition,
595 * when a "get" is successful on an ephemeral page, the page is "flushed",
596 * and when a "get" is successful on a persistent page, the page is retained
597 * in tmem. Note that to preserve
598 * coherency, "get" can never be skipped if tmem contains the data.
599 * That is, if a get is done with a certain handle and fails, any
600 * subsequent "get" must also fail (unless of course there is a
601 * "put" done with the same handle).
602
603 */
/*
 * Copy the page at handle (pool, oidp, index) into @data (see block
 * comment above for flush-on-get semantics).  Returns 0 on success,
 * negative on failure.
 */
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
        char *data, size_t *size, bool raw, int get_and_free)
{
    struct tmem_obj *obj;
    void *pampd;
    bool ephemeral = is_ephemeral(pool);
    int ret = -1;
    struct tmem_hashbucket *hb;
    /* get_and_free: 1 = always free, 0 = free iff pool is ephemeral,
     * anything else = keep the page */
    bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
    bool lock_held = false;

    hb = &pool->hashbucket[tmem_oid_hash(oidp)];
    spin_lock(&hb->lock);
    lock_held = true;
    obj = tmem_obj_find(hb, oidp);
    if (obj == NULL)
        goto out;
    if (free)
        pampd = tmem_pampd_delete_from_obj(obj, index);
    else
        pampd = tmem_pampd_lookup_in_obj(obj, index);
    if (pampd == NULL)
        goto out;
    if (free) {
        if (obj->pampd_count == 0) {
            /* last page gone: release the now-empty object */
            tmem_obj_free(obj, hb);
            (*tmem_hostops.obj_free)(obj, pool);
            obj = NULL;
        }
    }
    if (tmem_pamops.is_remote(pampd)) {
        /* drop the bucket lock before fetching remote data --
         * presumably because the fetch may block.  NOTE(review):
         * pampd is then used outside the lock; confirm a racing
         * flush cannot free it in that window. */
        lock_held = false;
        spin_unlock(&hb->lock);
    }
    if (free)
        ret = (*tmem_pamops.get_data_and_free)(
                data, size, raw, pampd, pool, oidp, index);
    else
        ret = (*tmem_pamops.get_data)(
                data, size, raw, pampd, pool, oidp, index);
    if (ret < 0)
        goto out;
    ret = 0;
out:
    if (lock_held)
        spin_unlock(&hb->lock);
    return ret;
}
652
653/*
654 * If a page in tmem matches the handle, "flush" this page from tmem such
655 * that any subsequent "get" does not succeed (unless, of course, there
656 * was another "put" with the same handle).
657 */
658int tmem_flush_page(struct tmem_pool *pool,
659                struct tmem_oid *oidp, uint32_t index)
660{
661    struct tmem_obj *obj;
662    void *pampd;
663    int ret = -1;
664    struct tmem_hashbucket *hb;
665
666    hb = &pool->hashbucket[tmem_oid_hash(oidp)];
667    spin_lock(&hb->lock);
668    obj = tmem_obj_find(hb, oidp);
669    if (obj == NULL)
670        goto out;
671    pampd = tmem_pampd_delete_from_obj(obj, index);
672    if (pampd == NULL)
673        goto out;
674    (*tmem_pamops.free)(pampd, pool, oidp, index);
675    if (obj->pampd_count == 0) {
676        tmem_obj_free(obj, hb);
677        (*tmem_hostops.obj_free)(obj, pool);
678    }
679    ret = 0;
680
681out:
682    spin_unlock(&hb->lock);
683    return ret;
684}
685
686/*
687 * If a page in tmem matches the handle, replace the page so that any
688 * subsequent "get" gets the new page. Returns 0 if
689 * there was a page to replace, else returns -1.
690 */
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
            uint32_t index, void *new_pampd)
{
    struct tmem_obj *obj;
    int ret = -1;
    struct tmem_hashbucket *hb;

    hb = &pool->hashbucket[tmem_oid_hash(oidp)];
    spin_lock(&hb->lock);
    obj = tmem_obj_find(hb, oidp);
    if (obj == NULL)
        goto out;
    /* NULL is passed on when no pampd existed at index -- the
     * replace_in_obj callback apparently must tolerate that;
     * TODO(review): confirm against the registered pamops */
    new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
    ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
out:
    spin_unlock(&hb->lock);
    return ret;
}
709
710/*
711 * "Flush" all pages in tmem matching this oid.
712 */
713int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
714{
715    struct tmem_obj *obj;
716    struct tmem_hashbucket *hb;
717    int ret = -1;
718
719    hb = &pool->hashbucket[tmem_oid_hash(oidp)];
720    spin_lock(&hb->lock);
721    obj = tmem_obj_find(hb, oidp);
722    if (obj == NULL)
723        goto out;
724    tmem_pampd_destroy_all_in_obj(obj);
725    tmem_obj_free(obj, hb);
726    (*tmem_hostops.obj_free)(obj, pool);
727    ret = 0;
728
729out:
730    spin_unlock(&hb->lock);
731    return ret;
732}
733
734/*
735 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
736 * all subsequent access to this tmem_pool.
737 */
738int tmem_destroy_pool(struct tmem_pool *pool)
739{
740    int ret = -1;
741
742    if (pool == NULL)
743        goto out;
744    tmem_pool_flush(pool, 1);
745    ret = 0;
746out:
747    return ret;
748}
749
750static LIST_HEAD(tmem_global_pool_list);
751
752/*
753 * Create a new tmem_pool with the provided flag and return
754 * a pool id provided by the tmem host implementation.
755 */
756void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
757{
758    int persistent = flags & TMEM_POOL_PERSIST;
759    int shared = flags & TMEM_POOL_SHARED;
760    struct tmem_hashbucket *hb = &pool->hashbucket[0];
761    int i;
762
763    for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
764        hb->obj_rb_root = RB_ROOT;
765        spin_lock_init(&hb->lock);
766    }
767    INIT_LIST_HEAD(&pool->pool_list);
768    atomic_set(&pool->obj_count, 0);
769    SET_SENTINEL(pool, POOL);
770    list_add_tail(&pool->pool_list, &tmem_global_pool_list);
771    pool->persistent = persistent;
772    pool->shared = shared;
773}
774

Archive Download this file



interactive