Date: 2010-05-26 07:00:54 (13 years 10 months ago)
Author: Nitin Gupta
Commit: 6141d61bc7a710a2867192deb1d0eaac8d182cb8
Message: Rename ramzswap files to zram

Related changes:
- Modify relevant Kconfig and Makefile accordingly.
- Change include filenames in code.
- Remove dependency on CONFIG_SWAP in Kconfig as zram usage
is no longer limited to swap disks.

Signed-off-by: Nitin Gupta <ngupta@vflare.org>
Files: drivers/staging/Kconfig (1 diff)
drivers/staging/Makefile (1 diff)
drivers/staging/ramzswap/Kconfig (1 diff)
drivers/staging/ramzswap/Makefile (1 diff)
drivers/staging/ramzswap/ramzswap.txt (1 diff)
drivers/staging/ramzswap/ramzswap_drv.c (1 diff)
drivers/staging/ramzswap/ramzswap_drv.h (1 diff)
drivers/staging/ramzswap/ramzswap_ioctl.h (1 diff)
drivers/staging/ramzswap/xvmalloc.c (1 diff)
drivers/staging/ramzswap/xvmalloc.h (1 diff)
drivers/staging/ramzswap/xvmalloc_int.h (1 diff)
drivers/staging/zram/Kconfig (1 diff)
drivers/staging/zram/Makefile (1 diff)
drivers/staging/zram/xvmalloc.c (1 diff)
drivers/staging/zram/xvmalloc.h (1 diff)
drivers/staging/zram/xvmalloc_int.h (1 diff)
drivers/staging/zram/zram.txt (1 diff)
drivers/staging/zram/zram_drv.c (1 diff)
drivers/staging/zram/zram_drv.h (1 diff)
drivers/staging/zram/zram_ioctl.h (1 diff)

Change Details

drivers/staging/Kconfig
117117
118118source "drivers/staging/iio/Kconfig"
119119
120source "drivers/staging/ramzswap/Kconfig"
120source "drivers/staging/zram/Kconfig"
121121
122122source "drivers/staging/wlags49_h2/Kconfig"
123123
drivers/staging/Makefile
3939obj-$(CONFIG_MRST_RAR_HANDLER) += memrar/
4040obj-$(CONFIG_DX_SEP) += sep/
4141obj-$(CONFIG_IIO) += iio/
42obj-$(CONFIG_RAMZSWAP) += ramzswap/
42obj-$(CONFIG_ZRAM) += zram/
4343obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/
4444obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/
4545obj-$(CONFIG_BATMAN_ADV) += batman-adv/
drivers/staging/ramzswap/Kconfig
1config RAMZSWAP
2    tristate "Compressed in-memory swap device (ramzswap)"
3    depends on SWAP
4    select LZO_COMPRESS
5    select LZO_DECOMPRESS
6    default n
7    help
8      Creates virtual block devices which can (only) be used as swap
9      disks. Pages swapped to these disks are compressed and stored in
10      memory itself.
11
12      See ramzswap.txt for more information.
13      Project home: http://compcache.googlecode.com/
14
15config RAMZSWAP_STATS
16    bool "Enable ramzswap stats"
17    depends on RAMZSWAP
18    default y
19    help
20      Enable statistics collection for ramzswap. This adds only a minimal
21      overhead. If unsure, say Y.
drivers/staging/ramzswap/Makefile
1ramzswap-objs := ramzswap_drv.o xvmalloc.o
2
3obj-$(CONFIG_RAMZSWAP) += ramzswap.o
drivers/staging/ramzswap/ramzswap.txt
1ramzswap: Compressed RAM based swap device
2
3Project home: http://compcache.googlecode.com/
4
5* Introduction
6
7The ramzswap module creates RAM based block devices which can (only) be used as
8swap disks. Pages swapped to these devices are compressed and stored in memory
9itself. See project home for use cases, performance numbers and a lot more.
10
11Individual ramzswap devices are configured and initialized using the rzscontrol
12userspace utility as shown in the examples below. See the rzscontrol man page
13for more details.
14
15* Usage
16
17The following shows a typical sequence of steps for using ramzswap.
18
191) Load Modules:
20    modprobe ramzswap num_devices=4
21    This creates 4 (uninitialized) devices: /dev/ramzswap{0,1,2,3}
22    (num_devices parameter is optional. Default: 1)
23
242) Initialize:
25    Use rzscontrol utility to configure and initialize individual
26    ramzswap devices. Example:
27    rzscontrol /dev/ramzswap2 --init # uses default value of disksize_kb
28
29    *See rzscontrol man page for more details and examples*
30
313) Activate:
32    swapon /dev/ramzswap2 # or any other initialized ramzswap device
33
344) Stats:
35    rzscontrol /dev/ramzswap2 --stats
36
375) Deactivate:
38    swapoff /dev/ramzswap2
39
406) Reset:
41    rzscontrol /dev/ramzswap2 --reset
42    (This frees all the memory allocated for this device).
43
44
45Please report any problems at:
46 - Mailing list: linux-mm-cc at laptop dot org
47 - Issue tracker: http://code.google.com/p/compcache/issues/list
48
49Nitin Gupta
50ngupta@vflare.org
drivers/staging/ramzswap/ramzswap_drv.c
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#define KMSG_COMPONENT "ramzswap"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
20#include <linux/bitops.h>
21#include <linux/blkdev.h>
22#include <linux/buffer_head.h>
23#include <linux/device.h>
24#include <linux/genhd.h>
25#include <linux/highmem.h>
26#include <linux/slab.h>
27#include <linux/lzo.h>
28#include <linux/string.h>
29#include <linux/swap.h>
30#include <linux/swapops.h>
31#include <linux/vmalloc.h>
32
33#include "ramzswap_drv.h"
34
35/* Globals */
36static int ramzswap_major;
37static struct ramzswap *devices;
38
39/* Module params (documentation at end) */
40static unsigned int num_devices;
41
42static int rzs_test_flag(struct ramzswap *rzs, u32 index,
43            enum rzs_pageflags flag)
44{
45    return rzs->table[index].flags & BIT(flag);
46}
47
48static void rzs_set_flag(struct ramzswap *rzs, u32 index,
49            enum rzs_pageflags flag)
50{
51    rzs->table[index].flags |= BIT(flag);
52}
53
54static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
55            enum rzs_pageflags flag)
56{
57    rzs->table[index].flags &= ~BIT(flag);
58}
59
60static int page_zero_filled(void *ptr)
61{
62    unsigned int pos;
63    unsigned long *page;
64
65    page = (unsigned long *)ptr;
66
67    for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
68        if (page[pos])
69            return 0;
70    }
71
72    return 1;
73}
74
75static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
76{
77    if (!rzs->disksize) {
78        pr_info(
79        "disk size not provided. You can use disksize_kb module "
80        "param to specify size.\nUsing default: (%u%% of RAM).\n",
81        default_disksize_perc_ram
82        );
83        rzs->disksize = default_disksize_perc_ram *
84                    (totalram_bytes / 100);
85    }
86
87    if (rzs->disksize > 2 * (totalram_bytes)) {
88        pr_info(
89        "There is little point creating a ramzswap of greater than "
90        "twice the size of memory since we expect a 2:1 compression "
91        "ratio. Note that ramzswap uses about 0.1%% of the size of "
92        "the swap device when not in use so a huge ramzswap is "
93        "wasteful.\n"
94        "\tMemory Size: %zu kB\n"
95        "\tSize you selected: %zu kB\n"
96        "Continuing anyway ...\n",
97        totalram_bytes >> 10, rzs->disksize
98        );
99    }
100
101    rzs->disksize &= PAGE_MASK;
102}
103
104static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
105            struct ramzswap_ioctl_stats *s)
106{
107    s->disksize = rzs->disksize;
108
109#if defined(CONFIG_RAMZSWAP_STATS)
110    {
111    struct ramzswap_stats *rs = &rzs->stats;
112    size_t succ_writes, mem_used;
113    unsigned int good_compress_perc = 0, no_compress_perc = 0;
114
115    mem_used = xv_get_total_size_bytes(rzs->mem_pool)
116            + (rs->pages_expand << PAGE_SHIFT);
117    succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
118            rzs_stat64_read(rzs, &rs->failed_writes);
119
120    if (succ_writes && rs->pages_stored) {
121        good_compress_perc = rs->good_compress * 100
122                    / rs->pages_stored;
123        no_compress_perc = rs->pages_expand * 100
124                    / rs->pages_stored;
125    }
126
127    s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
128    s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
129    s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
130    s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
131    s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
132    s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
133    s->pages_zero = rs->pages_zero;
134
135    s->good_compress_pct = good_compress_perc;
136    s->pages_expand_pct = no_compress_perc;
137
138    s->pages_stored = rs->pages_stored;
139    s->pages_used = mem_used >> PAGE_SHIFT;
140    s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
141    s->compr_data_size = rs->compr_size;
142    s->mem_used_total = mem_used;
143    }
144#endif /* CONFIG_RAMZSWAP_STATS */
145}
146
147static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
148{
149    u32 clen;
150    void *obj;
151
152    struct page *page = rzs->table[index].page;
153    u32 offset = rzs->table[index].offset;
154
155    if (unlikely(!page)) {
156        /*
157         * No memory is allocated for zero filled pages.
158         * Simply clear zero page flag.
159         */
160        if (rzs_test_flag(rzs, index, RZS_ZERO)) {
161            rzs_clear_flag(rzs, index, RZS_ZERO);
162            rzs_stat_dec(&rzs->stats.pages_zero);
163        }
164        return;
165    }
166
167    if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
168        clen = PAGE_SIZE;
169        __free_page(page);
170        rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
171        rzs_stat_dec(&rzs->stats.pages_expand);
172        goto out;
173    }
174
175    obj = kmap_atomic(page, KM_USER0) + offset;
176    clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
177    kunmap_atomic(obj, KM_USER0);
178
179    xv_free(rzs->mem_pool, page, offset);
180    if (clen <= PAGE_SIZE / 2)
181        rzs_stat_dec(&rzs->stats.good_compress);
182
183out:
184    rzs->stats.compr_size -= clen;
185    rzs_stat_dec(&rzs->stats.pages_stored);
186
187    rzs->table[index].page = NULL;
188    rzs->table[index].offset = 0;
189}
190
191static void handle_zero_page(struct page *page)
192{
193    void *user_mem;
194
195    user_mem = kmap_atomic(page, KM_USER0);
196    memset(user_mem, 0, PAGE_SIZE);
197    kunmap_atomic(user_mem, KM_USER0);
198
199    flush_dcache_page(page);
200}
201
202static void handle_uncompressed_page(struct ramzswap *rzs,
203                struct page *page, u32 index)
204{
205    unsigned char *user_mem, *cmem;
206
207    user_mem = kmap_atomic(page, KM_USER0);
208    cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
209            rzs->table[index].offset;
210
211    memcpy(user_mem, cmem, PAGE_SIZE);
212    kunmap_atomic(user_mem, KM_USER0);
213    kunmap_atomic(cmem, KM_USER1);
214
215    flush_dcache_page(page);
216}
217
218static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
219{
220
221    int i;
222    u32 index;
223    struct bio_vec *bvec;
224
225    rzs_stat64_inc(rzs, &rzs->stats.num_reads);
226
227    index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
228    bio_for_each_segment(bvec, bio, i) {
229        int ret;
230        size_t clen;
231        struct page *page;
232        struct zobj_header *zheader;
233        unsigned char *user_mem, *cmem;
234
235        page = bvec->bv_page;
236
237        if (rzs_test_flag(rzs, index, RZS_ZERO)) {
238            handle_zero_page(page);
239            continue;
240        }
241
242        /* Requested page is not present in compressed area */
243        if (unlikely(!rzs->table[index].page)) {
244            pr_debug("Read before write: sector=%lu, size=%u",
245                (ulong)(bio->bi_sector), bio->bi_size);
246            /* Do nothing */
247            continue;
248        }
249
250        /* Page is stored uncompressed since it's incompressible */
251        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
252            handle_uncompressed_page(rzs, page, index);
253            continue;
254        }
255
256        user_mem = kmap_atomic(page, KM_USER0);
257        clen = PAGE_SIZE;
258
259        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
260                rzs->table[index].offset;
261
262        ret = lzo1x_decompress_safe(
263            cmem + sizeof(*zheader),
264            xv_get_object_size(cmem) - sizeof(*zheader),
265            user_mem, &clen);
266
267        kunmap_atomic(user_mem, KM_USER0);
268        kunmap_atomic(cmem, KM_USER1);
269
270        /* Should NEVER happen. Return bio error if it does. */
271        if (unlikely(ret != LZO_E_OK)) {
272            pr_err("Decompression failed! err=%d, page=%u\n",
273                ret, index);
274            rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
275            goto out;
276        }
277
278        flush_dcache_page(page);
279        index++;
280    }
281
282    set_bit(BIO_UPTODATE, &bio->bi_flags);
283    bio_endio(bio, 0);
284    return 0;
285
286out:
287    bio_io_error(bio);
288    return 0;
289}
290
291static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
292{
293    int i;
294    u32 index;
295    struct bio_vec *bvec;
296
297    rzs_stat64_inc(rzs, &rzs->stats.num_writes);
298
299    index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
300
301    bio_for_each_segment(bvec, bio, i) {
302        int ret;
303        u32 offset;
304        size_t clen;
305        struct zobj_header *zheader;
306        struct page *page, *page_store;
307        unsigned char *user_mem, *cmem, *src;
308
309        page = bvec->bv_page;
310        src = rzs->compress_buffer;
311
312        /*
313         * System overwrites unused sectors. Free memory associated
314         * with this sector now.
315         */
316        if (rzs->table[index].page ||
317                rzs_test_flag(rzs, index, RZS_ZERO))
318            ramzswap_free_page(rzs, index);
319
320        mutex_lock(&rzs->lock);
321
322        user_mem = kmap_atomic(page, KM_USER0);
323        if (page_zero_filled(user_mem)) {
324            kunmap_atomic(user_mem, KM_USER0);
325            mutex_unlock(&rzs->lock);
326            rzs_stat_inc(&rzs->stats.pages_zero);
327            rzs_set_flag(rzs, index, RZS_ZERO);
328            continue;
329        }
330
331        ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
332                    rzs->compress_workmem);
333
334        kunmap_atomic(user_mem, KM_USER0);
335
336        if (unlikely(ret != LZO_E_OK)) {
337            mutex_unlock(&rzs->lock);
338            pr_err("Compression failed! err=%d\n", ret);
339            rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
340            goto out;
341        }
342
343        /*
344         * Page is incompressible. Store it as-is (uncompressed)
345         * since we do not want to return too many swap write
346         * errors, which have the side effect of hanging the system.
347         */
348        if (unlikely(clen > max_zpage_size)) {
349            clen = PAGE_SIZE;
350            page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
351            if (unlikely(!page_store)) {
352                mutex_unlock(&rzs->lock);
353                pr_info("Error allocating memory for "
354                    "incompressible page: %u\n", index);
355                rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
356                goto out;
357            }
358
359            offset = 0;
360            rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
361            rzs_stat_inc(&rzs->stats.pages_expand);
362            rzs->table[index].page = page_store;
363            src = kmap_atomic(page, KM_USER0);
364            goto memstore;
365        }
366
367        if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
368                &rzs->table[index].page, &offset,
369                GFP_NOIO | __GFP_HIGHMEM)) {
370            mutex_unlock(&rzs->lock);
371            pr_info("Error allocating memory for compressed "
372                "page: %u, size=%zu\n", index, clen);
373            rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
374            goto out;
375        }
376
377memstore:
378        rzs->table[index].offset = offset;
379
380        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
381                rzs->table[index].offset;
382
383#if 0
384        /* Back-reference needed for memory defragmentation */
385        if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
386            zheader = (struct zobj_header *)cmem;
387            zheader->table_idx = index;
388            cmem += sizeof(*zheader);
389        }
390#endif
391
392        memcpy(cmem, src, clen);
393
394        kunmap_atomic(cmem, KM_USER1);
395        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
396            kunmap_atomic(src, KM_USER0);
397
398        /* Update stats */
399        rzs->stats.compr_size += clen;
400        rzs_stat_inc(&rzs->stats.pages_stored);
401        if (clen <= PAGE_SIZE / 2)
402            rzs_stat_inc(&rzs->stats.good_compress);
403
404        mutex_unlock(&rzs->lock);
405        index++;
406    }
407
408    set_bit(BIO_UPTODATE, &bio->bi_flags);
409    bio_endio(bio, 0);
410    return 0;
411
412out:
413    bio_io_error(bio);
414    return 0;
415}
416
417/*
418 * Check if request is within bounds and page aligned.
419 */
420static inline int valid_io_request(struct ramzswap *rzs, struct bio *bio)
421{
422    if (unlikely(
423        (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
424        (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
425        (bio->bi_size & (PAGE_SIZE - 1)))) {
426
427        return 0;
428    }
429
430    /* I/O request is valid */
431    return 1;
432}
433
434/*
435 * Handler function for all ramzswap I/O requests.
436 */
437static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
438{
439    int ret = 0;
440    struct ramzswap *rzs = queue->queuedata;
441
442    if (unlikely(!rzs->init_done)) {
443        bio_io_error(bio);
444        return 0;
445    }
446
447    if (!valid_io_request(rzs, bio)) {
448        rzs_stat64_inc(rzs, &rzs->stats.invalid_io);
449        bio_io_error(bio);
450        return 0;
451    }
452
453    switch (bio_data_dir(bio)) {
454    case READ:
455        ret = ramzswap_read(rzs, bio);
456        break;
457
458    case WRITE:
459        ret = ramzswap_write(rzs, bio);
460        break;
461    }
462
463    return ret;
464}
465
466static void reset_device(struct ramzswap *rzs)
467{
468    size_t index;
469
470    /* Do not accept any new I/O request */
471    rzs->init_done = 0;
472
473    /* Free various per-device buffers */
474    kfree(rzs->compress_workmem);
475    free_pages((unsigned long)rzs->compress_buffer, 1);
476
477    rzs->compress_workmem = NULL;
478    rzs->compress_buffer = NULL;
479
480    /* Free all pages that are still in this ramzswap device */
481    for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) {
482        struct page *page;
483        u16 offset;
484
485        page = rzs->table[index].page;
486        offset = rzs->table[index].offset;
487
488        if (!page)
489            continue;
490
491        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
492            __free_page(page);
493        else
494            xv_free(rzs->mem_pool, page, offset);
495    }
496
497    vfree(rzs->table);
498    rzs->table = NULL;
499
500    xv_destroy_pool(rzs->mem_pool);
501    rzs->mem_pool = NULL;
502
503    /* Reset stats */
504    memset(&rzs->stats, 0, sizeof(rzs->stats));
505
506    rzs->disksize = 0;
507}
508
509static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
510{
511    int ret;
512    size_t num_pages;
513
514    if (rzs->init_done) {
515        pr_info("Device already initialized!\n");
516        return -EBUSY;
517    }
518
519    ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
520
521    rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
522    if (!rzs->compress_workmem) {
523        pr_err("Error allocating compressor working memory!\n");
524        ret = -ENOMEM;
525        goto fail;
526    }
527
528    rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
529    if (!rzs->compress_buffer) {
530        pr_err("Error allocating compressor buffer space\n");
531        ret = -ENOMEM;
532        goto fail;
533    }
534
535    num_pages = rzs->disksize >> PAGE_SHIFT;
536    rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
537    if (!rzs->table) {
538        pr_err("Error allocating ramzswap address table\n");
539        /* To prevent accessing table entries during cleanup */
540        rzs->disksize = 0;
541        ret = -ENOMEM;
542        goto fail;
543    }
544    memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
545
546    set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
547
548    /* ramzswap devices sort of resemble non-rotational disks */
549    queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
550
551    rzs->mem_pool = xv_create_pool();
552    if (!rzs->mem_pool) {
553        pr_err("Error creating memory pool\n");
554        ret = -ENOMEM;
555        goto fail;
556    }
557
558    rzs->init_done = 1;
559
560    pr_debug("Initialization done!\n");
561    return 0;
562
563fail:
564    reset_device(rzs);
565
566    pr_err("Initialization failed: err=%d\n", ret);
567    return ret;
568}
569
570static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
571{
572    if (rzs->init_done)
573        reset_device(rzs);
574
575    return 0;
576}
577
578static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
579            unsigned int cmd, unsigned long arg)
580{
581    int ret = 0;
582    size_t disksize_kb;
583
584    struct ramzswap *rzs = bdev->bd_disk->private_data;
585
586    switch (cmd) {
587    case RZSIO_SET_DISKSIZE_KB:
588        if (rzs->init_done) {
589            ret = -EBUSY;
590            goto out;
591        }
592        if (copy_from_user(&disksize_kb, (void *)arg,
593                        _IOC_SIZE(cmd))) {
594            ret = -EFAULT;
595            goto out;
596        }
597        rzs->disksize = disksize_kb << 10;
598        pr_info("Disk size set to %zu kB\n", disksize_kb);
599        break;
600
601    case RZSIO_GET_STATS:
602    {
603        struct ramzswap_ioctl_stats *stats;
604        if (!rzs->init_done) {
605            ret = -ENOTTY;
606            goto out;
607        }
608        stats = kzalloc(sizeof(*stats), GFP_KERNEL);
609        if (!stats) {
610            ret = -ENOMEM;
611            goto out;
612        }
613        ramzswap_ioctl_get_stats(rzs, stats);
614        if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
615            kfree(stats);
616            ret = -EFAULT;
617            goto out;
618        }
619        kfree(stats);
620        break;
621    }
622    case RZSIO_INIT:
623        ret = ramzswap_ioctl_init_device(rzs);
624        break;
625
626    case RZSIO_RESET:
627        /* Do not reset an active device! */
628        if (bdev->bd_holders) {
629            ret = -EBUSY;
630            goto out;
631        }
632
633        /* Make sure all pending I/O is finished */
634        if (bdev)
635            fsync_bdev(bdev);
636
637        ret = ramzswap_ioctl_reset_device(rzs);
638        break;
639
640    default:
641        pr_info("Invalid ioctl %u\n", cmd);
642        ret = -ENOTTY;
643    }
644
645out:
646    return ret;
647}
648
649void ramzswap_slot_free_notify(struct block_device *bdev, unsigned long index)
650{
651    struct ramzswap *rzs;
652
653    rzs = bdev->bd_disk->private_data;
654    ramzswap_free_page(rzs, index);
655    rzs_stat64_inc(rzs, &rzs->stats.notify_free);
656}
657
658static const struct block_device_operations ramzswap_devops = {
659    .ioctl = ramzswap_ioctl,
660    .swap_slot_free_notify = ramzswap_slot_free_notify,
661    .owner = THIS_MODULE
662};
663
664static int create_device(struct ramzswap *rzs, int device_id)
665{
666    int ret = 0;
667
668    mutex_init(&rzs->lock);
669    spin_lock_init(&rzs->stat64_lock);
670
671    rzs->queue = blk_alloc_queue(GFP_KERNEL);
672    if (!rzs->queue) {
673        pr_err("Error allocating disk queue for device %d\n",
674            device_id);
675        ret = -ENOMEM;
676        goto out;
677    }
678
679    blk_queue_make_request(rzs->queue, ramzswap_make_request);
680    rzs->queue->queuedata = rzs;
681
682     /* gendisk structure */
683    rzs->disk = alloc_disk(1);
684    if (!rzs->disk) {
685        blk_cleanup_queue(rzs->queue);
686        pr_warning("Error allocating disk structure for device %d\n",
687            device_id);
688        ret = -ENOMEM;
689        goto out;
690    }
691
692    rzs->disk->major = ramzswap_major;
693    rzs->disk->first_minor = device_id;
694    rzs->disk->fops = &ramzswap_devops;
695    rzs->disk->queue = rzs->queue;
696    rzs->disk->private_data = rzs;
697    snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
698
699    /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
700    set_capacity(rzs->disk, 0);
701
702    /*
703     * To ensure that we always get PAGE_SIZE-aligned
704     * and n*PAGE_SIZE-sized I/O requests.
705     */
706    blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
707    blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
708    blk_queue_io_min(rzs->disk->queue, PAGE_SIZE);
709    blk_queue_io_opt(rzs->disk->queue, PAGE_SIZE);
710
711    add_disk(rzs->disk);
712
713    rzs->init_done = 0;
714
715out:
716    return ret;
717}
718
719static void destroy_device(struct ramzswap *rzs)
720{
721    if (rzs->disk) {
722        del_gendisk(rzs->disk);
723        put_disk(rzs->disk);
724    }
725
726    if (rzs->queue)
727        blk_cleanup_queue(rzs->queue);
728}
729
730static int __init ramzswap_init(void)
731{
732    int ret, dev_id;
733
734    if (num_devices > max_num_devices) {
735        pr_warning("Invalid value for num_devices: %u\n",
736                num_devices);
737        ret = -EINVAL;
738        goto out;
739    }
740
741    ramzswap_major = register_blkdev(0, "ramzswap");
742    if (ramzswap_major <= 0) {
743        pr_warning("Unable to get major number\n");
744        ret = -EBUSY;
745        goto out;
746    }
747
748    if (!num_devices) {
749        pr_info("num_devices not specified. Using default: 1\n");
750        num_devices = 1;
751    }
752
753    /* Allocate the device array and initialize each one */
754    pr_info("Creating %u devices ...\n", num_devices);
755    devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
756    if (!devices) {
757        ret = -ENOMEM;
758        goto unregister;
759    }
760
761    for (dev_id = 0; dev_id < num_devices; dev_id++) {
762        ret = create_device(&devices[dev_id], dev_id);
763        if (ret)
764            goto free_devices;
765    }
766
767    return 0;
768
769free_devices:
770    while (dev_id)
771        destroy_device(&devices[--dev_id]);
772unregister:
773    unregister_blkdev(ramzswap_major, "ramzswap");
774out:
775    return ret;
776}
777
778static void __exit ramzswap_exit(void)
779{
780    int i;
781    struct ramzswap *rzs;
782
783    for (i = 0; i < num_devices; i++) {
784        rzs = &devices[i];
785
786        destroy_device(rzs);
787        if (rzs->init_done)
788            reset_device(rzs);
789    }
790
791    unregister_blkdev(ramzswap_major, "ramzswap");
792
793    kfree(devices);
794    pr_debug("Cleanup done!\n");
795}
796
797module_param(num_devices, uint, 0);
798MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
799
800module_init(ramzswap_init);
801module_exit(ramzswap_exit);
802
803MODULE_LICENSE("Dual BSD/GPL");
804MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
805MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
drivers/staging/ramzswap/ramzswap_drv.h
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#ifndef _RAMZSWAP_DRV_H_
16#define _RAMZSWAP_DRV_H_
17
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20
21#include "ramzswap_ioctl.h"
22#include "xvmalloc.h"
23
24/*
25 * Some arbitrary value. This is just to catch
26 * an invalid value for the num_devices module parameter.
27 */
28static const unsigned max_num_devices = 32;
29
30/*
31 * Stored at beginning of each compressed object.
32 *
33 * It stores back-reference to table entry which points to this
34 * object. This is required to support memory defragmentation.
35 */
36struct zobj_header {
37#if 0
38    u32 table_idx;
39#endif
40};
41
42/*-- Configurable parameters */
43
44/* Default ramzswap disk size: 25% of total RAM */
45static const unsigned default_disksize_perc_ram = 25;
46
47/*
48 * Pages that compress to size greater than this are stored
49 * uncompressed in memory.
50 */
51static const unsigned max_zpage_size = PAGE_SIZE / 4 * 3;
52
53/*
54 * NOTE: max_zpage_size must be less than or equal to:
55 * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
56 * otherwise, xv_malloc() would always return failure.
57 */
58
59/*-- End of configurable params */
60
61#define SECTOR_SHIFT 9
62#define SECTOR_SIZE (1 << SECTOR_SHIFT)
63#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
64#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
65
66/* Flags for ramzswap pages (table[page_no].flags) */
67enum rzs_pageflags {
68    /* Page is stored uncompressed */
69    RZS_UNCOMPRESSED,
70
71    /* Page consists entirely of zeros */
72    RZS_ZERO,
73
74    __NR_RZS_PAGEFLAGS,
75};
76
77/*-- Data structures */
78
79/*
80 * Allocated for each swap slot, indexed by page no.
81 * These table entries must fit exactly in a page.
82 */
83struct table {
84    struct page *page;
85    u16 offset;
86    u8 count; /* object ref count (not yet used) */
87    u8 flags;
88} __attribute__((aligned(4)));
89
90struct ramzswap_stats {
91    /* basic stats */
92    size_t compr_size; /* compressed size of pages stored -
93                 * needed to enforce memlimit */
94    /* more stats */
95#if defined(CONFIG_RAMZSWAP_STATS)
96    u64 num_reads; /* failed + successful */
97    u64 num_writes; /* --do-- */
98    u64 failed_reads; /* should NEVER! happen */
99    u64 failed_writes; /* can happen when memory is too low */
100    u64 invalid_io; /* non-swap I/O requests */
101    u64 notify_free; /* no. of swap slot free notifications */
102    u32 pages_zero; /* no. of zero filled pages */
103    u32 pages_stored; /* no. of pages currently stored */
104    u32 good_compress; /* no. of pages with compression ratio<=50% */
105    u32 pages_expand; /* no. of incompressible pages */
106#endif
107};
108
109struct ramzswap {
110    struct xv_pool *mem_pool;
111    void *compress_workmem;
112    void *compress_buffer;
113    struct table *table;
114    spinlock_t stat64_lock; /* protect 64-bit stats */
115    struct mutex lock; /* protect compression buffers against
116                 * concurrent writes */
117    struct request_queue *queue;
118    struct gendisk *disk;
119    int init_done;
120    /*
121     * This is the limit on the amount of *uncompressed* data
122     * we can hold. When a backing swap device is provided, it is
123     * set equal to device size.
124     */
125    size_t disksize; /* bytes */
126
127    struct ramzswap_stats stats;
128};
129
130/*-- */
131
132/* Debugging and Stats */
133#if defined(CONFIG_RAMZSWAP_STATS)
134static void rzs_stat_inc(u32 *v)
135{
136    *v = *v + 1;
137}
138
139static void rzs_stat_dec(u32 *v)
140{
141    *v = *v - 1;
142}
143
144static void rzs_stat64_inc(struct ramzswap *rzs, u64 *v)
145{
146    spin_lock(&rzs->stat64_lock);
147    *v = *v + 1;
148    spin_unlock(&rzs->stat64_lock);
149}
150
151static u64 rzs_stat64_read(struct ramzswap *rzs, u64 *v)
152{
153    u64 val;
154
155    spin_lock(&rzs->stat64_lock);
156    val = *v;
157    spin_unlock(&rzs->stat64_lock);
158
159    return val;
160}
161#else
162#define rzs_stat_inc(v)
163#define rzs_stat_dec(v)
164#define rzs_stat64_inc(r, v)
165#define rzs_stat64_read(r, v)
166#endif /* CONFIG_RAMZSWAP_STATS */
167
168#endif
drivers/staging/ramzswap/ramzswap_ioctl.h
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#ifndef _RAMZSWAP_IOCTL_H_
16#define _RAMZSWAP_IOCTL_H_
17
18struct ramzswap_ioctl_stats {
19    u64 disksize; /* user specified or equal to backing swap
20                 * size (if present) */
21    u64 num_reads; /* failed + successful */
22    u64 num_writes; /* --do-- */
23    u64 failed_reads; /* should NEVER! happen */
24    u64 failed_writes; /* can happen when memory is too low */
25    u64 invalid_io; /* non-swap I/O requests */
26    u64 notify_free; /* no. of swap slot free notifications */
27    u32 pages_zero; /* no. of zero filled pages */
28    u32 good_compress_pct; /* percent of pages with compression ratio<=50% */
29    u32 pages_expand_pct; /* percent of incompressible pages */
30    u32 pages_stored;
31    u32 pages_used;
32    u64 orig_data_size;
33    u64 compr_data_size;
34    u64 mem_used_total;
35} __attribute__ ((packed, aligned(4)));
36
37#define RZSIO_SET_DISKSIZE_KB _IOW('z', 0, size_t)
38#define RZSIO_GET_STATS _IOR('z', 1, struct ramzswap_ioctl_stats)
39#define RZSIO_INIT _IO('z', 2)
40#define RZSIO_RESET _IO('z', 3)
41
42#endif
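
Note: the ioctls above are the interface that the rzscontrol utility drives. As a rough
illustration only (this is not the rzscontrol source; it assumes ramzswap_ioctl.h is
reachable from userspace with the kernel u32/u64 types mapped to stdint equivalents, and
the device node and disk size are arbitrary example values), a userspace program could
size, initialize and query a device like this:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ramzswap_ioctl.h"

int main(void)
{
    struct ramzswap_ioctl_stats stats;
    size_t disksize_kb = 262144;    /* example: 256 MB */
    int ret = 1;
    int fd = open("/dev/ramzswap0", O_RDWR);

    if (fd < 0)
        return 1;

    /* RZSIO_SET_DISKSIZE_KB takes a pointer to a size_t holding kB */
    if (ioctl(fd, RZSIO_SET_DISKSIZE_KB, &disksize_kb) < 0)
        goto out;

    /* RZSIO_INIT allocates the table and memory pool for the device */
    if (ioctl(fd, RZSIO_INIT) < 0)
        goto out;

    /* RZSIO_GET_STATS fills the ramzswap_ioctl_stats structure above */
    if (ioctl(fd, RZSIO_GET_STATS, &stats) < 0)
        goto out;

    printf("orig_data_size=%llu compr_data_size=%llu\n",
           (unsigned long long)stats.orig_data_size,
           (unsigned long long)stats.compr_data_size);
    ret = 0;
out:
    close(fd);
    return ret;
}
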
drivers/staging/ramzswap/xvmalloc.c
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/highmem.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/slab.h>
19
20#include "xvmalloc.h"
21#include "xvmalloc_int.h"
22
23static void stat_inc(u64 *value)
24{
25    *value = *value + 1;
26}
27
28static void stat_dec(u64 *value)
29{
30    *value = *value - 1;
31}
32
33static int test_flag(struct block_header *block, enum blockflags flag)
34{
35    return block->prev & BIT(flag);
36}
37
38static void set_flag(struct block_header *block, enum blockflags flag)
39{
40    block->prev |= BIT(flag);
41}
42
43static void clear_flag(struct block_header *block, enum blockflags flag)
44{
45    block->prev &= ~BIT(flag);
46}
47
48/*
49 * Given a <page, offset> pair, provide a dereferenceable pointer.
50 * This is called from xv_malloc/xv_free path, so it
51 * needs to be fast.
52 */
53static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
54{
55    unsigned char *base;
56
57    base = kmap_atomic(page, type);
58    return base + offset;
59}
60
61static void put_ptr_atomic(void *ptr, enum km_type type)
62{
63    kunmap_atomic(ptr, type);
64}
65
66static u32 get_blockprev(struct block_header *block)
67{
68    return block->prev & PREV_MASK;
69}
70
71static void set_blockprev(struct block_header *block, u16 new_offset)
72{
73    block->prev = new_offset | (block->prev & FLAGS_MASK);
74}
75
76static struct block_header *BLOCK_NEXT(struct block_header *block)
77{
78    return (struct block_header *)
79        ((char *)block + block->size + XV_ALIGN);
80}
81
82/*
83 * Get index of free list containing blocks of maximum size
84 * which is less than or equal to given size.
85 */
86static u32 get_index_for_insert(u32 size)
87{
88    if (unlikely(size > XV_MAX_ALLOC_SIZE))
89        size = XV_MAX_ALLOC_SIZE;
90    size &= ~FL_DELTA_MASK;
91    return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
92}
93
94/*
95 * Get index of free list having blocks of size greater than
96 * or equal to requested size.
97 */
98static u32 get_index(u32 size)
99{
100    if (unlikely(size < XV_MIN_ALLOC_SIZE))
101        size = XV_MIN_ALLOC_SIZE;
102    size = ALIGN(size, FL_DELTA);
103    return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
104}
105
106/**
107 * find_block - find block of at least given size
108 * @pool: memory pool to search from
109 * @size: size of block required
110 * @page: page containing required block
111 * @offset: offset within the page where block is located.
112 *
113 * Searches two level bitmap to locate block of at least
114 * the given size. If such a block is found, it provides
115 * <page, offset> to identify this block and returns index
116 * in freelist where we found this block.
117 * Otherwise, returns 0 and <page, offset> params are not touched.
118 */
119static u32 find_block(struct xv_pool *pool, u32 size,
120            struct page **page, u32 *offset)
121{
122    ulong flbitmap, slbitmap;
123    u32 flindex, slindex, slbitstart;
124
125    /* There are no free blocks in this pool */
126    if (!pool->flbitmap)
127        return 0;
128
129    /* Get freelist index corresponding to this size */
130    slindex = get_index(size);
131    slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
132    slbitstart = slindex % BITS_PER_LONG;
133
134    /*
135     * If freelist is not empty at this index, we found the
136     * block - head of this list. This is approximate best-fit match.
137     */
138    if (test_bit(slbitstart, &slbitmap)) {
139        *page = pool->freelist[slindex].page;
140        *offset = pool->freelist[slindex].offset;
141        return slindex;
142    }
143
144    /*
145     * No best-fit found. Search a bit further in bitmap for a free block.
146     * Second level bitmap consists of series of 32-bit chunks. Search
147     * further in the chunk where we expected a best-fit, starting from
148     * index location found above.
149     */
150    slbitstart++;
151    slbitmap >>= slbitstart;
152
153    /* Skip this search if we were already at end of this bitmap chunk */
154    if ((slbitstart != BITS_PER_LONG) && slbitmap) {
155        slindex += __ffs(slbitmap) + 1;
156        *page = pool->freelist[slindex].page;
157        *offset = pool->freelist[slindex].offset;
158        return slindex;
159    }
160
161    /* Now do a full two-level bitmap search to find next nearest fit */
162    flindex = slindex / BITS_PER_LONG;
163
164    flbitmap = (pool->flbitmap) >> (flindex + 1);
165    if (!flbitmap)
166        return 0;
167
168    flindex += __ffs(flbitmap) + 1;
169    slbitmap = pool->slbitmap[flindex];
170    slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
171    *page = pool->freelist[slindex].page;
172    *offset = pool->freelist[slindex].offset;
173
174    return slindex;
175}
176
177/*
178 * Insert block at <page, offset> in freelist of given pool.
179 * freelist used depends on block size.
180 */
181static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
182            struct block_header *block)
183{
184    u32 flindex, slindex;
185    struct block_header *nextblock;
186
187    slindex = get_index_for_insert(block->size);
188    flindex = slindex / BITS_PER_LONG;
189
190    block->link.prev_page = 0;
191    block->link.prev_offset = 0;
192    block->link.next_page = pool->freelist[slindex].page;
193    block->link.next_offset = pool->freelist[slindex].offset;
194    pool->freelist[slindex].page = page;
195    pool->freelist[slindex].offset = offset;
196
197    if (block->link.next_page) {
198        nextblock = get_ptr_atomic(block->link.next_page,
199                    block->link.next_offset, KM_USER1);
200        nextblock->link.prev_page = page;
201        nextblock->link.prev_offset = offset;
202        put_ptr_atomic(nextblock, KM_USER1);
203    }
204
205    __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
206    __set_bit(flindex, &pool->flbitmap);
207}
208
209/*
210 * Remove block from head of freelist. Index 'slindex' identifies the freelist.
211 */
212static void remove_block_head(struct xv_pool *pool,
213            struct block_header *block, u32 slindex)
214{
215    struct block_header *tmpblock;
216    u32 flindex = slindex / BITS_PER_LONG;
217
218    pool->freelist[slindex].page = block->link.next_page;
219    pool->freelist[slindex].offset = block->link.next_offset;
220    block->link.prev_page = 0;
221    block->link.prev_offset = 0;
222
223    if (!pool->freelist[slindex].page) {
224        __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
225        if (!pool->slbitmap[flindex])
226            __clear_bit(flindex, &pool->flbitmap);
227    } else {
228        /*
229         * DEBUG ONLY: We need not reinitialize freelist head previous
230         * pointer to 0 - we never depend on its value. But just for
231         * sanity, lets do it.
232         */
233        tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
234                pool->freelist[slindex].offset, KM_USER1);
235        tmpblock->link.prev_page = 0;
236        tmpblock->link.prev_offset = 0;
237        put_ptr_atomic(tmpblock, KM_USER1);
238    }
239}
240
241/*
242 * Remove block from freelist. Index 'slindex' identifies the freelist.
243 */
244static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
245            struct block_header *block, u32 slindex)
246{
247    u32 flindex;
248    struct block_header *tmpblock;
249
250    if (pool->freelist[slindex].page == page
251       && pool->freelist[slindex].offset == offset) {
252        remove_block_head(pool, block, slindex);
253        return;
254    }
255
256    flindex = slindex / BITS_PER_LONG;
257
258    if (block->link.prev_page) {
259        tmpblock = get_ptr_atomic(block->link.prev_page,
260                block->link.prev_offset, KM_USER1);
261        tmpblock->link.next_page = block->link.next_page;
262        tmpblock->link.next_offset = block->link.next_offset;
263        put_ptr_atomic(tmpblock, KM_USER1);
264    }
265
266    if (block->link.next_page) {
267        tmpblock = get_ptr_atomic(block->link.next_page,
268                block->link.next_offset, KM_USER1);
269        tmpblock->link.prev_page = block->link.prev_page;
270        tmpblock->link.prev_offset = block->link.prev_offset;
271        put_ptr_atomic(tmpblock, KM_USER1);
272    }
273}
274
275/*
276 * Allocate a page and add it to freelist of given pool.
277 */
278static int grow_pool(struct xv_pool *pool, gfp_t flags)
279{
280    struct page *page;
281    struct block_header *block;
282
283    page = alloc_page(flags);
284    if (unlikely(!page))
285        return -ENOMEM;
286
287    stat_inc(&pool->total_pages);
288
289    spin_lock(&pool->lock);
290    block = get_ptr_atomic(page, 0, KM_USER0);
291
292    block->size = PAGE_SIZE - XV_ALIGN;
293    set_flag(block, BLOCK_FREE);
294    clear_flag(block, PREV_FREE);
295    set_blockprev(block, 0);
296
297    insert_block(pool, page, 0, block);
298
299    put_ptr_atomic(block, KM_USER0);
300    spin_unlock(&pool->lock);
301
302    return 0;
303}
304
305/*
306 * Create a memory pool. Allocates freelist, bitmaps and other
307 * per-pool metadata.
308 */
309struct xv_pool *xv_create_pool(void)
310{
311    u32 ovhd_size;
312    struct xv_pool *pool;
313
314    ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
315    pool = kzalloc(ovhd_size, GFP_KERNEL);
316    if (!pool)
317        return NULL;
318
319    spin_lock_init(&pool->lock);
320
321    return pool;
322}
323
324void xv_destroy_pool(struct xv_pool *pool)
325{
326    kfree(pool);
327}
328
329/**
330 * xv_malloc - Allocate block of given size from pool.
331 * @pool: pool to allocate from
332 * @size: size of block to allocate
333 * @page: page no. that holds the object
334 * @offset: location of object within page
335 *
336 * On success, <page, offset> identifies block allocated
337 * and 0 is returned. On failure, <page, offset> is set to
338 * 0 and -ENOMEM is returned.
339 *
340 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
341 */
342int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
343        u32 *offset, gfp_t flags)
344{
345    int error;
346    u32 index, tmpsize, origsize, tmpoffset;
347    struct block_header *block, *tmpblock;
348
349    *page = NULL;
350    *offset = 0;
351    origsize = size;
352
353    if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
354        return -ENOMEM;
355
356    size = ALIGN(size, XV_ALIGN);
357
358    spin_lock(&pool->lock);
359
360    index = find_block(pool, size, page, offset);
361
362    if (!*page) {
363        spin_unlock(&pool->lock);
364        if (flags & GFP_NOWAIT)
365            return -ENOMEM;
366        error = grow_pool(pool, flags);
367        if (unlikely(error))
368            return error;
369
370        spin_lock(&pool->lock);
371        index = find_block(pool, size, page, offset);
372    }
373
374    if (!*page) {
375        spin_unlock(&pool->lock);
376        return -ENOMEM;
377    }
378
379    block = get_ptr_atomic(*page, *offset, KM_USER0);
380
381    remove_block_head(pool, block, index);
382
383    /* Split the block if required */
384    tmpoffset = *offset + size + XV_ALIGN;
385    tmpsize = block->size - size;
386    tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
387    if (tmpsize) {
388        tmpblock->size = tmpsize - XV_ALIGN;
389        set_flag(tmpblock, BLOCK_FREE);
390        clear_flag(tmpblock, PREV_FREE);
391
392        set_blockprev(tmpblock, *offset);
393        if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
394            insert_block(pool, *page, tmpoffset, tmpblock);
395
396        if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
397            tmpblock = BLOCK_NEXT(tmpblock);
398            set_blockprev(tmpblock, tmpoffset);
399        }
400    } else {
401        /* This block is exact fit */
402        if (tmpoffset != PAGE_SIZE)
403            clear_flag(tmpblock, PREV_FREE);
404    }
405
406    block->size = origsize;
407    clear_flag(block, BLOCK_FREE);
408
409    put_ptr_atomic(block, KM_USER0);
410    spin_unlock(&pool->lock);
411
412    *offset += XV_ALIGN;
413
414    return 0;
415}
416
417/*
418 * Free block identified with <page, offset>
419 */
420void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
421{
422    void *page_start;
423    struct block_header *block, *tmpblock;
424
425    offset -= XV_ALIGN;
426
427    spin_lock(&pool->lock);
428
429    page_start = get_ptr_atomic(page, 0, KM_USER0);
430    block = (struct block_header *)((char *)page_start + offset);
431
432    /* Catch double free bugs */
433    BUG_ON(test_flag(block, BLOCK_FREE));
434
435    block->size = ALIGN(block->size, XV_ALIGN);
436
437    tmpblock = BLOCK_NEXT(block);
438    if (offset + block->size + XV_ALIGN == PAGE_SIZE)
439        tmpblock = NULL;
440
441    /* Merge next block if it's free */
442    if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
443        /*
444         * Blocks smaller than XV_MIN_ALLOC_SIZE
445         * are not inserted in any free list.
446         */
447        if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
448            remove_block(pool, page,
449                    offset + block->size + XV_ALIGN, tmpblock,
450                    get_index_for_insert(tmpblock->size));
451        }
452        block->size += tmpblock->size + XV_ALIGN;
453    }
454
455    /* Merge previous block if it's free */
456    if (test_flag(block, PREV_FREE)) {
457        tmpblock = (struct block_header *)((char *)(page_start) +
458                        get_blockprev(block));
459        offset = offset - tmpblock->size - XV_ALIGN;
460
461        if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
462            remove_block(pool, page, offset, tmpblock,
463                    get_index_for_insert(tmpblock->size));
464
465        tmpblock->size += block->size + XV_ALIGN;
466        block = tmpblock;
467    }
468
469    /* No used objects in this page. Free it. */
470    if (block->size == PAGE_SIZE - XV_ALIGN) {
471        put_ptr_atomic(page_start, KM_USER0);
472        spin_unlock(&pool->lock);
473
474        __free_page(page);
475        stat_dec(&pool->total_pages);
476        return;
477    }
478
479    set_flag(block, BLOCK_FREE);
480    if (block->size >= XV_MIN_ALLOC_SIZE)
481        insert_block(pool, page, offset, block);
482
483    if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
484        tmpblock = BLOCK_NEXT(block);
485        set_flag(tmpblock, PREV_FREE);
486        set_blockprev(tmpblock, offset);
487    }
488
489    put_ptr_atomic(page_start, KM_USER0);
490    spin_unlock(&pool->lock);
491}
492
493u32 xv_get_object_size(void *obj)
494{
495    struct block_header *blk;
496
497    blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
498    return blk->size;
499}
500
501/*
502 * Returns total memory used by allocator (userdata + metadata)
503 */
504u64 xv_get_total_size_bytes(struct xv_pool *pool)
505{
506    return pool->total_pages << PAGE_SHIFT;
507}
drivers/staging/ramzswap/xvmalloc.h
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_H_
14#define _XV_MALLOC_H_
15
16#include <linux/types.h>
17
18struct xv_pool;
19
20struct xv_pool *xv_create_pool(void);
21void xv_destroy_pool(struct xv_pool *pool);
22
23int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
24            u32 *offset, gfp_t flags);
25void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
26
27u32 xv_get_object_size(void *obj);
28u64 xv_get_total_size_bytes(struct xv_pool *pool);
29
30#endif
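
Since xv_malloc() hands back an object as a <page, offset> pair rather than a pointer,
callers map the page before touching the object, exactly as ramzswap_drv.c does. A
minimal kernel-side sketch of that pattern (names are illustrative and error handling
is trimmed):

static int xvmalloc_copy_in(struct xv_pool *pool, void *src, u32 len)
{
    struct page *page;
    u32 offset;
    void *obj;

    /* len must not exceed XV_MAX_ALLOC_SIZE, or xv_malloc() fails */
    if (xv_malloc(pool, len, &page, &offset, GFP_NOIO | __GFP_HIGHMEM))
        return -ENOMEM;

    /* Map the page and copy the payload into the allocated block */
    obj = kmap_atomic(page, KM_USER0) + offset;
    memcpy(obj, src, len);
    kunmap_atomic(obj, KM_USER0);

    /* The same <page, offset> pair later identifies the block to free */
    xv_free(pool, page, offset);
    return 0;
}
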
drivers/staging/ramzswap/xvmalloc_int.h
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_INT_H_
14#define _XV_MALLOC_INT_H_
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18
19/* User configurable params */
20
21/* Must be power of two */
22#define XV_ALIGN_SHIFT 2
23#define XV_ALIGN (1 << XV_ALIGN_SHIFT)
24#define XV_ALIGN_MASK (XV_ALIGN - 1)
25
26/* This must be greater than sizeof(link_free) */
27#define XV_MIN_ALLOC_SIZE 32
28#define XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN)
29
30/* Free lists are separated by FL_DELTA bytes */
31#define FL_DELTA_SHIFT 3
32#define FL_DELTA (1 << FL_DELTA_SHIFT)
33#define FL_DELTA_MASK (FL_DELTA - 1)
34#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
35                / FL_DELTA + 1)
36
37#define MAX_FLI DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
38
39/* End of user params */
40
41enum blockflags {
42    BLOCK_FREE,
43    PREV_FREE,
44    __NR_BLOCKFLAGS,
45};
46
47#define FLAGS_MASK XV_ALIGN_MASK
48#define PREV_MASK (~FLAGS_MASK)
49
50struct freelist_entry {
51    struct page *page;
52    u16 offset;
53    u16 pad;
54};
55
56struct link_free {
57    struct page *prev_page;
58    struct page *next_page;
59    u16 prev_offset;
60    u16 next_offset;
61};
62
63struct block_header {
64    union {
65        /* This common header must be XV_ALIGN bytes */
66        u8 common[XV_ALIGN];
67        struct {
68            u16 size;
69            u16 prev;
70        };
71    };
72    struct link_free link;
73};
74
75struct xv_pool {
76    ulong flbitmap;
77    ulong slbitmap[MAX_FLI];
78    spinlock_t lock;
79
80    struct freelist_entry freelist[NUM_FREE_LISTS];
81
82    /* stats */
83    u64 total_pages;
84};
85
86#endif
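
To make the free-list geometry concrete, the small standalone sketch below recomputes
the derived values for one common configuration. The 4096-byte page size and 64-bit
long are assumptions for the example only; the numbers follow directly from the macros
above.

#include <stdio.h>

int main(void)
{
    unsigned page_size = 4096;                  /* assumed PAGE_SIZE */
    unsigned xv_align = 1u << 2;                /* XV_ALIGN = 4 */
    unsigned fl_delta = 1u << 3;                /* FL_DELTA = 8 */
    unsigned max_alloc = page_size - xv_align;  /* XV_MAX_ALLOC_SIZE = 4092 */
    unsigned num_lists = (max_alloc - 32) / fl_delta + 1;  /* 508 free lists */
    unsigned max_fli = (num_lists + 63) / 64;   /* 8 second-level bitmap words */

    printf("%u free lists, %u bitmap words\n", num_lists, max_fli);
    return 0;
}
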
drivers/staging/zram/Kconfig
1config ZRAM
2    tristate "Compressed RAM block device support"
3    select LZO_COMPRESS
4    select LZO_DECOMPRESS
5    default n
6    help
7      Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
8      Pages written to these disks are compressed and stored in memory
9      itself. These disks allow very fast I/O and compression provides
10      a good amount of memory savings.
11
12      It has several use cases, for example: /tmp storage, swap disks,
13      and possibly many more.
14
15      See zram.txt for more information.
16      Project home: http://compcache.googlecode.com/
17
18config ZRAM_STATS
19    bool "Enable statistics for compressed RAM disks"
20    depends on ZRAM
21    default y
22    help
23      Enable statistics collection for compressed RAM devices. Statistics
24      are exported through the ioctl interface, so you have to use the zramconfig
25      program to get them. This adds only a minimal overhead.
26
27      If unsure, say Y.
28
drivers/staging/zram/Makefile
1zram-objs := zram_drv.o xvmalloc.o
2
3obj-$(CONFIG_ZRAM) += zram.o
drivers/staging/zram/xvmalloc.c
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/highmem.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/slab.h>
19
20#include "xvmalloc.h"
21#include "xvmalloc_int.h"
22
23static void stat_inc(u64 *value)
24{
25    *value = *value + 1;
26}
27
28static void stat_dec(u64 *value)
29{
30    *value = *value - 1;
31}
32
33static int test_flag(struct block_header *block, enum blockflags flag)
34{
35    return block->prev & BIT(flag);
36}
37
38static void set_flag(struct block_header *block, enum blockflags flag)
39{
40    block->prev |= BIT(flag);
41}
42
43static void clear_flag(struct block_header *block, enum blockflags flag)
44{
45    block->prev &= ~BIT(flag);
46}
47
48/*
49 * Given a <page, offset> pair, provide a dereferenceable pointer.
50 * This is called from xv_malloc/xv_free path, so it
51 * needs to be fast.
52 */
53static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
54{
55    unsigned char *base;
56
57    base = kmap_atomic(page, type);
58    return base + offset;
59}
60
61static void put_ptr_atomic(void *ptr, enum km_type type)
62{
63    kunmap_atomic(ptr, type);
64}
65
66static u32 get_blockprev(struct block_header *block)
67{
68    return block->prev & PREV_MASK;
69}
70
71static void set_blockprev(struct block_header *block, u16 new_offset)
72{
73    block->prev = new_offset | (block->prev & FLAGS_MASK);
74}
75
76static struct block_header *BLOCK_NEXT(struct block_header *block)
77{
78    return (struct block_header *)
79        ((char *)block + block->size + XV_ALIGN);
80}
81
82/*
83 * Get index of free list containing blocks of maximum size
84 * which is less than or equal to given size.
85 */
86static u32 get_index_for_insert(u32 size)
87{
88    if (unlikely(size > XV_MAX_ALLOC_SIZE))
89        size = XV_MAX_ALLOC_SIZE;
90    size &= ~FL_DELTA_MASK;
91    return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
92}
93
94/*
95 * Get index of free list having blocks of size greater than
96 * or equal to requested size.
97 */
98static u32 get_index(u32 size)
99{
100    if (unlikely(size < XV_MIN_ALLOC_SIZE))
101        size = XV_MIN_ALLOC_SIZE;
102    size = ALIGN(size, FL_DELTA);
103    return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
104}
105
106/**
107 * find_block - find block of at least given size
108 * @pool: memory pool to search from
109 * @size: size of block required
110 * @page: page containing required block
111 * @offset: offset within the page where block is located.
112 *
113 * Searches two level bitmap to locate block of at least
114 * the given size. If such a block is found, it provides
115 * <page, offset> to identify this block and returns index
116 * in freelist where we found this block.
117 * Otherwise, returns 0 and <page, offset> params are not touched.
118 */
119static u32 find_block(struct xv_pool *pool, u32 size,
120            struct page **page, u32 *offset)
121{
122    ulong flbitmap, slbitmap;
123    u32 flindex, slindex, slbitstart;
124
125    /* There are no free blocks in this pool */
126    if (!pool->flbitmap)
127        return 0;
128
129    /* Get freelist index corresponding to this size */
130    slindex = get_index(size);
131    slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
132    slbitstart = slindex % BITS_PER_LONG;
133
134    /*
135     * If freelist is not empty at this index, we found the
136     * block - head of this list. This is approximate best-fit match.
137     */
138    if (test_bit(slbitstart, &slbitmap)) {
139        *page = pool->freelist[slindex].page;
140        *offset = pool->freelist[slindex].offset;
141        return slindex;
142    }
143
144    /*
145     * No best-fit found. Search a bit further in bitmap for a free block.
146     * Second level bitmap consists of series of 32-bit chunks. Search
147     * further in the chunk where we expected a best-fit, starting from
148     * index location found above.
149     */
150    slbitstart++;
151    slbitmap >>= slbitstart;
152
153    /* Skip this search if we were already at end of this bitmap chunk */
154    if ((slbitstart != BITS_PER_LONG) && slbitmap) {
155        slindex += __ffs(slbitmap) + 1;
156        *page = pool->freelist[slindex].page;
157        *offset = pool->freelist[slindex].offset;
158        return slindex;
159    }
160
161    /* Now do a full two-level bitmap search to find next nearest fit */
162    flindex = slindex / BITS_PER_LONG;
163
164    flbitmap = (pool->flbitmap) >> (flindex + 1);
165    if (!flbitmap)
166        return 0;
167
168    flindex += __ffs(flbitmap) + 1;
169    slbitmap = pool->slbitmap[flindex];
170    slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
171    *page = pool->freelist[slindex].page;
172    *offset = pool->freelist[slindex].offset;
173
174    return slindex;
175}
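/*
 * Editor's note (illustrative, not in the original source): with
 * BITS_PER_LONG = 64, freelist index 9 lives in bit 9 of slbitmap[0]. If
 * bits 9..63 of slbitmap[0] are all clear, the search falls back to
 * flbitmap, whose bit i is set whenever slbitmap[i] has any bit set, and
 * resumes at the first non-empty second-level word after word 0.
 */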
176
177/*
178 * Insert block at <page, offset> in freelist of given pool.
179 * freelist used depends on block size.
180 */
181static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
182            struct block_header *block)
183{
184    u32 flindex, slindex;
185    struct block_header *nextblock;
186
187    slindex = get_index_for_insert(block->size);
188    flindex = slindex / BITS_PER_LONG;
189
190    block->link.prev_page = 0;
191    block->link.prev_offset = 0;
192    block->link.next_page = pool->freelist[slindex].page;
193    block->link.next_offset = pool->freelist[slindex].offset;
194    pool->freelist[slindex].page = page;
195    pool->freelist[slindex].offset = offset;
196
197    if (block->link.next_page) {
198        nextblock = get_ptr_atomic(block->link.next_page,
199                    block->link.next_offset, KM_USER1);
200        nextblock->link.prev_page = page;
201        nextblock->link.prev_offset = offset;
202        put_ptr_atomic(nextblock, KM_USER1);
203    }
204
205    __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
206    __set_bit(flindex, &pool->flbitmap);
207}
208
209/*
210 * Remove block from head of freelist. Index 'slindex' identifies the freelist.
211 */
212static void remove_block_head(struct xv_pool *pool,
213            struct block_header *block, u32 slindex)
214{
215    struct block_header *tmpblock;
216    u32 flindex = slindex / BITS_PER_LONG;
217
218    pool->freelist[slindex].page = block->link.next_page;
219    pool->freelist[slindex].offset = block->link.next_offset;
220    block->link.prev_page = 0;
221    block->link.prev_offset = 0;
222
223    if (!pool->freelist[slindex].page) {
224        __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
225        if (!pool->slbitmap[flindex])
226            __clear_bit(flindex, &pool->flbitmap);
227    } else {
228        /*
229         * DEBUG ONLY: We need not reinitialize freelist head previous
230         * pointer to 0 - we never depend on its value. But just for
231         * sanity, let's do it.
232         */
233        tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
234                pool->freelist[slindex].offset, KM_USER1);
235        tmpblock->link.prev_page = 0;
236        tmpblock->link.prev_offset = 0;
237        put_ptr_atomic(tmpblock, KM_USER1);
238    }
239}
240
241/*
242 * Remove block from freelist. Index 'slindex' identifies the freelist.
243 */
244static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
245            struct block_header *block, u32 slindex)
246{
247    u32 flindex;
248    struct block_header *tmpblock;
249
250    if (pool->freelist[slindex].page == page
251       && pool->freelist[slindex].offset == offset) {
252        remove_block_head(pool, block, slindex);
253        return;
254    }
255
256    flindex = slindex / BITS_PER_LONG;
257
258    if (block->link.prev_page) {
259        tmpblock = get_ptr_atomic(block->link.prev_page,
260                block->link.prev_offset, KM_USER1);
261        tmpblock->link.next_page = block->link.next_page;
262        tmpblock->link.next_offset = block->link.next_offset;
263        put_ptr_atomic(tmpblock, KM_USER1);
264    }
265
266    if (block->link.next_page) {
267        tmpblock = get_ptr_atomic(block->link.next_page,
268                block->link.next_offset, KM_USER1);
269        tmpblock->link.prev_page = block->link.prev_page;
270        tmpblock->link.prev_offset = block->link.prev_offset;
271        put_ptr_atomic(tmpblock, KM_USER1);
272    }
273}
274
275/*
276 * Allocate a page and add it to freelist of given pool.
277 */
278static int grow_pool(struct xv_pool *pool, gfp_t flags)
279{
280    struct page *page;
281    struct block_header *block;
282
283    page = alloc_page(flags);
284    if (unlikely(!page))
285        return -ENOMEM;
286
287    stat_inc(&pool->total_pages);
288
289    spin_lock(&pool->lock);
290    block = get_ptr_atomic(page, 0, KM_USER0);
291
292    block->size = PAGE_SIZE - XV_ALIGN;
293    set_flag(block, BLOCK_FREE);
294    clear_flag(block, PREV_FREE);
295    set_blockprev(block, 0);
296
297    insert_block(pool, page, 0, block);
298
299    put_ptr_atomic(block, KM_USER0);
300    spin_unlock(&pool->lock);
301
302    return 0;
303}
304
305/*
306 * Create a memory pool. Allocates freelist, bitmaps and other
307 * per-pool metadata.
308 */
309struct xv_pool *xv_create_pool(void)
310{
311    u32 ovhd_size;
312    struct xv_pool *pool;
313
314    ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
315    pool = kzalloc(ovhd_size, GFP_KERNEL);
316    if (!pool)
317        return NULL;
318
319    spin_lock_init(&pool->lock);
320
321    return pool;
322}
323
324void xv_destroy_pool(struct xv_pool *pool)
325{
326    kfree(pool);
327}
328
329/**
330 * xv_malloc - Allocate block of given size from pool.
331 * @pool: pool to allocate from
332 * @size: size of block to allocate
333 * @page: page no. that holds the object
334 * @offset: location of object within page
335 *
336 * On success, <page, offset> identifies block allocated
337 * and 0 is returned. On failure, <page, offset> is set to
338 * 0 and -ENOMEM is returned.
339 *
340 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
341 */
342int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
343        u32 *offset, gfp_t flags)
344{
345    int error;
346    u32 index, tmpsize, origsize, tmpoffset;
347    struct block_header *block, *tmpblock;
348
349    *page = NULL;
350    *offset = 0;
351    origsize = size;
352
353    if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
354        return -ENOMEM;
355
356    size = ALIGN(size, XV_ALIGN);
357
358    spin_lock(&pool->lock);
359
360    index = find_block(pool, size, page, offset);
361
362    if (!*page) {
363        spin_unlock(&pool->lock);
364        if (flags & GFP_NOWAIT)
365            return -ENOMEM;
366        error = grow_pool(pool, flags);
367        if (unlikely(error))
368            return error;
369
370        spin_lock(&pool->lock);
371        index = find_block(pool, size, page, offset);
372    }
373
374    if (!*page) {
375        spin_unlock(&pool->lock);
376        return -ENOMEM;
377    }
378
379    block = get_ptr_atomic(*page, *offset, KM_USER0);
380
381    remove_block_head(pool, block, index);
382
383    /* Split the block if required */
384    tmpoffset = *offset + size + XV_ALIGN;
385    tmpsize = block->size - size;
386    tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
387    if (tmpsize) {
388        tmpblock->size = tmpsize - XV_ALIGN;
389        set_flag(tmpblock, BLOCK_FREE);
390        clear_flag(tmpblock, PREV_FREE);
391
392        set_blockprev(tmpblock, *offset);
393        if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
394            insert_block(pool, *page, tmpoffset, tmpblock);
395
396        if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
397            tmpblock = BLOCK_NEXT(tmpblock);
398            set_blockprev(tmpblock, tmpoffset);
399        }
400    } else {
401        /* This block is exact fit */
402        if (tmpoffset != PAGE_SIZE)
403            clear_flag(tmpblock, PREV_FREE);
404    }
405
406    block->size = origsize;
407    clear_flag(block, BLOCK_FREE);
408
409    put_ptr_atomic(block, KM_USER0);
410    spin_unlock(&pool->lock);
411
412    *offset += XV_ALIGN;
413
414    return 0;
415}
416
417/*
418 * Free block identified with <page, offset>
419 */
420void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
421{
422    void *page_start;
423    struct block_header *block, *tmpblock;
424
425    offset -= XV_ALIGN;
426
427    spin_lock(&pool->lock);
428
429    page_start = get_ptr_atomic(page, 0, KM_USER0);
430    block = (struct block_header *)((char *)page_start + offset);
431
432    /* Catch double free bugs */
433    BUG_ON(test_flag(block, BLOCK_FREE));
434
435    block->size = ALIGN(block->size, XV_ALIGN);
436
437    tmpblock = BLOCK_NEXT(block);
438    if (offset + block->size + XV_ALIGN == PAGE_SIZE)
439        tmpblock = NULL;
440
441    /* Merge next block if it's free */
442    if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
443        /*
444         * Blocks smaller than XV_MIN_ALLOC_SIZE
445         * are not inserted in any free list.
446         */
447        if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
448            remove_block(pool, page,
449                    offset + block->size + XV_ALIGN, tmpblock,
450                    get_index_for_insert(tmpblock->size));
451        }
452        block->size += tmpblock->size + XV_ALIGN;
453    }
454
455    /* Merge previous block if it's free */
456    if (test_flag(block, PREV_FREE)) {
457        tmpblock = (struct block_header *)((char *)(page_start) +
458                        get_blockprev(block));
459        offset = offset - tmpblock->size - XV_ALIGN;
460
461        if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
462            remove_block(pool, page, offset, tmpblock,
463                    get_index_for_insert(tmpblock->size));
464
465        tmpblock->size += block->size + XV_ALIGN;
466        block = tmpblock;
467    }
468
469    /* No used objects in this page. Free it. */
470    if (block->size == PAGE_SIZE - XV_ALIGN) {
471        put_ptr_atomic(page_start, KM_USER0);
472        spin_unlock(&pool->lock);
473
474        __free_page(page);
475        stat_dec(&pool->total_pages);
476        return;
477    }
478
479    set_flag(block, BLOCK_FREE);
480    if (block->size >= XV_MIN_ALLOC_SIZE)
481        insert_block(pool, page, offset, block);
482
483    if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
484        tmpblock = BLOCK_NEXT(block);
485        set_flag(tmpblock, PREV_FREE);
486        set_blockprev(tmpblock, offset);
487    }
488
489    put_ptr_atomic(page_start, KM_USER0);
490    spin_unlock(&pool->lock);
491}
492
493u32 xv_get_object_size(void *obj)
494{
495    struct block_header *blk;
496
497    blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
498    return blk->size;
499}
500
501/*
502 * Returns total memory used by allocator (userdata + metadata)
503 */
504u64 xv_get_total_size_bytes(struct xv_pool *pool)
505{
506    return pool->total_pages << PAGE_SHIFT;
507}
drivers/staging/zram/xvmalloc.h
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_H_
14#define _XV_MALLOC_H_
15
16#include <linux/types.h>
17
18struct xv_pool;
19
20struct xv_pool *xv_create_pool(void);
21void xv_destroy_pool(struct xv_pool *pool);
22
23int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
24            u32 *offset, gfp_t flags);
25void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
26
27u32 xv_get_object_size(void *obj);
28u64 xv_get_total_size_bytes(struct xv_pool *pool);
29
30#endif
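
As a rough sketch of the calling convention these declarations imply (editor's illustration, not part of this commit), a caller such as zram_drv.c goes through roughly the following sequence; error handling is reduced to the bare minimum and the 200-byte size is arbitrary:

    struct xv_pool *pool;
    struct page *page;
    u32 offset;

    pool = xv_create_pool();
    if (!pool)
        return -ENOMEM;

    /* Allocate a 200-byte object; on success <page, offset> identifies it */
    if (xv_malloc(pool, 200, &page, &offset, GFP_NOIO | __GFP_HIGHMEM)) {
        xv_destroy_pool(pool);
        return -ENOMEM;
    }

    /* ... kmap_atomic() the page and add offset to get a usable pointer ... */

    xv_free(pool, page, offset);
    xv_destroy_pool(pool);
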
drivers/staging/zram/xvmalloc_int.h
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_INT_H_
14#define _XV_MALLOC_INT_H_
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18
19/* User configurable params */
20
21/* Must be power of two */
22#define XV_ALIGN_SHIFT 2
23#define XV_ALIGN (1 << XV_ALIGN_SHIFT)
24#define XV_ALIGN_MASK (XV_ALIGN - 1)
25
26/* This must be greater than sizeof(link_free) */
27#define XV_MIN_ALLOC_SIZE 32
28#define XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN)
29
30/* Free lists are separated by FL_DELTA bytes */
31#define FL_DELTA_SHIFT 3
32#define FL_DELTA (1 << FL_DELTA_SHIFT)
33#define FL_DELTA_MASK (FL_DELTA - 1)
34#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
35                / FL_DELTA + 1)
36
37#define MAX_FLI DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
38
39/* End of user params */
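/*
 * Editor's note (illustrative, not in the original source): with a 4 KiB
 * PAGE_SIZE and BITS_PER_LONG = 64, these parameters work out to
 * XV_ALIGN = 4, XV_MAX_ALLOC_SIZE = 4092, NUM_FREE_LISTS = 508 and
 * MAX_FLI = 8, i.e. 508 size classes spaced 8 bytes apart, tracked by
 * eight 64-bit second-level bitmap words and a single first-level word.
 */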
40
41enum blockflags {
42    BLOCK_FREE,
43    PREV_FREE,
44    __NR_BLOCKFLAGS,
45};
46
47#define FLAGS_MASK XV_ALIGN_MASK
48#define PREV_MASK (~FLAGS_MASK)
49
50struct freelist_entry {
51    struct page *page;
52    u16 offset;
53    u16 pad;
54};
55
56struct link_free {
57    struct page *prev_page;
58    struct page *next_page;
59    u16 prev_offset;
60    u16 next_offset;
61};
62
63struct block_header {
64    union {
65        /* This common header must be XV_ALIGN bytes */
66        u8 common[XV_ALIGN];
67        struct {
68            u16 size;
69            u16 prev;
70        };
71    };
72    struct link_free link;
73};
74
75struct xv_pool {
76    ulong flbitmap;
77    ulong slbitmap[MAX_FLI];
78    spinlock_t lock;
79
80    struct freelist_entry freelist[NUM_FREE_LISTS];
81
82    /* stats */
83    u64 total_pages;
84};
85
86#endif
drivers/staging/zram/zram.txt
1ramzswap: Compressed RAM based swap device
2-------------------------------------------
3
4Project home: http://compcache.googlecode.com/
5
6* Introduction
7
8The ramzswap module creates RAM based block devices which can (only) be used as
9swap disks. Pages swapped to these devices are compressed and stored in memory
10itself. See project home for use cases, performance numbers and a lot more.
11
12Individual ramzswap devices are configured and initialized using rzscontrol
13userspace utility as shown in examples below. See rzscontrol man page for more
14details.
15
16* Usage
17
18Following shows a typical sequence of steps for using ramzswap.
19
201) Load Modules:
21    modprobe ramzswap num_devices=4
22    This creates 4 (uninitialized) devices: /dev/ramzswap{0,1,2,3}
23    (num_devices parameter is optional. Default: 1)
24
252) Initialize:
26    Use rzscontrol utility to configure and initialize individual
27    ramzswap devices. Example:
28    rzscontrol /dev/ramzswap2 --init # uses default value of disksize_kb
29
30    *See rzscontrol man page for more details and examples*
31
323) Activate:
33    swapon /dev/ramzswap2 # or any other initialized ramzswap device
34
354) Stats:
36    rzscontrol /dev/ramzswap2 --stats
37
385) Deactivate:
39    swapoff /dev/ramzswap2
40
416) Reset:
42    rzscontrol /dev/ramzswap2 --reset
43    (This frees all the memory allocated for this device).
44
45
46Please report any problems at:
47 - Mailing list: linux-mm-cc at laptop dot org
48 - Issue tracker: http://code.google.com/p/compcache/issues/list
49
50Nitin Gupta
51ngupta@vflare.org
drivers/staging/zram/zram_drv.c
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#define KMSG_COMPONENT "ramzswap"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
20#include <linux/bitops.h>
21#include <linux/blkdev.h>
22#include <linux/buffer_head.h>
23#include <linux/device.h>
24#include <linux/genhd.h>
25#include <linux/highmem.h>
26#include <linux/slab.h>
27#include <linux/lzo.h>
28#include <linux/string.h>
29#include <linux/swap.h>
30#include <linux/swapops.h>
31#include <linux/vmalloc.h>
32
33#include "zram_drv.h"
34
35/* Globals */
36static int ramzswap_major;
37static struct ramzswap *devices;
38
39/* Module params (documentation at end) */
40static unsigned int num_devices;
41
42static int rzs_test_flag(struct ramzswap *rzs, u32 index,
43            enum rzs_pageflags flag)
44{
45    return rzs->table[index].flags & BIT(flag);
46}
47
48static void rzs_set_flag(struct ramzswap *rzs, u32 index,
49            enum rzs_pageflags flag)
50{
51    rzs->table[index].flags |= BIT(flag);
52}
53
54static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
55            enum rzs_pageflags flag)
56{
57    rzs->table[index].flags &= ~BIT(flag);
58}
59
60static int page_zero_filled(void *ptr)
61{
62    unsigned int pos;
63    unsigned long *page;
64
65    page = (unsigned long *)ptr;
66
67    for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
68        if (page[pos])
69            return 0;
70    }
71
72    return 1;
73}
74
75static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
76{
77    if (!rzs->disksize) {
78        pr_info(
79        "disk size not provided. You can use the RZSIO_SET_DISKSIZE_KB "
80        "ioctl to specify the size.\nUsing default: (%u%% of RAM).\n",
81        default_disksize_perc_ram
82        );
83        rzs->disksize = default_disksize_perc_ram *
84                    (totalram_bytes / 100);
85    }
86
87    if (rzs->disksize > 2 * (totalram_bytes)) {
88        pr_info(
89        "There is little point creating a ramzswap of greater than "
90        "twice the size of memory since we expect a 2:1 compression "
91        "ratio. Note that ramzswap uses about 0.1%% of the size of "
92        "the swap device when not in use so a huge ramzswap is "
93        "wasteful.\n"
94        "\tMemory Size: %zu kB\n"
95        "\tSize you selected: %zu kB\n"
96        "Continuing anyway ...\n",
97        totalram_bytes >> 10, rzs->disksize >> 10
98        );
99    }
100
101    rzs->disksize &= PAGE_MASK;
102}
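/*
 * Example (editor's illustration, not in the original source): on a
 * machine with 1 GiB of RAM and no explicit disksize, the default works
 * out to 25 * (1 GiB / 100), i.e. roughly 256 MiB, which is then rounded
 * down to a multiple of PAGE_SIZE.
 */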
103
104static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
105            struct ramzswap_ioctl_stats *s)
106{
107    s->disksize = rzs->disksize;
108
109#if defined(CONFIG_RAMZSWAP_STATS)
110    {
111    struct ramzswap_stats *rs = &rzs->stats;
112    size_t succ_writes, mem_used;
113    unsigned int good_compress_perc = 0, no_compress_perc = 0;
114
115    mem_used = xv_get_total_size_bytes(rzs->mem_pool)
116            + (rs->pages_expand << PAGE_SHIFT);
117    succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
118            rzs_stat64_read(rzs, &rs->failed_writes);
119
120    if (succ_writes && rs->pages_stored) {
121        good_compress_perc = rs->good_compress * 100
122                    / rs->pages_stored;
123        no_compress_perc = rs->pages_expand * 100
124                    / rs->pages_stored;
125    }
126
127    s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
128    s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
129    s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
130    s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
131    s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
132    s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
133    s->pages_zero = rs->pages_zero;
134
135    s->good_compress_pct = good_compress_perc;
136    s->pages_expand_pct = no_compress_perc;
137
138    s->pages_stored = rs->pages_stored;
139    s->pages_used = mem_used >> PAGE_SHIFT;
140    s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
141    s->compr_data_size = rs->compr_size;
142    s->mem_used_total = mem_used;
143    }
144#endif /* CONFIG_RAMZSWAP_STATS */
145}
146
147static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
148{
149    u32 clen;
150    void *obj;
151
152    struct page *page = rzs->table[index].page;
153    u32 offset = rzs->table[index].offset;
154
155    if (unlikely(!page)) {
156        /*
157         * No memory is allocated for zero filled pages.
158         * Simply clear zero page flag.
159         */
160        if (rzs_test_flag(rzs, index, RZS_ZERO)) {
161            rzs_clear_flag(rzs, index, RZS_ZERO);
162            rzs_stat_dec(&rzs->stats.pages_zero);
163        }
164        return;
165    }
166
167    if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
168        clen = PAGE_SIZE;
169        __free_page(page);
170        rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
171        rzs_stat_dec(&rzs->stats.pages_expand);
172        goto out;
173    }
174
175    obj = kmap_atomic(page, KM_USER0) + offset;
176    clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
177    kunmap_atomic(obj, KM_USER0);
178
179    xv_free(rzs->mem_pool, page, offset);
180    if (clen <= PAGE_SIZE / 2)
181        rzs_stat_dec(&rzs->stats.good_compress);
182
183out:
184    rzs->stats.compr_size -= clen;
185    rzs_stat_dec(&rzs->stats.pages_stored);
186
187    rzs->table[index].page = NULL;
188    rzs->table[index].offset = 0;
189}
190
191static void handle_zero_page(struct page *page)
192{
193    void *user_mem;
194
195    user_mem = kmap_atomic(page, KM_USER0);
196    memset(user_mem, 0, PAGE_SIZE);
197    kunmap_atomic(user_mem, KM_USER0);
198
199    flush_dcache_page(page);
200}
201
202static void handle_uncompressed_page(struct ramzswap *rzs,
203                struct page *page, u32 index)
204{
205    unsigned char *user_mem, *cmem;
206
207    user_mem = kmap_atomic(page, KM_USER0);
208    cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
209            rzs->table[index].offset;
210
211    memcpy(user_mem, cmem, PAGE_SIZE);
212    kunmap_atomic(user_mem, KM_USER0);
213    kunmap_atomic(cmem, KM_USER1);
214
215    flush_dcache_page(page);
216}
217
218static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
219{
220
221    int i;
222    u32 index;
223    struct bio_vec *bvec;
224
225    rzs_stat64_inc(rzs, &rzs->stats.num_reads);
226
227    index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
228    bio_for_each_segment(bvec, bio, i) {
229        int ret;
230        size_t clen;
231        struct page *page;
232        struct zobj_header *zheader;
233        unsigned char *user_mem, *cmem;
234
235        page = bvec->bv_page;
236
237        if (rzs_test_flag(rzs, index, RZS_ZERO)) {
238            handle_zero_page(page);
239            continue;
240        }
241
242        /* Requested page is not present in compressed area */
243        if (unlikely(!rzs->table[index].page)) {
244            pr_debug("Read before write: sector=%lu, size=%u\n",
245                (ulong)(bio->bi_sector), bio->bi_size);
246            /* Do nothing */
247            continue;
248        }
249
250        /* Page is stored uncompressed since it's incompressible */
251        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
252            handle_uncompressed_page(rzs, page, index);
253            continue;
254        }
255
256        user_mem = kmap_atomic(page, KM_USER0);
257        clen = PAGE_SIZE;
258
259        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
260                rzs->table[index].offset;
261
262        ret = lzo1x_decompress_safe(
263            cmem + sizeof(*zheader),
264            xv_get_object_size(cmem) - sizeof(*zheader),
265            user_mem, &clen);
266
267        kunmap_atomic(user_mem, KM_USER0);
268        kunmap_atomic(cmem, KM_USER1);
269
270        /* Should NEVER happen. Return bio error if it does. */
271        if (unlikely(ret != LZO_E_OK)) {
272            pr_err("Decompression failed! err=%d, page=%u\n",
273                ret, index);
274            rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
275            goto out;
276        }
277
278        flush_dcache_page(page);
279        index++;
280    }
281
282    set_bit(BIO_UPTODATE, &bio->bi_flags);
283    bio_endio(bio, 0);
284    return 0;
285
286out:
287    bio_io_error(bio);
288    return 0;
289}
290
291static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
292{
293    int i;
294    u32 index;
295    struct bio_vec *bvec;
296
297    rzs_stat64_inc(rzs, &rzs->stats.num_writes);
298
299    index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
300
301    bio_for_each_segment(bvec, bio, i) {
302        int ret;
303        u32 offset;
304        size_t clen;
305        struct zobj_header *zheader;
306        struct page *page, *page_store;
307        unsigned char *user_mem, *cmem, *src;
308
309        page = bvec->bv_page;
310        src = rzs->compress_buffer;
311
312        /*
313         * System overwrites unused sectors. Free memory associated
314         * with this sector now.
315         */
316        if (rzs->table[index].page ||
317                rzs_test_flag(rzs, index, RZS_ZERO))
318            ramzswap_free_page(rzs, index);
319
320        mutex_lock(&rzs->lock);
321
322        user_mem = kmap_atomic(page, KM_USER0);
323        if (page_zero_filled(user_mem)) {
324            kunmap_atomic(user_mem, KM_USER0);
325            mutex_unlock(&rzs->lock);
326            rzs_stat_inc(&rzs->stats.pages_zero);
327            rzs_set_flag(rzs, index, RZS_ZERO);
328            continue;
329        }
330
331        ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
332                    rzs->compress_workmem);
333
334        kunmap_atomic(user_mem, KM_USER0);
335
336        if (unlikely(ret != LZO_E_OK)) {
337            mutex_unlock(&rzs->lock);
338            pr_err("Compression failed! err=%d\n", ret);
339            rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
340            goto out;
341        }
342
343        /*
344         * Page is incompressible. Store it as-is (uncompressed)
345         * since we do not want to return too many swap write
346         * errors, which have the side effect of hanging the system.
347         */
348        if (unlikely(clen > max_zpage_size)) {
349            clen = PAGE_SIZE;
350            page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
351            if (unlikely(!page_store)) {
352                mutex_unlock(&rzs->lock);
353                pr_info("Error allocating memory for "
354                    "incompressible page: %u\n", index);
355                rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
356                goto out;
357            }
358
359            offset = 0;
360            rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
361            rzs_stat_inc(&rzs->stats.pages_expand);
362            rzs->table[index].page = page_store;
363            src = kmap_atomic(page, KM_USER0);
364            goto memstore;
365        }
366
367        if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
368                &rzs->table[index].page, &offset,
369                GFP_NOIO | __GFP_HIGHMEM)) {
370            mutex_unlock(&rzs->lock);
371            pr_info("Error allocating memory for compressed "
372                "page: %u, size=%zu\n", index, clen);
373            rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
374            goto out;
375        }
376
377memstore:
378        rzs->table[index].offset = offset;
379
380        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
381                rzs->table[index].offset;
382
383#if 0
384        /* Back-reference needed for memory defragmentation */
385        if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
386            zheader = (struct zobj_header *)cmem;
387            zheader->table_idx = index;
388            cmem += sizeof(*zheader);
389        }
390#endif
391
392        memcpy(cmem, src, clen);
393
394        kunmap_atomic(cmem, KM_USER1);
395        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
396            kunmap_atomic(src, KM_USER0);
397
398        /* Update stats */
399        rzs->stats.compr_size += clen;
400        rzs_stat_inc(&rzs->stats.pages_stored);
401        if (clen <= PAGE_SIZE / 2)
402            rzs_stat_inc(&rzs->stats.good_compress);
403
404        mutex_unlock(&rzs->lock);
405        index++;
406    }
407
408    set_bit(BIO_UPTODATE, &bio->bi_flags);
409    bio_endio(bio, 0);
410    return 0;
411
412out:
413    bio_io_error(bio);
414    return 0;
415}
416
417/*
418 * Check if request is within bounds and page aligned.
419 */
420static inline int valid_io_request(struct ramzswap *rzs, struct bio *bio)
421{
422    if (unlikely(
423        (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
424        (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
425        (bio->bi_size & (PAGE_SIZE - 1)))) {
426
427        return 0;
428    }
429
430    /* I/O request is valid */
431    return 1;
432}
433
434/*
435 * Handler function for all ramzswap I/O requests.
436 */
437static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
438{
439    int ret = 0;
440    struct ramzswap *rzs = queue->queuedata;
441
442    if (unlikely(!rzs->init_done)) {
443        bio_io_error(bio);
444        return 0;
445    }
446
447    if (!valid_io_request(rzs, bio)) {
448        rzs_stat64_inc(rzs, &rzs->stats.invalid_io);
449        bio_io_error(bio);
450        return 0;
451    }
452
453    switch (bio_data_dir(bio)) {
454    case READ:
455        ret = ramzswap_read(rzs, bio);
456        break;
457
458    case WRITE:
459        ret = ramzswap_write(rzs, bio);
460        break;
461    }
462
463    return ret;
464}
465
466static void reset_device(struct ramzswap *rzs)
467{
468    size_t index;
469
470    /* Do not accept any new I/O request */
471    rzs->init_done = 0;
472
473    /* Free various per-device buffers */
474    kfree(rzs->compress_workmem);
475    free_pages((unsigned long)rzs->compress_buffer, 1);
476
477    rzs->compress_workmem = NULL;
478    rzs->compress_buffer = NULL;
479
480    /* Free all pages that are still in this ramzswap device */
481    for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) {
482        struct page *page;
483        u16 offset;
484
485        page = rzs->table[index].page;
486        offset = rzs->table[index].offset;
487
488        if (!page)
489            continue;
490
491        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
492            __free_page(page);
493        else
494            xv_free(rzs->mem_pool, page, offset);
495    }
496
497    vfree(rzs->table);
498    rzs->table = NULL;
499
500    xv_destroy_pool(rzs->mem_pool);
501    rzs->mem_pool = NULL;
502
503    /* Reset stats */
504    memset(&rzs->stats, 0, sizeof(rzs->stats));
505
506    rzs->disksize = 0;
507}
508
509static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
510{
511    int ret;
512    size_t num_pages;
513
514    if (rzs->init_done) {
515        pr_info("Device already initialized!\n");
516        return -EBUSY;
517    }
518
519    ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
520
521    rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
522    if (!rzs->compress_workmem) {
523        pr_err("Error allocating compressor working memory!\n");
524        ret = -ENOMEM;
525        goto fail;
526    }
527
528    rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
529    if (!rzs->compress_buffer) {
530        pr_err("Error allocating compressor buffer space\n");
531        ret = -ENOMEM;
532        goto fail;
533    }
534
535    num_pages = rzs->disksize >> PAGE_SHIFT;
536    rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
537    if (!rzs->table) {
538        pr_err("Error allocating ramzswap address table\n");
539        /* To prevent accessing table entries during cleanup */
540        rzs->disksize = 0;
541        ret = -ENOMEM;
542        goto fail;
543    }
544    memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
545
546    set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
547
548    /* ramzswap devices sort of resemble non-rotational disks */
549    queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
550
551    rzs->mem_pool = xv_create_pool();
552    if (!rzs->mem_pool) {
553        pr_err("Error creating memory pool\n");
554        ret = -ENOMEM;
555        goto fail;
556    }
557
558    rzs->init_done = 1;
559
560    pr_debug("Initialization done!\n");
561    return 0;
562
563fail:
564    reset_device(rzs);
565
566    pr_err("Initialization failed: err=%d\n", ret);
567    return ret;
568}
569
570static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
571{
572    if (rzs->init_done)
573        reset_device(rzs);
574
575    return 0;
576}
577
578static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
579            unsigned int cmd, unsigned long arg)
580{
581    int ret = 0;
582    size_t disksize_kb;
583
584    struct ramzswap *rzs = bdev->bd_disk->private_data;
585
586    switch (cmd) {
587    case RZSIO_SET_DISKSIZE_KB:
588        if (rzs->init_done) {
589            ret = -EBUSY;
590            goto out;
591        }
592        if (copy_from_user(&disksize_kb, (void *)arg,
593                        _IOC_SIZE(cmd))) {
594            ret = -EFAULT;
595            goto out;
596        }
597        rzs->disksize = disksize_kb << 10;
598        pr_info("Disk size set to %zu kB\n", disksize_kb);
599        break;
600
601    case RZSIO_GET_STATS:
602    {
603        struct ramzswap_ioctl_stats *stats;
604        if (!rzs->init_done) {
605            ret = -ENOTTY;
606            goto out;
607        }
608        stats = kzalloc(sizeof(*stats), GFP_KERNEL);
609        if (!stats) {
610            ret = -ENOMEM;
611            goto out;
612        }
613        ramzswap_ioctl_get_stats(rzs, stats);
614        if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
615            kfree(stats);
616            ret = -EFAULT;
617            goto out;
618        }
619        kfree(stats);
620        break;
621    }
622    case RZSIO_INIT:
623        ret = ramzswap_ioctl_init_device(rzs);
624        break;
625
626    case RZSIO_RESET:
627        /* Do not reset an active device! */
628        if (bdev->bd_holders) {
629            ret = -EBUSY;
630            goto out;
631        }
632
633        /* Make sure all pending I/O is finished */
634        if (bdev)
635            fsync_bdev(bdev);
636
637        ret = ramzswap_ioctl_reset_device(rzs);
638        break;
639
640    default:
641        pr_info("Invalid ioctl %u\n", cmd);
642        ret = -ENOTTY;
643    }
644
645out:
646    return ret;
647}
648
649void ramzswap_slot_free_notify(struct block_device *bdev, unsigned long index)
650{
651    struct ramzswap *rzs;
652
653    rzs = bdev->bd_disk->private_data;
654    ramzswap_free_page(rzs, index);
655    rzs_stat64_inc(rzs, &rzs->stats.notify_free);
656}
657
658static const struct block_device_operations ramzswap_devops = {
659    .ioctl = ramzswap_ioctl,
660    .swap_slot_free_notify = ramzswap_slot_free_notify,
661    .owner = THIS_MODULE
662};
663
664static int create_device(struct ramzswap *rzs, int device_id)
665{
666    int ret = 0;
667
668    mutex_init(&rzs->lock);
669    spin_lock_init(&rzs->stat64_lock);
670
671    rzs->queue = blk_alloc_queue(GFP_KERNEL);
672    if (!rzs->queue) {
673        pr_err("Error allocating disk queue for device %d\n",
674            device_id);
675        ret = -ENOMEM;
676        goto out;
677    }
678
679    blk_queue_make_request(rzs->queue, ramzswap_make_request);
680    rzs->queue->queuedata = rzs;
681
682     /* gendisk structure */
683    rzs->disk = alloc_disk(1);
684    if (!rzs->disk) {
685        blk_cleanup_queue(rzs->queue);
686        pr_warning("Error allocating disk structure for device %d\n",
687            device_id);
688        ret = -ENOMEM;
689        goto out;
690    }
691
692    rzs->disk->major = ramzswap_major;
693    rzs->disk->first_minor = device_id;
694    rzs->disk->fops = &ramzswap_devops;
695    rzs->disk->queue = rzs->queue;
696    rzs->disk->private_data = rzs;
697    snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
698
699    /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
700    set_capacity(rzs->disk, 0);
701
702    /*
703     * To ensure that we always get PAGE_SIZE aligned
704     * and n*PAGE_SIZE sized I/O requests.
705     */
706    blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
707    blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
708    blk_queue_io_min(rzs->disk->queue, PAGE_SIZE);
709    blk_queue_io_opt(rzs->disk->queue, PAGE_SIZE);
710
711    add_disk(rzs->disk);
712
713    rzs->init_done = 0;
714
715out:
716    return ret;
717}
718
719static void destroy_device(struct ramzswap *rzs)
720{
721    if (rzs->disk) {
722        del_gendisk(rzs->disk);
723        put_disk(rzs->disk);
724    }
725
726    if (rzs->queue)
727        blk_cleanup_queue(rzs->queue);
728}
729
730static int __init ramzswap_init(void)
731{
732    int ret, dev_id;
733
734    if (num_devices > max_num_devices) {
735        pr_warning("Invalid value for num_devices: %u\n",
736                num_devices);
737        ret = -EINVAL;
738        goto out;
739    }
740
741    ramzswap_major = register_blkdev(0, "ramzswap");
742    if (ramzswap_major <= 0) {
743        pr_warning("Unable to get major number\n");
744        ret = -EBUSY;
745        goto out;
746    }
747
748    if (!num_devices) {
749        pr_info("num_devices not specified. Using default: 1\n");
750        num_devices = 1;
751    }
752
753    /* Allocate the device array and initialize each one */
754    pr_info("Creating %u devices ...\n", num_devices);
755    devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
756    if (!devices) {
757        ret = -ENOMEM;
758        goto unregister;
759    }
760
761    for (dev_id = 0; dev_id < num_devices; dev_id++) {
762        ret = create_device(&devices[dev_id], dev_id);
763        if (ret)
764            goto free_devices;
765    }
766
767    return 0;
768
769free_devices:
770    while (dev_id)
771        destroy_device(&devices[--dev_id]);
772unregister:
773    unregister_blkdev(ramzswap_major, "ramzswap");
774out:
775    return ret;
776}
777
778static void __exit ramzswap_exit(void)
779{
780    int i;
781    struct ramzswap *rzs;
782
783    for (i = 0; i < num_devices; i++) {
784        rzs = &devices[i];
785
786        destroy_device(rzs);
787        if (rzs->init_done)
788            reset_device(rzs);
789    }
790
791    unregister_blkdev(ramzswap_major, "ramzswap");
792
793    kfree(devices);
794    pr_debug("Cleanup done!\n");
795}
796
797module_param(num_devices, uint, 0);
798MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
799
800module_init(ramzswap_init);
801module_exit(ramzswap_exit);
802
803MODULE_LICENSE("Dual BSD/GPL");
804MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
805MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
drivers/staging/zram/zram_drv.h
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#ifndef _RAMZSWAP_DRV_H_
16#define _RAMZSWAP_DRV_H_
17
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20
21#include "zram_ioctl.h"
22#include "xvmalloc.h"
23
24/*
25 * Some arbitrary value. This is just to catch
26 * an invalid value for the num_devices module parameter.
27 */
28static const unsigned max_num_devices = 32;
29
30/*
31 * Stored at beginning of each compressed object.
32 *
33 * It stores a back-reference to the table entry which points to this
34 * object. This is required to support memory defragmentation.
35 */
36struct zobj_header {
37#if 0
38    u32 table_idx;
39#endif
40};
41
42/*-- Configurable parameters */
43
44/* Default ramzswap disk size: 25% of total RAM */
45static const unsigned default_disksize_perc_ram = 25;
46
47/*
48 * Pages that compress to a size greater than this are stored
49 * uncompressed in memory.
50 */
51static const unsigned max_zpage_size = PAGE_SIZE / 4 * 3;
52
53/*
54 * NOTE: max_zpage_size must be less than or equal to:
55 * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
56 * otherwise, xv_malloc() would always return failure.
57 */
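/*
 * Editor's check (illustrative): with 4 KiB pages, max_zpage_size is
 * 3072 bytes while XV_MAX_ALLOC_SIZE is 4092, so the condition above
 * holds with ample margin for sizeof(struct zobj_header).
 */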
58
59/*-- End of configurable params */
60
61#define SECTOR_SHIFT 9
62#define SECTOR_SIZE (1 << SECTOR_SHIFT)
63#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
64#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
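/*
 * Editor's example (not in the original source): with 4 KiB pages,
 * SECTORS_PER_PAGE_SHIFT = 12 - 9 = 3 and SECTORS_PER_PAGE = 8, so a bio
 * starting at sector 16 maps to table index 16 >> 3 = 2 in
 * ramzswap_read()/ramzswap_write().
 */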
65
66/* Flags for ramzswap pages (table[page_no].flags) */
67enum rzs_pageflags {
68    /* Page is stored uncompressed */
69    RZS_UNCOMPRESSED,
70
71    /* Page consists entirely of zeros */
72    RZS_ZERO,
73
74    __NR_RZS_PAGEFLAGS,
75};
76
77/*-- Data structures */
78
79/*
80 * Allocated for each swap slot, indexed by page no.
81 * These table entries must fit exactly in a page.
82 */
83struct table {
84    struct page *page;
85    u16 offset;
86    u8 count; /* object ref count (not yet used) */
87    u8 flags;
88} __attribute__((aligned(4)));
89
90struct ramzswap_stats {
91    /* basic stats */
92    size_t compr_size; /* compressed size of pages stored -
93                 * needed to enforce memlimit */
94    /* more stats */
95#if defined(CONFIG_RAMZSWAP_STATS)
96    u64 num_reads; /* failed + successful */
97    u64 num_writes; /* --do-- */
98    u64 failed_reads; /* should NEVER! happen */
99    u64 failed_writes; /* can happen when memory is too low */
100    u64 invalid_io; /* non-swap I/O requests */
101    u64 notify_free; /* no. of swap slot free notifications */
102    u32 pages_zero; /* no. of zero filled pages */
103    u32 pages_stored; /* no. of pages currently stored */
104    u32 good_compress; /* no. of pages with compression ratio<=50% */
105    u32 pages_expand; /* no. of incompressible pages (stored as-is) */
106#endif
107};
108
109struct ramzswap {
110    struct xv_pool *mem_pool;
111    void *compress_workmem;
112    void *compress_buffer;
113    struct table *table;
114    spinlock_t stat64_lock; /* protect 64-bit stats */
115    struct mutex lock; /* protect compression buffers against
116                 * concurrent writes */
117    struct request_queue *queue;
118    struct gendisk *disk;
119    int init_done;
120    /*
121     * This is the limit on the amount of *uncompressed* data
122     * we can hold. When a backing swap device is provided, it is
123     * set equal to device size.
124     */
125    size_t disksize; /* bytes */
126
127    struct ramzswap_stats stats;
128};
129
130/*-- */
131
132/* Debugging and Stats */
133#if defined(CONFIG_RAMZSWAP_STATS)
134static void rzs_stat_inc(u32 *v)
135{
136    *v = *v + 1;
137}
138
139static void rzs_stat_dec(u32 *v)
140{
141    *v = *v - 1;
142}
143
144static void rzs_stat64_inc(struct ramzswap *rzs, u64 *v)
145{
146    spin_lock(&rzs->stat64_lock);
147    *v = *v + 1;
148    spin_unlock(&rzs->stat64_lock);
149}
150
151static u64 rzs_stat64_read(struct ramzswap *rzs, u64 *v)
152{
153    u64 val;
154
155    spin_lock(&rzs->stat64_lock);
156    val = *v;
157    spin_unlock(&rzs->stat64_lock);
158
159    return val;
160}
161#else
162#define rzs_stat_inc(v)
163#define rzs_stat_dec(v)
164#define rzs_stat64_inc(r, v)
165#define rzs_stat64_read(r, v)
166#endif /* CONFIG_RAMZSWAP_STATS */
167
168#endif
drivers/staging/zram/zram_ioctl.h
1/*
2 * Compressed RAM based swap device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#ifndef _RAMZSWAP_IOCTL_H_
16#define _RAMZSWAP_IOCTL_H_
17
18struct ramzswap_ioctl_stats {
19    u64 disksize; /* user specified or equal to backing swap
20                 * size (if present) */
21    u64 num_reads; /* failed + successful */
22    u64 num_writes; /* --do-- */
23    u64 failed_reads; /* should NEVER! happen */
24    u64 failed_writes; /* can happen when memory is too low */
25    u64 invalid_io; /* non-swap I/O requests */
26    u64 notify_free; /* no. of swap slot free notifications */
27    u32 pages_zero; /* no. of zero filled pages */
28    u32 good_compress_pct; /* % of pages with compression ratio<=50% */
29    u32 pages_expand_pct; /* % of incompressible pages */
30    u32 pages_stored;
31    u32 pages_used;
32    u64 orig_data_size;
33    u64 compr_data_size;
34    u64 mem_used_total;
35} __attribute__ ((packed, aligned(4)));
36
37#define RZSIO_SET_DISKSIZE_KB _IOW('z', 0, size_t)
38#define RZSIO_GET_STATS _IOR('z', 1, struct ramzswap_ioctl_stats)
39#define RZSIO_INIT _IO('z', 2)
40#define RZSIO_RESET _IO('z', 3)
41
42#endif
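
As a rough illustration of how a userspace tool such as rzscontrol drives this interface, the hypothetical snippet below (not part of this commit; it assumes u32/u64 are typedef'd for userspace before zram_ioctl.h is included) sets a disk size, initializes the device, and reads back the stats:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "zram_ioctl.h"

    int main(void)
    {
        size_t disksize_kb = 65536;    /* 64 MiB */
        struct ramzswap_ioctl_stats stats;
        int fd = open("/dev/ramzswap0", O_RDWR);

        if (fd < 0)
            return 1;

        /* Size must be set before init; both return EBUSY once initialized */
        if (ioctl(fd, RZSIO_SET_DISKSIZE_KB, &disksize_kb) < 0 ||
            ioctl(fd, RZSIO_INIT) < 0)
            return 1;

        if (ioctl(fd, RZSIO_GET_STATS, &stats) == 0)
            printf("disksize: %llu bytes\n",
                   (unsigned long long)stats.disksize);

        close(fd);
        return 0;
    }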
