drivers/staging/ramzswap/ramzswap_drv.c |
1 | | /* |
2 | | * Compressed RAM based swap device |
3 | | * |
4 | | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
5 | | * |
6 | | * This code is released using a dual license strategy: BSD/GPL |
7 | | * You can choose the licence that better fits your requirements. |
8 | | * |
9 | | * Released under the terms of 3-clause BSD License |
10 | | * Released under the terms of GNU General Public License Version 2.0 |
11 | | * |
12 | | * Project home: http://compcache.googlecode.com |
13 | | */ |
14 | | |
15 | | #define KMSG_COMPONENT "ramzswap" |
16 | | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | | |
18 | | #include <linux/module.h> |
19 | | #include <linux/kernel.h> |
20 | | #include <linux/bitops.h> |
21 | | #include <linux/blkdev.h> |
22 | | #include <linux/buffer_head.h> |
23 | | #include <linux/device.h> |
24 | | #include <linux/genhd.h> |
25 | | #include <linux/highmem.h> |
26 | | #include <linux/slab.h> |
27 | | #include <linux/lzo.h> |
28 | | #include <linux/string.h> |
29 | | #include <linux/swap.h> |
30 | | #include <linux/swapops.h> |
31 | | #include <linux/vmalloc.h> |
32 | | |
33 | | #include "ramzswap_drv.h" |
34 | | |
35 | | /* Globals */ |
36 | | static int ramzswap_major; |
37 | | static struct ramzswap *devices; |
38 | | |
39 | | /* Module params (documentation at end) */ |
40 | | static unsigned int num_devices; |
41 | | |
42 | | static int rzs_test_flag(struct ramzswap *rzs, u32 index, |
43 | | enum rzs_pageflags flag) |
44 | | { |
45 | | return rzs->table[index].flags & BIT(flag); |
46 | | } |
47 | | |
48 | | static void rzs_set_flag(struct ramzswap *rzs, u32 index, |
49 | | enum rzs_pageflags flag) |
50 | | { |
51 | | rzs->table[index].flags |= BIT(flag); |
52 | | } |
53 | | |
54 | | static void rzs_clear_flag(struct ramzswap *rzs, u32 index, |
55 | | enum rzs_pageflags flag) |
56 | | { |
57 | | rzs->table[index].flags &= ~BIT(flag); |
58 | | } |
59 | | |
60 | | static int page_zero_filled(void *ptr) |
61 | | { |
62 | | unsigned int pos; |
63 | | unsigned long *page; |
64 | | |
65 | | page = (unsigned long *)ptr; |
66 | | |
67 | | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { |
68 | | if (page[pos]) |
69 | | return 0; |
70 | | } |
71 | | |
72 | | return 1; |
73 | | } |
74 | | |
75 | | static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) |
76 | | { |
77 | | if (!rzs->disksize) { |
78 | | pr_info( |
79 | | "disk size not provided. You can use disksize_kb module " |
80 | | "param to specify size.\nUsing default: (%u%% of RAM).\n", |
81 | | default_disksize_perc_ram |
82 | | ); |
83 | | rzs->disksize = default_disksize_perc_ram * |
84 | | (totalram_bytes / 100); |
85 | | } |
86 | | |
87 | | if (rzs->disksize > 2 * (totalram_bytes)) { |
88 | | pr_info( |
89 | | "There is little point creating a ramzswap of greater than " |
90 | | "twice the size of memory since we expect a 2:1 compression " |
91 | | "ratio. Note that ramzswap uses about 0.1%% of the size of " |
92 | | "the swap device when not in use so a huge ramzswap is " |
93 | | "wasteful.\n" |
94 | | "\tMemory Size: %zu kB\n" |
95 | | "\tSize you selected: %zu kB\n" |
96 | | "Continuing anyway ...\n", |
97 | | totalram_bytes >> 10, rzs->disksize >> 10
98 | | ); |
99 | | } |
100 | | |
101 | | rzs->disksize &= PAGE_MASK; |
102 | | } |
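| |
| | /*
| |  * Worked example (assuming default_disksize_perc_ram == 25; the
| |  * constant itself lives in ramzswap_drv.h): with 1 GiB of RAM and
| |  * no disksize given, the device defaults to roughly 256 MiB, and
| |  * the PAGE_MASK step above rounds the final size down to a
| |  * multiple of PAGE_SIZE.
| |  */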
103 | | |
104 | | static void ramzswap_ioctl_get_stats(struct ramzswap *rzs, |
105 | | struct ramzswap_ioctl_stats *s) |
106 | | { |
107 | | s->disksize = rzs->disksize; |
108 | | |
109 | | #if defined(CONFIG_RAMZSWAP_STATS) |
110 | | { |
111 | | struct ramzswap_stats *rs = &rzs->stats; |
112 | | size_t succ_writes, mem_used; |
113 | | unsigned int good_compress_perc = 0, no_compress_perc = 0; |
114 | | |
115 | | mem_used = xv_get_total_size_bytes(rzs->mem_pool) |
116 | | + (rs->pages_expand << PAGE_SHIFT); |
117 | | succ_writes = rzs_stat64_read(rzs, &rs->num_writes) - |
118 | | rzs_stat64_read(rzs, &rs->failed_writes); |
119 | | |
120 | | if (succ_writes && rs->pages_stored) { |
121 | | good_compress_perc = rs->good_compress * 100 |
122 | | / rs->pages_stored; |
123 | | no_compress_perc = rs->pages_expand * 100 |
124 | | / rs->pages_stored; |
125 | | } |
126 | | |
127 | | s->num_reads = rzs_stat64_read(rzs, &rs->num_reads); |
128 | | s->num_writes = rzs_stat64_read(rzs, &rs->num_writes); |
129 | | s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads); |
130 | | s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes); |
131 | | s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io); |
132 | | s->notify_free = rzs_stat64_read(rzs, &rs->notify_free); |
133 | | s->pages_zero = rs->pages_zero; |
134 | | |
135 | | s->good_compress_pct = good_compress_perc; |
136 | | s->pages_expand_pct = no_compress_perc; |
137 | | |
138 | | s->pages_stored = rs->pages_stored; |
139 | | s->pages_used = mem_used >> PAGE_SHIFT; |
140 | | s->orig_data_size = rs->pages_stored << PAGE_SHIFT; |
141 | | s->compr_data_size = rs->compr_size; |
142 | | s->mem_used_total = mem_used; |
143 | | } |
144 | | #endif /* CONFIG_RAMZSWAP_STATS */ |
145 | | } |
146 | | |
147 | | static void ramzswap_free_page(struct ramzswap *rzs, size_t index) |
148 | | { |
149 | | u32 clen; |
150 | | void *obj; |
151 | | |
152 | | struct page *page = rzs->table[index].page; |
153 | | u32 offset = rzs->table[index].offset; |
154 | | |
155 | | if (unlikely(!page)) { |
156 | | /* |
157 | | * No memory is allocated for zero filled pages. |
158 | | * Simply clear zero page flag. |
159 | | */ |
160 | | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
161 | | rzs_clear_flag(rzs, index, RZS_ZERO); |
162 | | rzs_stat_dec(&rzs->stats.pages_zero); |
163 | | } |
164 | | return; |
165 | | } |
166 | | |
167 | | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { |
168 | | clen = PAGE_SIZE; |
169 | | __free_page(page); |
170 | | rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED); |
171 | | rzs_stat_dec(&rzs->stats.pages_expand); |
172 | | goto out; |
173 | | } |
174 | | |
175 | | obj = kmap_atomic(page, KM_USER0) + offset; |
176 | | clen = xv_get_object_size(obj) - sizeof(struct zobj_header); |
177 | | kunmap_atomic(obj, KM_USER0); |
178 | | |
179 | | xv_free(rzs->mem_pool, page, offset); |
180 | | if (clen <= PAGE_SIZE / 2) |
181 | | rzs_stat_dec(&rzs->stats.good_compress); |
182 | | |
183 | | out: |
184 | | rzs->stats.compr_size -= clen; |
185 | | rzs_stat_dec(&rzs->stats.pages_stored); |
186 | | |
187 | | rzs->table[index].page = NULL; |
188 | | rzs->table[index].offset = 0; |
189 | | } |
190 | | |
191 | | static void handle_zero_page(struct page *page) |
192 | | { |
193 | | void *user_mem; |
194 | | |
195 | | user_mem = kmap_atomic(page, KM_USER0); |
196 | | memset(user_mem, 0, PAGE_SIZE); |
197 | | kunmap_atomic(user_mem, KM_USER0); |
198 | | |
199 | | flush_dcache_page(page); |
200 | | } |
201 | | |
202 | | static void handle_uncompressed_page(struct ramzswap *rzs, |
203 | | struct page *page, u32 index) |
204 | | { |
205 | | unsigned char *user_mem, *cmem; |
206 | | |
207 | | user_mem = kmap_atomic(page, KM_USER0); |
208 | | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
209 | | rzs->table[index].offset; |
210 | | |
211 | | memcpy(user_mem, cmem, PAGE_SIZE); |
212 | | kunmap_atomic(user_mem, KM_USER0); |
213 | | kunmap_atomic(cmem, KM_USER1); |
214 | | |
215 | | flush_dcache_page(page); |
216 | | } |
217 | | |
218 | | static int ramzswap_read(struct ramzswap *rzs, struct bio *bio) |
219 | | { |
220 | | |
221 | | int i; |
222 | | u32 index; |
223 | | struct bio_vec *bvec; |
224 | | |
225 | | rzs_stat64_inc(rzs, &rzs->stats.num_reads); |
226 | | |
227 | | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
228 | | bio_for_each_segment(bvec, bio, i) { |
229 | | int ret; |
230 | | size_t clen; |
231 | | struct page *page; |
232 | | struct zobj_header *zheader; |
233 | | unsigned char *user_mem, *cmem; |
234 | | |
235 | | page = bvec->bv_page; |
236 | | |
237 | | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
238 | | handle_zero_page(page); |
239 | | continue; |
240 | | } |
241 | | |
242 | | /* Requested page is not present in compressed area */ |
243 | | if (unlikely(!rzs->table[index].page)) { |
244 | | pr_debug("Read before write: sector=%lu, size=%u", |
245 | | (ulong)(bio->bi_sector), bio->bi_size); |
246 | | /* Do nothing */ |
247 | | continue; |
248 | | } |
249 | | |
250 | | /* Page is stored uncompressed since it's incompressible */ |
251 | | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { |
252 | | handle_uncompressed_page(rzs, page, index); |
253 | | continue; |
254 | | } |
255 | | |
256 | | user_mem = kmap_atomic(page, KM_USER0); |
257 | | clen = PAGE_SIZE; |
258 | | |
259 | | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
260 | | rzs->table[index].offset; |
261 | | |
262 | | ret = lzo1x_decompress_safe( |
263 | | cmem + sizeof(*zheader), |
264 | | xv_get_object_size(cmem) - sizeof(*zheader), |
265 | | user_mem, &clen); |
266 | | |
267 | | kunmap_atomic(user_mem, KM_USER0); |
268 | | kunmap_atomic(cmem, KM_USER1); |
269 | | |
270 | | /* Should NEVER happen. Return bio error if it does. */ |
271 | | if (unlikely(ret != LZO_E_OK)) { |
272 | | pr_err("Decompression failed! err=%d, page=%u\n", |
273 | | ret, index); |
274 | | rzs_stat64_inc(rzs, &rzs->stats.failed_reads); |
275 | | goto out; |
276 | | } |
277 | | |
278 | | flush_dcache_page(page); |
279 | | index++; |
280 | | } |
281 | | |
282 | | set_bit(BIO_UPTODATE, &bio->bi_flags); |
283 | | bio_endio(bio, 0); |
284 | | return 0; |
285 | | |
286 | | out: |
287 | | bio_io_error(bio); |
288 | | return 0; |
289 | | } |
290 | | |
291 | | static int ramzswap_write(struct ramzswap *rzs, struct bio *bio) |
292 | | { |
293 | | int i; |
294 | | u32 index; |
295 | | struct bio_vec *bvec; |
296 | | |
297 | | rzs_stat64_inc(rzs, &rzs->stats.num_writes); |
298 | | |
299 | | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
300 | | |
301 | | bio_for_each_segment(bvec, bio, i) { |
302 | | int ret; |
303 | | u32 offset; |
304 | | size_t clen; |
305 | | struct zobj_header *zheader; |
306 | | struct page *page, *page_store; |
307 | | unsigned char *user_mem, *cmem, *src; |
308 | | |
309 | | page = bvec->bv_page; |
310 | | src = rzs->compress_buffer; |
311 | | |
312 | | /* |
313 | | * System overwrites unused sectors. Free memory associated |
314 | | * with this sector now. |
315 | | */ |
316 | | if (rzs->table[index].page || |
317 | | rzs_test_flag(rzs, index, RZS_ZERO)) |
318 | | ramzswap_free_page(rzs, index); |
319 | | |
320 | | mutex_lock(&rzs->lock); |
321 | | |
322 | | user_mem = kmap_atomic(page, KM_USER0); |
323 | | if (page_zero_filled(user_mem)) { |
324 | | kunmap_atomic(user_mem, KM_USER0); |
325 | | mutex_unlock(&rzs->lock); |
326 | | rzs_stat_inc(&rzs->stats.pages_zero); |
327 | | rzs_set_flag(rzs, index, RZS_ZERO); |
328 | | continue; |
329 | | } |
330 | | |
331 | | ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen, |
332 | | rzs->compress_workmem); |
333 | | |
334 | | kunmap_atomic(user_mem, KM_USER0); |
335 | | |
336 | | if (unlikely(ret != LZO_E_OK)) { |
337 | | mutex_unlock(&rzs->lock); |
338 | | pr_err("Compression failed! err=%d\n", ret); |
339 | | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
340 | | goto out; |
341 | | } |
342 | | |
343 | | /* |
344 | | * Page is incompressible. Store it as-is (uncompressed) |
345 | | * since we do not want to return too many swap write |
346 | | * errors, which has the side effect of hanging the system.
347 | | */ |
348 | | if (unlikely(clen > max_zpage_size)) { |
349 | | clen = PAGE_SIZE; |
350 | | page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); |
351 | | if (unlikely(!page_store)) { |
352 | | mutex_unlock(&rzs->lock); |
353 | | pr_info("Error allocating memory for " |
354 | | "incompressible page: %u\n", index); |
355 | | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
356 | | goto out; |
357 | | } |
358 | | |
359 | | offset = 0; |
360 | | rzs_set_flag(rzs, index, RZS_UNCOMPRESSED); |
361 | | rzs_stat_inc(&rzs->stats.pages_expand); |
362 | | rzs->table[index].page = page_store; |
363 | | src = kmap_atomic(page, KM_USER0); |
364 | | goto memstore; |
365 | | } |
366 | | |
367 | | if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader), |
368 | | &rzs->table[index].page, &offset, |
369 | | GFP_NOIO | __GFP_HIGHMEM)) { |
370 | | mutex_unlock(&rzs->lock); |
371 | | pr_info("Error allocating memory for compressed " |
372 | | "page: %u, size=%zu\n", index, clen); |
373 | | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
374 | | goto out; |
375 | | } |
376 | | |
377 | | memstore: |
378 | | rzs->table[index].offset = offset; |
379 | | |
380 | | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
381 | | rzs->table[index].offset; |
382 | | |
383 | | #if 0 |
384 | | /* Back-reference needed for memory defragmentation */ |
385 | | if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) { |
386 | | zheader = (struct zobj_header *)cmem; |
387 | | zheader->table_idx = index; |
388 | | cmem += sizeof(*zheader); |
389 | | } |
390 | | #endif |
391 | | |
392 | | memcpy(cmem, src, clen); |
393 | | |
394 | | kunmap_atomic(cmem, KM_USER1); |
395 | | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) |
396 | | kunmap_atomic(src, KM_USER0); |
397 | | |
398 | | /* Update stats */ |
399 | | rzs->stats.compr_size += clen; |
400 | | rzs_stat_inc(&rzs->stats.pages_stored); |
401 | | if (clen <= PAGE_SIZE / 2) |
402 | | rzs_stat_inc(&rzs->stats.good_compress); |
403 | | |
404 | | mutex_unlock(&rzs->lock); |
405 | | index++; |
406 | | } |
407 | | |
408 | | set_bit(BIO_UPTODATE, &bio->bi_flags); |
409 | | bio_endio(bio, 0); |
410 | | return 0; |
411 | | |
412 | | out: |
413 | | bio_io_error(bio); |
414 | | return 0; |
415 | | } |
416 | | |
417 | | /* |
418 | | * Check if request is within bounds and page aligned. |
419 | | */ |
420 | | static inline int valid_io_request(struct ramzswap *rzs, struct bio *bio) |
421 | | { |
422 | | if (unlikely( |
423 | | (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) || |
424 | | (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || |
425 | | (bio->bi_size & (PAGE_SIZE - 1)))) { |
426 | | |
427 | | return 0; |
428 | | } |
429 | | |
430 | | /* I/O request is valid */ |
431 | | return 1; |
432 | | } |
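| |
| | /*
| |  * Concretely, assuming 4 KiB pages and 512-byte sectors
| |  * (SECTORS_PER_PAGE == 8): a bio passes only if bi_sector is a
| |  * multiple of 8, bi_size is a multiple of 4096, and the starting
| |  * sector falls within the configured disksize.
| |  */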
433 | | |
434 | | /* |
435 | | * Handler function for all ramzswap I/O requests. |
436 | | */ |
437 | | static int ramzswap_make_request(struct request_queue *queue, struct bio *bio) |
438 | | { |
439 | | int ret = 0; |
440 | | struct ramzswap *rzs = queue->queuedata; |
441 | | |
442 | | if (unlikely(!rzs->init_done)) { |
443 | | bio_io_error(bio); |
444 | | return 0; |
445 | | } |
446 | | |
447 | | if (!valid_io_request(rzs, bio)) { |
448 | | rzs_stat64_inc(rzs, &rzs->stats.invalid_io); |
449 | | bio_io_error(bio); |
450 | | return 0; |
451 | | } |
452 | | |
453 | | switch (bio_data_dir(bio)) { |
454 | | case READ: |
455 | | ret = ramzswap_read(rzs, bio); |
456 | | break; |
457 | | |
458 | | case WRITE: |
459 | | ret = ramzswap_write(rzs, bio); |
460 | | break; |
461 | | } |
462 | | |
463 | | return ret; |
464 | | } |
465 | | |
466 | | static void reset_device(struct ramzswap *rzs) |
467 | | { |
468 | | size_t index; |
469 | | |
470 | | /* Do not accept any new I/O request */ |
471 | | rzs->init_done = 0; |
472 | | |
473 | | /* Free various per-device buffers */ |
474 | | kfree(rzs->compress_workmem); |
475 | | free_pages((unsigned long)rzs->compress_buffer, 1); |
476 | | |
477 | | rzs->compress_workmem = NULL; |
478 | | rzs->compress_buffer = NULL; |
479 | | |
480 | | /* Free all pages that are still in this ramzswap device */ |
481 | | for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) { |
482 | | struct page *page; |
483 | | u16 offset; |
484 | | |
485 | | page = rzs->table[index].page; |
486 | | offset = rzs->table[index].offset; |
487 | | |
488 | | if (!page) |
489 | | continue; |
490 | | |
491 | | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) |
492 | | __free_page(page); |
493 | | else |
494 | | xv_free(rzs->mem_pool, page, offset); |
495 | | } |
496 | | |
497 | | vfree(rzs->table); |
498 | | rzs->table = NULL; |
499 | | |
500 | | xv_destroy_pool(rzs->mem_pool); |
501 | | rzs->mem_pool = NULL; |
502 | | |
503 | | /* Reset stats */ |
504 | | memset(&rzs->stats, 0, sizeof(rzs->stats)); |
505 | | |
506 | | rzs->disksize = 0; |
507 | | } |
508 | | |
509 | | static int ramzswap_ioctl_init_device(struct ramzswap *rzs) |
510 | | { |
511 | | int ret; |
512 | | size_t num_pages; |
513 | | |
514 | | if (rzs->init_done) { |
515 | | pr_info("Device already initialized!\n"); |
516 | | return -EBUSY; |
517 | | } |
518 | | |
519 | | ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT); |
520 | | |
521 | | rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); |
522 | | if (!rzs->compress_workmem) { |
523 | | pr_err("Error allocating compressor working memory!\n"); |
524 | | ret = -ENOMEM; |
525 | | goto fail; |
526 | | } |
527 | | |
528 | | rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); |
529 | | if (!rzs->compress_buffer) { |
530 | | pr_err("Error allocating compressor buffer space\n"); |
531 | | ret = -ENOMEM; |
532 | | goto fail; |
533 | | } |
534 | | |
535 | | num_pages = rzs->disksize >> PAGE_SHIFT; |
536 | | rzs->table = vmalloc(num_pages * sizeof(*rzs->table)); |
537 | | if (!rzs->table) { |
538 | | pr_err("Error allocating ramzswap address table\n"); |
539 | | /* To prevent accessing table entries during cleanup */ |
540 | | rzs->disksize = 0; |
541 | | ret = -ENOMEM; |
542 | | goto fail; |
543 | | } |
544 | | memset(rzs->table, 0, num_pages * sizeof(*rzs->table)); |
545 | | |
546 | | set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT); |
547 | | |
548 | | /* ramzswap devices sort of resemble non-rotational disks */
549 | | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue); |
550 | | |
551 | | rzs->mem_pool = xv_create_pool(); |
552 | | if (!rzs->mem_pool) { |
553 | | pr_err("Error creating memory pool\n"); |
554 | | ret = -ENOMEM; |
555 | | goto fail; |
556 | | } |
557 | | |
558 | | rzs->init_done = 1; |
559 | | |
560 | | pr_debug("Initialization done!\n"); |
561 | | return 0; |
562 | | |
563 | | fail: |
564 | | reset_device(rzs); |
565 | | |
566 | | pr_err("Initialization failed: err=%d\n", ret); |
567 | | return ret; |
568 | | } |
569 | | |
570 | | static int ramzswap_ioctl_reset_device(struct ramzswap *rzs) |
571 | | { |
572 | | if (rzs->init_done) |
573 | | reset_device(rzs); |
574 | | |
575 | | return 0; |
576 | | } |
577 | | |
578 | | static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode, |
579 | | unsigned int cmd, unsigned long arg) |
580 | | { |
581 | | int ret = 0; |
582 | | size_t disksize_kb; |
583 | | |
584 | | struct ramzswap *rzs = bdev->bd_disk->private_data; |
585 | | |
586 | | switch (cmd) { |
587 | | case RZSIO_SET_DISKSIZE_KB: |
588 | | if (rzs->init_done) { |
589 | | ret = -EBUSY; |
590 | | goto out; |
591 | | } |
592 | | if (copy_from_user(&disksize_kb, (void *)arg, |
593 | | _IOC_SIZE(cmd))) { |
594 | | ret = -EFAULT; |
595 | | goto out; |
596 | | } |
597 | | rzs->disksize = disksize_kb << 10; |
598 | | pr_info("Disk size set to %zu kB\n", disksize_kb); |
599 | | break; |
600 | | |
601 | | case RZSIO_GET_STATS: |
602 | | { |
603 | | struct ramzswap_ioctl_stats *stats; |
604 | | if (!rzs->init_done) { |
605 | | ret = -ENOTTY; |
606 | | goto out; |
607 | | } |
608 | | stats = kzalloc(sizeof(*stats), GFP_KERNEL); |
609 | | if (!stats) { |
610 | | ret = -ENOMEM; |
611 | | goto out; |
612 | | } |
613 | | ramzswap_ioctl_get_stats(rzs, stats); |
614 | | if (copy_to_user((void *)arg, stats, sizeof(*stats))) { |
615 | | kfree(stats); |
616 | | ret = -EFAULT; |
617 | | goto out; |
618 | | } |
619 | | kfree(stats); |
620 | | break; |
621 | | } |
622 | | case RZSIO_INIT: |
623 | | ret = ramzswap_ioctl_init_device(rzs); |
624 | | break; |
625 | | |
626 | | case RZSIO_RESET: |
627 | | /* Do not reset an active device! */ |
628 | | if (bdev->bd_holders) { |
629 | | ret = -EBUSY; |
630 | | goto out; |
631 | | } |
632 | | |
633 | | /* Make sure all pending I/O is finished */ |
634 | | if (bdev) |
635 | | fsync_bdev(bdev); |
636 | | |
637 | | ret = ramzswap_ioctl_reset_device(rzs); |
638 | | break; |
639 | | |
640 | | default: |
641 | | pr_info("Invalid ioctl %u\n", cmd); |
642 | | ret = -ENOTTY; |
643 | | } |
644 | | |
645 | | out: |
646 | | return ret; |
647 | | } |
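| |
| | /*
| |  * Typical userspace control sequence (illustrative sketch only; the
| |  * ioctl definitions live in ramzswap_ioctl.h and the device node
| |  * name is an assumption):
| |  *
| |  *	int fd = open("/dev/ramzswap0", O_RDWR);
| |  *	size_t kb = 262144;			(optional, 256 MiB)
| |  *	ioctl(fd, RZSIO_SET_DISKSIZE_KB, &kb);	(must precede RZSIO_INIT)
| |  *	ioctl(fd, RZSIO_INIT, 0);
| |  *
| |  * followed by mkswap/swapon on the device; RZSIO_RESET (with the
| |  * device no longer in use) tears it down again.
| |  */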
648 | | |
649 | | void ramzswap_slot_free_notify(struct block_device *bdev, unsigned long index) |
650 | | { |
651 | | struct ramzswap *rzs; |
652 | | |
653 | | rzs = bdev->bd_disk->private_data; |
654 | | ramzswap_free_page(rzs, index); |
655 | | rzs_stat64_inc(rzs, &rzs->stats.notify_free); |
656 | | } |
657 | | |
658 | | static const struct block_device_operations ramzswap_devops = { |
659 | | .ioctl = ramzswap_ioctl, |
660 | | .swap_slot_free_notify = ramzswap_slot_free_notify, |
661 | | .owner = THIS_MODULE |
662 | | }; |
663 | | |
664 | | static int create_device(struct ramzswap *rzs, int device_id) |
665 | | { |
666 | | int ret = 0; |
667 | | |
668 | | mutex_init(&rzs->lock); |
669 | | spin_lock_init(&rzs->stat64_lock); |
670 | | |
671 | | rzs->queue = blk_alloc_queue(GFP_KERNEL); |
672 | | if (!rzs->queue) { |
673 | | pr_err("Error allocating disk queue for device %d\n", |
674 | | device_id); |
675 | | ret = -ENOMEM; |
676 | | goto out; |
677 | | } |
678 | | |
679 | | blk_queue_make_request(rzs->queue, ramzswap_make_request); |
680 | | rzs->queue->queuedata = rzs; |
681 | | |
682 | | /* gendisk structure */ |
683 | | rzs->disk = alloc_disk(1); |
684 | | if (!rzs->disk) { |
685 | | blk_cleanup_queue(rzs->queue); |
686 | | pr_warning("Error allocating disk structure for device %d\n", |
687 | | device_id); |
688 | | ret = -ENOMEM; |
689 | | goto out; |
690 | | } |
691 | | |
692 | | rzs->disk->major = ramzswap_major; |
693 | | rzs->disk->first_minor = device_id; |
694 | | rzs->disk->fops = &ramzswap_devops; |
695 | | rzs->disk->queue = rzs->queue; |
696 | | rzs->disk->private_data = rzs; |
697 | | snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id); |
698 | | |
699 | | /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */ |
700 | | set_capacity(rzs->disk, 0); |
701 | | |
702 | | /* |
703 | | * To ensure that we always get PAGE_SIZE-aligned
704 | | * and n*PAGE_SIZE-sized I/O requests.
705 | | */ |
706 | | blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE); |
707 | | blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE); |
708 | | blk_queue_io_min(rzs->disk->queue, PAGE_SIZE); |
709 | | blk_queue_io_opt(rzs->disk->queue, PAGE_SIZE); |
710 | | |
711 | | add_disk(rzs->disk); |
712 | | |
713 | | rzs->init_done = 0; |
714 | | |
715 | | out: |
716 | | return ret; |
717 | | } |
718 | | |
719 | | static void destroy_device(struct ramzswap *rzs) |
720 | | { |
721 | | if (rzs->disk) { |
722 | | del_gendisk(rzs->disk); |
723 | | put_disk(rzs->disk); |
724 | | } |
725 | | |
726 | | if (rzs->queue) |
727 | | blk_cleanup_queue(rzs->queue); |
728 | | } |
729 | | |
730 | | static int __init ramzswap_init(void) |
731 | | { |
732 | | int ret, dev_id; |
733 | | |
734 | | if (num_devices > max_num_devices) { |
735 | | pr_warning("Invalid value for num_devices: %u\n", |
736 | | num_devices); |
737 | | ret = -EINVAL; |
738 | | goto out; |
739 | | } |
740 | | |
741 | | ramzswap_major = register_blkdev(0, "ramzswap"); |
742 | | if (ramzswap_major <= 0) { |
743 | | pr_warning("Unable to get major number\n"); |
744 | | ret = -EBUSY; |
745 | | goto out; |
746 | | } |
747 | | |
748 | | if (!num_devices) { |
749 | | pr_info("num_devices not specified. Using default: 1\n"); |
750 | | num_devices = 1; |
751 | | } |
752 | | |
753 | | /* Allocate the device array and initialize each one */ |
754 | | pr_info("Creating %u devices ...\n", num_devices); |
755 | | devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL); |
756 | | if (!devices) { |
757 | | ret = -ENOMEM; |
758 | | goto unregister; |
759 | | } |
760 | | |
761 | | for (dev_id = 0; dev_id < num_devices; dev_id++) { |
762 | | ret = create_device(&devices[dev_id], dev_id); |
763 | | if (ret) |
764 | | goto free_devices; |
765 | | } |
766 | | |
767 | | return 0; |
768 | | |
769 | | free_devices: |
770 | | while (dev_id) |
771 | | destroy_device(&devices[--dev_id]); |
772 | | unregister: |
773 | | unregister_blkdev(ramzswap_major, "ramzswap"); |
774 | | out: |
775 | | return ret; |
776 | | } |
777 | | |
778 | | static void __exit ramzswap_exit(void) |
779 | | { |
780 | | int i; |
781 | | struct ramzswap *rzs; |
782 | | |
783 | | for (i = 0; i < num_devices; i++) { |
784 | | rzs = &devices[i]; |
785 | | |
786 | | destroy_device(rzs); |
787 | | if (rzs->init_done) |
788 | | reset_device(rzs); |
789 | | } |
790 | | |
791 | | unregister_blkdev(ramzswap_major, "ramzswap"); |
792 | | |
793 | | kfree(devices); |
794 | | pr_debug("Cleanup done!\n"); |
795 | | } |
796 | | |
797 | | module_param(num_devices, uint, 0); |
798 | | MODULE_PARM_DESC(num_devices, "Number of ramzswap devices"); |
799 | | |
800 | | module_init(ramzswap_init); |
801 | | module_exit(ramzswap_exit); |
802 | | |
803 | | MODULE_LICENSE("Dual BSD/GPL"); |
804 | | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); |
805 | | MODULE_DESCRIPTION("Compressed RAM Based Swap Device"); |
drivers/staging/ramzswap/xvmalloc.c |
1 | | /* |
2 | | * xvmalloc memory allocator |
3 | | * |
4 | | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
5 | | * |
6 | | * This code is released using a dual license strategy: BSD/GPL |
7 | | * You can choose the licence that better fits your requirements. |
8 | | * |
9 | | * Released under the terms of 3-clause BSD License |
10 | | * Released under the terms of GNU General Public License Version 2.0 |
11 | | */ |
12 | | |
13 | | #include <linux/bitops.h> |
14 | | #include <linux/errno.h> |
15 | | #include <linux/highmem.h> |
16 | | #include <linux/init.h> |
17 | | #include <linux/string.h> |
18 | | #include <linux/slab.h> |
19 | | |
20 | | #include "xvmalloc.h" |
21 | | #include "xvmalloc_int.h" |
22 | | |
23 | | static void stat_inc(u64 *value) |
24 | | { |
25 | | *value = *value + 1; |
26 | | } |
27 | | |
28 | | static void stat_dec(u64 *value) |
29 | | { |
30 | | *value = *value - 1; |
31 | | } |
32 | | |
33 | | static int test_flag(struct block_header *block, enum blockflags flag) |
34 | | { |
35 | | return block->prev & BIT(flag); |
36 | | } |
37 | | |
38 | | static void set_flag(struct block_header *block, enum blockflags flag) |
39 | | { |
40 | | block->prev |= BIT(flag); |
41 | | } |
42 | | |
43 | | static void clear_flag(struct block_header *block, enum blockflags flag) |
44 | | { |
45 | | block->prev &= ~BIT(flag); |
46 | | } |
47 | | |
48 | | /* |
49 | | * Given <page, offset> pair, provide a dereferenceable pointer.
50 | | * This is called from xv_malloc/xv_free path, so it |
51 | | * needs to be fast. |
52 | | */ |
53 | | static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type) |
54 | | { |
55 | | unsigned char *base; |
56 | | |
57 | | base = kmap_atomic(page, type); |
58 | | return base + offset; |
59 | | } |
60 | | |
61 | | static void put_ptr_atomic(void *ptr, enum km_type type) |
62 | | { |
63 | | kunmap_atomic(ptr, type); |
64 | | } |
65 | | |
66 | | static u32 get_blockprev(struct block_header *block) |
67 | | { |
68 | | return block->prev & PREV_MASK; |
69 | | } |
70 | | |
71 | | static void set_blockprev(struct block_header *block, u16 new_offset) |
72 | | { |
73 | | block->prev = new_offset | (block->prev & FLAGS_MASK); |
74 | | } |
75 | | |
76 | | static struct block_header *BLOCK_NEXT(struct block_header *block) |
77 | | { |
78 | | return (struct block_header *) |
79 | | ((char *)block + block->size + XV_ALIGN); |
80 | | } |
81 | | |
82 | | /* |
83 | | * Get index of free list containing blocks of maximum size |
84 | | * which is less than or equal to given size. |
85 | | */ |
86 | | static u32 get_index_for_insert(u32 size) |
87 | | { |
88 | | if (unlikely(size > XV_MAX_ALLOC_SIZE)) |
89 | | size = XV_MAX_ALLOC_SIZE; |
90 | | size &= ~FL_DELTA_MASK; |
91 | | return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; |
92 | | } |
93 | | |
94 | | /* |
95 | | * Get index of free list having blocks of size greater than |
96 | | * or equal to requested size. |
97 | | */ |
98 | | static u32 get_index(u32 size) |
99 | | { |
100 | | if (unlikely(size < XV_MIN_ALLOC_SIZE)) |
101 | | size = XV_MIN_ALLOC_SIZE; |
102 | | size = ALIGN(size, FL_DELTA); |
103 | | return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; |
104 | | } |
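| |
| | /*
| |  * Worked example (assuming XV_MIN_ALLOC_SIZE == 32 and FL_DELTA == 8,
| |  * as defined in xvmalloc_int.h): for a 100-byte request, get_index()
| |  * rounds up to 104 and returns (104 - 32) / 8 == 9, while
| |  * get_index_for_insert() rounds down to 96 and returns 8. Allocations
| |  * therefore search a list whose blocks are at least as large as the
| |  * request, while a free block is filed under a list it can fully
| |  * satisfy.
| |  */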
105 | | |
106 | | /** |
107 | | * find_block - find block of at least given size |
108 | | * @pool: memory pool to search from |
109 | | * @size: size of block required |
110 | | * @page: page containing required block |
111 | | * @offset: offset within the page where block is located. |
112 | | * |
113 | | * Searches the two-level bitmap to locate a block of at least
114 | | * the given size. If such a block is found, it provides |
115 | | * <page, offset> to identify this block and returns index |
116 | | * in freelist where we found this block. |
117 | | * Otherwise, returns 0 and <page, offset> params are not touched. |
118 | | */ |
119 | | static u32 find_block(struct xv_pool *pool, u32 size, |
120 | | struct page **page, u32 *offset) |
121 | | { |
122 | | ulong flbitmap, slbitmap; |
123 | | u32 flindex, slindex, slbitstart; |
124 | | |
125 | | /* There are no free blocks in this pool */ |
126 | | if (!pool->flbitmap) |
127 | | return 0; |
128 | | |
129 | | /* Get freelist index corresponding to this size */
130 | | slindex = get_index(size); |
131 | | slbitmap = pool->slbitmap[slindex / BITS_PER_LONG]; |
132 | | slbitstart = slindex % BITS_PER_LONG; |
133 | | |
134 | | /* |
135 | | * If freelist is not empty at this index, we found the |
136 | | * block - head of this list. This is an approximate best-fit match.
137 | | */ |
138 | | if (test_bit(slbitstart, &slbitmap)) { |
139 | | *page = pool->freelist[slindex].page; |
140 | | *offset = pool->freelist[slindex].offset; |
141 | | return slindex; |
142 | | } |
143 | | |
144 | | /* |
145 | | * No best-fit found. Search a bit further in bitmap for a free block. |
146 | | * Second level bitmap consists of a series of BITS_PER_LONG-bit
147 | | * chunks. Search further in the chunk where we expected a best-fit,
148 | | * starting from the index location found above.
149 | | */ |
150 | | slbitstart++; |
151 | | slbitmap >>= slbitstart; |
152 | | |
153 | | /* Skip this search if we were already at end of this bitmap chunk */ |
154 | | if ((slbitstart != BITS_PER_LONG) && slbitmap) { |
155 | | slindex += __ffs(slbitmap) + 1; |
156 | | *page = pool->freelist[slindex].page; |
157 | | *offset = pool->freelist[slindex].offset; |
158 | | return slindex; |
159 | | } |
160 | | |
161 | | /* Now do a full two-level bitmap search to find next nearest fit */ |
162 | | flindex = slindex / BITS_PER_LONG; |
163 | | |
164 | | flbitmap = (pool->flbitmap) >> (flindex + 1); |
165 | | if (!flbitmap) |
166 | | return 0; |
167 | | |
168 | | flindex += __ffs(flbitmap) + 1; |
169 | | slbitmap = pool->slbitmap[flindex]; |
170 | | slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap); |
171 | | *page = pool->freelist[slindex].page; |
172 | | *offset = pool->freelist[slindex].offset; |
173 | | |
174 | | return slindex; |
175 | | } |
176 | | |
177 | | /* |
178 | | * Insert block at <page, offset> in freelist of given pool. |
179 | | * freelist used depends on block size. |
180 | | */ |
181 | | static void insert_block(struct xv_pool *pool, struct page *page, u32 offset, |
182 | | struct block_header *block) |
183 | | { |
184 | | u32 flindex, slindex; |
185 | | struct block_header *nextblock; |
186 | | |
187 | | slindex = get_index_for_insert(block->size); |
188 | | flindex = slindex / BITS_PER_LONG; |
189 | | |
190 | | block->link.prev_page = 0; |
191 | | block->link.prev_offset = 0; |
192 | | block->link.next_page = pool->freelist[slindex].page; |
193 | | block->link.next_offset = pool->freelist[slindex].offset; |
194 | | pool->freelist[slindex].page = page; |
195 | | pool->freelist[slindex].offset = offset; |
196 | | |
197 | | if (block->link.next_page) { |
198 | | nextblock = get_ptr_atomic(block->link.next_page, |
199 | | block->link.next_offset, KM_USER1); |
200 | | nextblock->link.prev_page = page; |
201 | | nextblock->link.prev_offset = offset; |
202 | | put_ptr_atomic(nextblock, KM_USER1); |
203 | | } |
204 | | |
205 | | __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]); |
206 | | __set_bit(flindex, &pool->flbitmap); |
207 | | } |
208 | | |
209 | | /* |
210 | | * Remove block from head of freelist. Index 'slindex' identifies the freelist. |
211 | | */ |
212 | | static void remove_block_head(struct xv_pool *pool, |
213 | | struct block_header *block, u32 slindex) |
214 | | { |
215 | | struct block_header *tmpblock; |
216 | | u32 flindex = slindex / BITS_PER_LONG; |
217 | | |
218 | | pool->freelist[slindex].page = block->link.next_page; |
219 | | pool->freelist[slindex].offset = block->link.next_offset; |
220 | | block->link.prev_page = 0; |
221 | | block->link.prev_offset = 0; |
222 | | |
223 | | if (!pool->freelist[slindex].page) { |
224 | | __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]); |
225 | | if (!pool->slbitmap[flindex]) |
226 | | __clear_bit(flindex, &pool->flbitmap); |
227 | | } else { |
228 | | /* |
229 | | * DEBUG ONLY: We need not reinitialize freelist head previous |
230 | | * pointer to 0 - we never depend on its value. But just for |
231 | | * sanity, let's do it.
232 | | */ |
233 | | tmpblock = get_ptr_atomic(pool->freelist[slindex].page, |
234 | | pool->freelist[slindex].offset, KM_USER1); |
235 | | tmpblock->link.prev_page = 0; |
236 | | tmpblock->link.prev_offset = 0; |
237 | | put_ptr_atomic(tmpblock, KM_USER1); |
238 | | } |
239 | | } |
240 | | |
241 | | /* |
242 | | * Remove block from freelist. Index 'slindex' identifies the freelist. |
243 | | */ |
244 | | static void remove_block(struct xv_pool *pool, struct page *page, u32 offset, |
245 | | struct block_header *block, u32 slindex) |
246 | | { |
247 | | u32 flindex; |
248 | | struct block_header *tmpblock; |
249 | | |
250 | | if (pool->freelist[slindex].page == page |
251 | | && pool->freelist[slindex].offset == offset) { |
252 | | remove_block_head(pool, block, slindex); |
253 | | return; |
254 | | } |
255 | | |
256 | | flindex = slindex / BITS_PER_LONG; |
257 | | |
258 | | if (block->link.prev_page) { |
259 | | tmpblock = get_ptr_atomic(block->link.prev_page, |
260 | | block->link.prev_offset, KM_USER1); |
261 | | tmpblock->link.next_page = block->link.next_page; |
262 | | tmpblock->link.next_offset = block->link.next_offset; |
263 | | put_ptr_atomic(tmpblock, KM_USER1); |
264 | | } |
265 | | |
266 | | if (block->link.next_page) { |
267 | | tmpblock = get_ptr_atomic(block->link.next_page, |
268 | | block->link.next_offset, KM_USER1); |
269 | | tmpblock->link.prev_page = block->link.prev_page; |
270 | | tmpblock->link.prev_offset = block->link.prev_offset; |
271 | | put_ptr_atomic(tmpblock, KM_USER1); |
272 | | } |
273 | | } |
274 | | |
275 | | /* |
276 | | * Allocate a page and add it to freelist of given pool. |
277 | | */ |
278 | | static int grow_pool(struct xv_pool *pool, gfp_t flags) |
279 | | { |
280 | | struct page *page; |
281 | | struct block_header *block; |
282 | | |
283 | | page = alloc_page(flags); |
284 | | if (unlikely(!page)) |
285 | | return -ENOMEM; |
286 | | |
287 | | stat_inc(&pool->total_pages); |
288 | | |
289 | | spin_lock(&pool->lock); |
290 | | block = get_ptr_atomic(page, 0, KM_USER0); |
291 | | |
292 | | block->size = PAGE_SIZE - XV_ALIGN; |
293 | | set_flag(block, BLOCK_FREE); |
294 | | clear_flag(block, PREV_FREE); |
295 | | set_blockprev(block, 0); |
296 | | |
297 | | insert_block(pool, page, 0, block); |
298 | | |
299 | | put_ptr_atomic(block, KM_USER0); |
300 | | spin_unlock(&pool->lock); |
301 | | |
302 | | return 0; |
303 | | } |
304 | | |
305 | | /* |
306 | | * Create a memory pool. Allocates freelist, bitmaps and other |
307 | | * per-pool metadata. |
308 | | */ |
309 | | struct xv_pool *xv_create_pool(void) |
310 | | { |
311 | | u32 ovhd_size; |
312 | | struct xv_pool *pool; |
313 | | |
314 | | ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); |
315 | | pool = kzalloc(ovhd_size, GFP_KERNEL); |
316 | | if (!pool) |
317 | | return NULL; |
318 | | |
319 | | spin_lock_init(&pool->lock); |
320 | | |
321 | | return pool; |
322 | | } |
323 | | |
324 | | void xv_destroy_pool(struct xv_pool *pool) |
325 | | { |
326 | | kfree(pool); |
327 | | } |
328 | | |
329 | | /** |
330 | | * xv_malloc - Allocate block of given size from pool. |
331 | | * @pool: pool to allocate from |
332 | | * @size: size of block to allocate |
333 | | * @page: page no. that holds the object |
334 | | * @offset: location of object within page |
335 | | * |
336 | | * On success, <page, offset> identifies block allocated |
337 | | * and 0 is returned. On failure, <page, offset> is set to |
338 | | * 0 and -ENOMEM is returned. |
339 | | * |
340 | | * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail. |
341 | | */ |
342 | | int xv_malloc(struct xv_pool *pool, u32 size, struct page **page, |
343 | | u32 *offset, gfp_t flags) |
344 | | { |
345 | | int error; |
346 | | u32 index, tmpsize, origsize, tmpoffset; |
347 | | struct block_header *block, *tmpblock; |
348 | | |
349 | | *page = NULL; |
350 | | *offset = 0; |
351 | | origsize = size; |
352 | | |
353 | | if (unlikely(!size || size > XV_MAX_ALLOC_SIZE)) |
354 | | return -ENOMEM; |
355 | | |
356 | | size = ALIGN(size, XV_ALIGN); |
357 | | |
358 | | spin_lock(&pool->lock); |
359 | | |
360 | | index = find_block(pool, size, page, offset); |
361 | | |
362 | | if (!*page) { |
363 | | spin_unlock(&pool->lock); |
364 | | if (flags & GFP_NOWAIT) |
365 | | return -ENOMEM; |
366 | | error = grow_pool(pool, flags); |
367 | | if (unlikely(error)) |
368 | | return error; |
369 | | |
370 | | spin_lock(&pool->lock); |
371 | | index = find_block(pool, size, page, offset); |
372 | | } |
373 | | |
374 | | if (!*page) { |
375 | | spin_unlock(&pool->lock); |
376 | | return -ENOMEM; |
377 | | } |
378 | | |
379 | | block = get_ptr_atomic(*page, *offset, KM_USER0); |
380 | | |
381 | | remove_block_head(pool, block, index); |
382 | | |
383 | | /* Split the block if required */ |
384 | | tmpoffset = *offset + size + XV_ALIGN; |
385 | | tmpsize = block->size - size; |
386 | | tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN); |
387 | | if (tmpsize) { |
388 | | tmpblock->size = tmpsize - XV_ALIGN; |
389 | | set_flag(tmpblock, BLOCK_FREE); |
390 | | clear_flag(tmpblock, PREV_FREE); |
391 | | |
392 | | set_blockprev(tmpblock, *offset); |
393 | | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) |
394 | | insert_block(pool, *page, tmpoffset, tmpblock); |
395 | | |
396 | | if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) { |
397 | | tmpblock = BLOCK_NEXT(tmpblock); |
398 | | set_blockprev(tmpblock, tmpoffset); |
399 | | } |
400 | | } else { |
401 | | /* This block is exact fit */ |
402 | | if (tmpoffset != PAGE_SIZE) |
403 | | clear_flag(tmpblock, PREV_FREE); |
404 | | } |
405 | | |
406 | | block->size = origsize; |
407 | | clear_flag(block, BLOCK_FREE); |
408 | | |
409 | | put_ptr_atomic(block, KM_USER0); |
410 | | spin_unlock(&pool->lock); |
411 | | |
412 | | *offset += XV_ALIGN; |
413 | | |
414 | | return 0; |
415 | | } |
416 | | |
417 | | /* |
418 | | * Free block identified with <page, offset> |
419 | | */ |
420 | | void xv_free(struct xv_pool *pool, struct page *page, u32 offset) |
421 | | { |
422 | | void *page_start; |
423 | | struct block_header *block, *tmpblock; |
424 | | |
425 | | offset -= XV_ALIGN; |
426 | | |
427 | | spin_lock(&pool->lock); |
428 | | |
429 | | page_start = get_ptr_atomic(page, 0, KM_USER0); |
430 | | block = (struct block_header *)((char *)page_start + offset); |
431 | | |
432 | | /* Catch double free bugs */ |
433 | | BUG_ON(test_flag(block, BLOCK_FREE)); |
434 | | |
435 | | block->size = ALIGN(block->size, XV_ALIGN); |
436 | | |
437 | | tmpblock = BLOCK_NEXT(block); |
438 | | if (offset + block->size + XV_ALIGN == PAGE_SIZE) |
439 | | tmpblock = NULL; |
440 | | |
441 | | /* Merge next block if it's free */
442 | | if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) { |
443 | | /* |
444 | | * Blocks smaller than XV_MIN_ALLOC_SIZE |
445 | | * are not inserted in any free list. |
446 | | */ |
447 | | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) { |
448 | | remove_block(pool, page, |
449 | | offset + block->size + XV_ALIGN, tmpblock, |
450 | | get_index_for_insert(tmpblock->size)); |
451 | | } |
452 | | block->size += tmpblock->size + XV_ALIGN; |
453 | | } |
454 | | |
455 | | /* Merge previous block if it's free */
456 | | if (test_flag(block, PREV_FREE)) { |
457 | | tmpblock = (struct block_header *)((char *)(page_start) + |
458 | | get_blockprev(block)); |
459 | | offset = offset - tmpblock->size - XV_ALIGN; |
460 | | |
461 | | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) |
462 | | remove_block(pool, page, offset, tmpblock, |
463 | | get_index_for_insert(tmpblock->size)); |
464 | | |
465 | | tmpblock->size += block->size + XV_ALIGN; |
466 | | block = tmpblock; |
467 | | } |
468 | | |
469 | | /* No used objects in this page. Free it. */ |
470 | | if (block->size == PAGE_SIZE - XV_ALIGN) { |
471 | | put_ptr_atomic(page_start, KM_USER0); |
472 | | spin_unlock(&pool->lock); |
473 | | |
474 | | __free_page(page); |
475 | | stat_dec(&pool->total_pages); |
476 | | return; |
477 | | } |
478 | | |
479 | | set_flag(block, BLOCK_FREE); |
480 | | if (block->size >= XV_MIN_ALLOC_SIZE) |
481 | | insert_block(pool, page, offset, block); |
482 | | |
483 | | if (offset + block->size + XV_ALIGN != PAGE_SIZE) { |
484 | | tmpblock = BLOCK_NEXT(block); |
485 | | set_flag(tmpblock, PREV_FREE); |
486 | | set_blockprev(tmpblock, offset); |
487 | | } |
488 | | |
489 | | put_ptr_atomic(page_start, KM_USER0); |
490 | | spin_unlock(&pool->lock); |
491 | | } |
492 | | |
493 | | u32 xv_get_object_size(void *obj) |
494 | | { |
495 | | struct block_header *blk; |
496 | | |
497 | | blk = (struct block_header *)((char *)(obj) - XV_ALIGN); |
498 | | return blk->size; |
499 | | } |
500 | | |
501 | | /* |
502 | | * Returns total memory used by allocator (userdata + metadata) |
503 | | */ |
504 | | u64 xv_get_total_size_bytes(struct xv_pool *pool) |
505 | | { |
506 | | return pool->total_pages << PAGE_SHIFT; |
507 | | } |
drivers/staging/zram/xvmalloc.c |
| 1 | /* |
| 2 | * xvmalloc memory allocator |
| 3 | * |
| 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
| 5 | * |
| 6 | * This code is released using a dual license strategy: BSD/GPL |
| 7 | * You can choose the licence that better fits your requirements. |
| 8 | * |
| 9 | * Released under the terms of 3-clause BSD License |
| 10 | * Released under the terms of GNU General Public License Version 2.0 |
| 11 | */ |
| 12 | |
| 13 | #include <linux/bitops.h> |
| 14 | #include <linux/errno.h> |
| 15 | #include <linux/highmem.h> |
| 16 | #include <linux/init.h> |
| 17 | #include <linux/string.h> |
| 18 | #include <linux/slab.h> |
| 19 | |
| 20 | #include "xvmalloc.h" |
| 21 | #include "xvmalloc_int.h" |
| 22 | |
| 23 | static void stat_inc(u64 *value) |
| 24 | { |
| 25 | *value = *value + 1; |
| 26 | } |
| 27 | |
| 28 | static void stat_dec(u64 *value) |
| 29 | { |
| 30 | *value = *value - 1; |
| 31 | } |
| 32 | |
| 33 | static int test_flag(struct block_header *block, enum blockflags flag) |
| 34 | { |
| 35 | return block->prev & BIT(flag); |
| 36 | } |
| 37 | |
| 38 | static void set_flag(struct block_header *block, enum blockflags flag) |
| 39 | { |
| 40 | block->prev |= BIT(flag); |
| 41 | } |
| 42 | |
| 43 | static void clear_flag(struct block_header *block, enum blockflags flag) |
| 44 | { |
| 45 | block->prev &= ~BIT(flag); |
| 46 | } |
| 47 | |
| 48 | /* |
| 49 | * Given <page, offset> pair, provide a dereferenceable pointer.
| 50 | * This is called from xv_malloc/xv_free path, so it |
| 51 | * needs to be fast. |
| 52 | */ |
| 53 | static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type) |
| 54 | { |
| 55 | unsigned char *base; |
| 56 | |
| 57 | base = kmap_atomic(page, type); |
| 58 | return base + offset; |
| 59 | } |
| 60 | |
| 61 | static void put_ptr_atomic(void *ptr, enum km_type type) |
| 62 | { |
| 63 | kunmap_atomic(ptr, type); |
| 64 | } |
| 65 | |
| 66 | static u32 get_blockprev(struct block_header *block) |
| 67 | { |
| 68 | return block->prev & PREV_MASK; |
| 69 | } |
| 70 | |
| 71 | static void set_blockprev(struct block_header *block, u16 new_offset) |
| 72 | { |
| 73 | block->prev = new_offset | (block->prev & FLAGS_MASK); |
| 74 | } |
| 75 | |
| 76 | static struct block_header *BLOCK_NEXT(struct block_header *block) |
| 77 | { |
| 78 | return (struct block_header *) |
| 79 | ((char *)block + block->size + XV_ALIGN); |
| 80 | } |
| 81 | |
| 82 | /* |
| 83 | * Get index of free list containing blocks of maximum size |
| 84 | * which is less than or equal to given size. |
| 85 | */ |
| 86 | static u32 get_index_for_insert(u32 size) |
| 87 | { |
| 88 | if (unlikely(size > XV_MAX_ALLOC_SIZE)) |
| 89 | size = XV_MAX_ALLOC_SIZE; |
| 90 | size &= ~FL_DELTA_MASK; |
| 91 | return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; |
| 92 | } |
| 93 | |
| 94 | /* |
| 95 | * Get index of free list having blocks of size greater than |
| 96 | * or equal to requested size. |
| 97 | */ |
| 98 | static u32 get_index(u32 size) |
| 99 | { |
| 100 | if (unlikely(size < XV_MIN_ALLOC_SIZE)) |
| 101 | size = XV_MIN_ALLOC_SIZE; |
| 102 | size = ALIGN(size, FL_DELTA); |
| 103 | return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; |
| 104 | } |
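| |
| | /*
| |  * Worked example (assuming XV_MIN_ALLOC_SIZE == 32 and FL_DELTA == 8,
| |  * as defined in xvmalloc_int.h): for a 100-byte request, get_index()
| |  * rounds up to 104 and returns (104 - 32) / 8 == 9, while
| |  * get_index_for_insert() rounds down to 96 and returns 8. Allocations
| |  * therefore search a list whose blocks are at least as large as the
| |  * request, while a free block is filed under a list it can fully
| |  * satisfy.
| |  */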
| 105 | |
| 106 | /** |
| 107 | * find_block - find block of at least given size |
| 108 | * @pool: memory pool to search from |
| 109 | * @size: size of block required |
| 110 | * @page: page containing required block |
| 111 | * @offset: offset within the page where block is located. |
| 112 | * |
| 113 | * Searches the two-level bitmap to locate a block of at least
| 114 | * the given size. If such a block is found, it provides |
| 115 | * <page, offset> to identify this block and returns index |
| 116 | * in freelist where we found this block. |
| 117 | * Otherwise, returns 0 and <page, offset> params are not touched. |
| 118 | */ |
| 119 | static u32 find_block(struct xv_pool *pool, u32 size, |
| 120 | struct page **page, u32 *offset) |
| 121 | { |
| 122 | ulong flbitmap, slbitmap; |
| 123 | u32 flindex, slindex, slbitstart; |
| 124 | |
| 125 | /* There are no free blocks in this pool */ |
| 126 | if (!pool->flbitmap) |
| 127 | return 0; |
| 128 | |
| 129 | /* Get freelist index corresponding to this size */
| 130 | slindex = get_index(size); |
| 131 | slbitmap = pool->slbitmap[slindex / BITS_PER_LONG]; |
| 132 | slbitstart = slindex % BITS_PER_LONG; |
| 133 | |
| 134 | /* |
| 135 | * If freelist is not empty at this index, we found the |
| 136 | * block - head of this list. This is an approximate best-fit match.
| 137 | */ |
| 138 | if (test_bit(slbitstart, &slbitmap)) { |
| 139 | *page = pool->freelist[slindex].page; |
| 140 | *offset = pool->freelist[slindex].offset; |
| 141 | return slindex; |
| 142 | } |
| 143 | |
| 144 | /* |
| 145 | * No best-fit found. Search a bit further in bitmap for a free block. |
| 146 | * Second level bitmap consists of a series of BITS_PER_LONG-bit
| 147 | * chunks. Search further in the chunk where we expected a best-fit,
| 148 | * starting from the index location found above.
| 149 | */ |
| 150 | slbitstart++; |
| 151 | slbitmap >>= slbitstart; |
| 152 | |
| 153 | /* Skip this search if we were already at end of this bitmap chunk */ |
| 154 | if ((slbitstart != BITS_PER_LONG) && slbitmap) { |
| 155 | slindex += __ffs(slbitmap) + 1; |
| 156 | *page = pool->freelist[slindex].page; |
| 157 | *offset = pool->freelist[slindex].offset; |
| 158 | return slindex; |
| 159 | } |
| 160 | |
| 161 | /* Now do a full two-level bitmap search to find next nearest fit */ |
| 162 | flindex = slindex / BITS_PER_LONG; |
| 163 | |
| 164 | flbitmap = (pool->flbitmap) >> (flindex + 1); |
| 165 | if (!flbitmap) |
| 166 | return 0; |
| 167 | |
| 168 | flindex += __ffs(flbitmap) + 1; |
| 169 | slbitmap = pool->slbitmap[flindex]; |
| 170 | slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap); |
| 171 | *page = pool->freelist[slindex].page; |
| 172 | *offset = pool->freelist[slindex].offset; |
| 173 | |
| 174 | return slindex; |
| 175 | } |
| 176 | |
| 177 | /* |
| 178 | * Insert block at <page, offset> in freelist of given pool. |
| 179 | * The freelist used depends on the block size.
| 180 | */ |
| 181 | static void insert_block(struct xv_pool *pool, struct page *page, u32 offset, |
| 182 | struct block_header *block) |
| 183 | { |
| 184 | u32 flindex, slindex; |
| 185 | struct block_header *nextblock; |
| 186 | |
| 187 | slindex = get_index_for_insert(block->size); |
| 188 | flindex = slindex / BITS_PER_LONG; |
| 189 | |
| 190 | block->link.prev_page = 0; |
| 191 | block->link.prev_offset = 0; |
| 192 | block->link.next_page = pool->freelist[slindex].page; |
| 193 | block->link.next_offset = pool->freelist[slindex].offset; |
| 194 | pool->freelist[slindex].page = page; |
| 195 | pool->freelist[slindex].offset = offset; |
| 196 | |
| 197 | if (block->link.next_page) { |
| 198 | nextblock = get_ptr_atomic(block->link.next_page, |
| 199 | block->link.next_offset, KM_USER1); |
| 200 | nextblock->link.prev_page = page; |
| 201 | nextblock->link.prev_offset = offset; |
| 202 | put_ptr_atomic(nextblock, KM_USER1); |
| 203 | } |
| 204 | |
| 205 | __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]); |
| 206 | __set_bit(flindex, &pool->flbitmap); |
| 207 | } |
| 208 | |
| 209 | /* |
| 210 | * Remove block from head of freelist. Index 'slindex' identifies the freelist. |
| 211 | */ |
| 212 | static void remove_block_head(struct xv_pool *pool, |
| 213 | struct block_header *block, u32 slindex) |
| 214 | { |
| 215 | struct block_header *tmpblock; |
| 216 | u32 flindex = slindex / BITS_PER_LONG; |
| 217 | |
| 218 | pool->freelist[slindex].page = block->link.next_page; |
| 219 | pool->freelist[slindex].offset = block->link.next_offset; |
| 220 | block->link.prev_page = 0; |
| 221 | block->link.prev_offset = 0; |
| 222 | |
| 223 | if (!pool->freelist[slindex].page) { |
| 224 | __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]); |
| 225 | if (!pool->slbitmap[flindex]) |
| 226 | __clear_bit(flindex, &pool->flbitmap); |
| 227 | } else { |
| 228 | /* |
| 229 | * DEBUG ONLY: We need not reinitialize freelist head previous |
| 230 | * pointer to 0 - we never depend on its value. But just for |
| 231 | * sanity, let's do it.
| 232 | */ |
| 233 | tmpblock = get_ptr_atomic(pool->freelist[slindex].page, |
| 234 | pool->freelist[slindex].offset, KM_USER1); |
| 235 | tmpblock->link.prev_page = 0; |
| 236 | tmpblock->link.prev_offset = 0; |
| 237 | put_ptr_atomic(tmpblock, KM_USER1); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | /* |
| 242 | * Remove block from freelist. Index 'slindex' identifies the freelist. |
| 243 | */ |
| 244 | static void remove_block(struct xv_pool *pool, struct page *page, u32 offset, |
| 245 | struct block_header *block, u32 slindex) |
| 246 | { |
| 247 | u32 flindex; |
| 248 | struct block_header *tmpblock; |
| 249 | |
| 250 | if (pool->freelist[slindex].page == page |
| 251 | && pool->freelist[slindex].offset == offset) { |
| 252 | remove_block_head(pool, block, slindex); |
| 253 | return; |
| 254 | } |
| 255 | |
| 256 | flindex = slindex / BITS_PER_LONG; |
| 257 | |
| 258 | if (block->link.prev_page) { |
| 259 | tmpblock = get_ptr_atomic(block->link.prev_page, |
| 260 | block->link.prev_offset, KM_USER1); |
| 261 | tmpblock->link.next_page = block->link.next_page; |
| 262 | tmpblock->link.next_offset = block->link.next_offset; |
| 263 | put_ptr_atomic(tmpblock, KM_USER1); |
| 264 | } |
| 265 | |
| 266 | if (block->link.next_page) { |
| 267 | tmpblock = get_ptr_atomic(block->link.next_page, |
| 268 | block->link.next_offset, KM_USER1); |
| 269 | tmpblock->link.prev_page = block->link.prev_page; |
| 270 | tmpblock->link.prev_offset = block->link.prev_offset; |
| 271 | put_ptr_atomic(tmpblock, KM_USER1); |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | /* |
| 276 | * Allocate a page and add it to freelist of given pool. |
| 277 | */ |
| 278 | static int grow_pool(struct xv_pool *pool, gfp_t flags) |
| 279 | { |
| 280 | struct page *page; |
| 281 | struct block_header *block; |
| 282 | |
| 283 | page = alloc_page(flags); |
| 284 | if (unlikely(!page)) |
| 285 | return -ENOMEM; |
| 286 | |
| 287 | stat_inc(&pool->total_pages); |
| 288 | |
| 289 | spin_lock(&pool->lock); |
| 290 | block = get_ptr_atomic(page, 0, KM_USER0); |
| 291 | |
| 292 | block->size = PAGE_SIZE - XV_ALIGN; |
| 293 | set_flag(block, BLOCK_FREE); |
| 294 | clear_flag(block, PREV_FREE); |
| 295 | set_blockprev(block, 0); |
| 296 | |
| 297 | insert_block(pool, page, 0, block); |
| 298 | |
| 299 | put_ptr_atomic(block, KM_USER0); |
| 300 | spin_unlock(&pool->lock); |
| 301 | |
| 302 | return 0; |
| 303 | } |
| 304 | |
| 305 | /* |
| 306 | * Create a memory pool. Allocates freelist, bitmaps and other |
| 307 | * per-pool metadata. |
| 308 | */ |
| 309 | struct xv_pool *xv_create_pool(void) |
| 310 | { |
| 311 | u32 ovhd_size; |
| 312 | struct xv_pool *pool; |
| 313 | |
| 314 | ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); |
| 315 | pool = kzalloc(ovhd_size, GFP_KERNEL); |
| 316 | if (!pool) |
| 317 | return NULL; |
| 318 | |
| 319 | spin_lock_init(&pool->lock); |
| 320 | |
| 321 | return pool; |
| 322 | } |
| 323 | |
| 324 | void xv_destroy_pool(struct xv_pool *pool) |
| 325 | { |
| 326 | kfree(pool); |
| 327 | } |
| 328 | |
| 329 | /** |
| 330 | * xv_malloc - Allocate block of given size from pool. |
| 331 | * @pool: pool to allocate from |
| 332 | * @size: size of block to allocate |
| 333 | * @page: page no. that holds the object |
| 334 | * @offset: location of object within page |
| 335 | * |
| 336 | * On success, <page, offset> identifies block allocated |
| 337 | * and 0 is returned. On failure, <page, offset> is set to |
| 338 | * 0 and -ENOMEM is returned. |
| 339 | * |
| 340 | * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail. |
| 341 | */ |
| 342 | int xv_malloc(struct xv_pool *pool, u32 size, struct page **page, |
| 343 | u32 *offset, gfp_t flags) |
| 344 | { |
| 345 | int error; |
| 346 | u32 index, tmpsize, origsize, tmpoffset; |
| 347 | struct block_header *block, *tmpblock; |
| 348 | |
| 349 | *page = NULL; |
| 350 | *offset = 0; |
| 351 | origsize = size; |
| 352 | |
| 353 | if (unlikely(!size || size > XV_MAX_ALLOC_SIZE)) |
| 354 | return -ENOMEM; |
| 355 | |
| 356 | size = ALIGN(size, XV_ALIGN); |
| 357 | |
| 358 | spin_lock(&pool->lock); |
| 359 | |
| 360 | index = find_block(pool, size, page, offset); |
| 361 | |
| 362 | if (!*page) { |
| 363 | spin_unlock(&pool->lock); |
| 364 | if (flags & GFP_NOWAIT) |
| 365 | return -ENOMEM; |
| 366 | error = grow_pool(pool, flags); |
| 367 | if (unlikely(error)) |
| 368 | return error; |
| 369 | |
| 370 | spin_lock(&pool->lock); |
| 371 | index = find_block(pool, size, page, offset); |
| 372 | } |
| 373 | |
| 374 | if (!*page) { |
| 375 | spin_unlock(&pool->lock); |
| 376 | return -ENOMEM; |
| 377 | } |
| 378 | |
| 379 | block = get_ptr_atomic(*page, *offset, KM_USER0); |
| 380 | |
| 381 | remove_block_head(pool, block, index); |
| 382 | |
| 383 | /* Split the block if required */ |
| 384 | tmpoffset = *offset + size + XV_ALIGN; |
| 385 | tmpsize = block->size - size; |
| 386 | tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN); |
| 387 | if (tmpsize) { |
| 388 | tmpblock->size = tmpsize - XV_ALIGN; |
| 389 | set_flag(tmpblock, BLOCK_FREE); |
| 390 | clear_flag(tmpblock, PREV_FREE); |
| 391 | |
| 392 | set_blockprev(tmpblock, *offset); |
| 393 | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) |
| 394 | insert_block(pool, *page, tmpoffset, tmpblock); |
| 395 | |
| 396 | if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) { |
| 397 | tmpblock = BLOCK_NEXT(tmpblock); |
| 398 | set_blockprev(tmpblock, tmpoffset); |
| 399 | } |
| 400 | } else { |
| 401 | /* This block is exact fit */ |
| 402 | if (tmpoffset != PAGE_SIZE) |
| 403 | clear_flag(tmpblock, PREV_FREE); |
| 404 | } |
| 405 | |
| 406 | block->size = origsize; |
| 407 | clear_flag(block, BLOCK_FREE); |
| 408 | |
| 409 | put_ptr_atomic(block, KM_USER0); |
| 410 | spin_unlock(&pool->lock); |
| 411 | |
| 412 | *offset += XV_ALIGN; |
| 413 | |
| 414 | return 0; |
| 415 | } |
| 416 | |
| 417 | /* |
| 418 | * Free block identified with <page, offset> |
| 419 | */ |
| 420 | void xv_free(struct xv_pool *pool, struct page *page, u32 offset) |
| 421 | { |
| 422 | void *page_start; |
| 423 | struct block_header *block, *tmpblock; |
| 424 | |
| 425 | offset -= XV_ALIGN; |
| 426 | |
| 427 | spin_lock(&pool->lock); |
| 428 | |
| 429 | page_start = get_ptr_atomic(page, 0, KM_USER0); |
| 430 | block = (struct block_header *)((char *)page_start + offset); |
| 431 | |
| 432 | /* Catch double free bugs */ |
| 433 | BUG_ON(test_flag(block, BLOCK_FREE)); |
| 434 | |
| 435 | block->size = ALIGN(block->size, XV_ALIGN); |
| 436 | |
| 437 | tmpblock = BLOCK_NEXT(block); |
| 438 | if (offset + block->size + XV_ALIGN == PAGE_SIZE) |
| 439 | tmpblock = NULL; |
| 440 | |
| 441 | /* Merge next block if it's free */ |
| 442 | if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) { |
| 443 | /* |
| 444 | * Blocks smaller than XV_MIN_ALLOC_SIZE |
| 445 | * are not inserted in any free list. |
| 446 | */ |
| 447 | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) { |
| 448 | remove_block(pool, page, |
| 449 | offset + block->size + XV_ALIGN, tmpblock, |
| 450 | get_index_for_insert(tmpblock->size)); |
| 451 | } |
| 452 | block->size += tmpblock->size + XV_ALIGN; |
| 453 | } |
| 454 | |
| 455 | /* Merge previous block if it's free */ |
| 456 | if (test_flag(block, PREV_FREE)) { |
| 457 | tmpblock = (struct block_header *)((char *)(page_start) + |
| 458 | get_blockprev(block)); |
| 459 | offset = offset - tmpblock->size - XV_ALIGN; |
| 460 | |
| 461 | if (tmpblock->size >= XV_MIN_ALLOC_SIZE) |
| 462 | remove_block(pool, page, offset, tmpblock, |
| 463 | get_index_for_insert(tmpblock->size)); |
| 464 | |
| 465 | tmpblock->size += block->size + XV_ALIGN; |
| 466 | block = tmpblock; |
| 467 | } |
| 468 | |
| 469 | /* No used objects in this page. Free it. */ |
| 470 | if (block->size == PAGE_SIZE - XV_ALIGN) { |
| 471 | put_ptr_atomic(page_start, KM_USER0); |
| 472 | spin_unlock(&pool->lock); |
| 473 | |
| 474 | __free_page(page); |
| 475 | stat_dec(&pool->total_pages); |
| 476 | return; |
| 477 | } |
| 478 | |
| 479 | set_flag(block, BLOCK_FREE); |
| 480 | if (block->size >= XV_MIN_ALLOC_SIZE) |
| 481 | insert_block(pool, page, offset, block); |
| 482 | |
| 483 | if (offset + block->size + XV_ALIGN != PAGE_SIZE) { |
| 484 | tmpblock = BLOCK_NEXT(block); |
| 485 | set_flag(tmpblock, PREV_FREE); |
| 486 | set_blockprev(tmpblock, offset); |
| 487 | } |
| 488 | |
| 489 | put_ptr_atomic(page_start, KM_USER0); |
| 490 | spin_unlock(&pool->lock); |
| 491 | } |
| 492 | |
| 493 | u32 xv_get_object_size(void *obj) |
| 494 | { |
| 495 | struct block_header *blk; |
| 496 | |
| 497 | blk = (struct block_header *)((char *)(obj) - XV_ALIGN); |
| 498 | return blk->size; |
| 499 | } |
| 500 | |
| 501 | /* |
| 502 | * Returns total memory used by allocator (userdata + metadata) |
| 503 | */ |
| 504 | u64 xv_get_total_size_bytes(struct xv_pool *pool) |
| 505 | { |
| 506 | return pool->total_pages << PAGE_SHIFT; |
| 507 | } |
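
A minimal usage sketch of the xv allocator API above. It assumes the xvmalloc.h header from this directory; the function name and the 100-byte object size are illustrative only, and kmap_atomic is used the same way the ramzswap driver maps objects that may live in highmem:

#include <linux/highmem.h>
#include <linux/string.h>
#include "xvmalloc.h"

/* Illustrative only: allocate one small object from an xv_pool, zero it, free it. */
static int example_xv_usage(void)
{
	struct xv_pool *pool;
	struct page *page;
	u32 offset;
	void *obj;

	pool = xv_create_pool();
	if (!pool)
		return -ENOMEM;

	/* On success <page, offset> identifies the block; the returned offset
	 * already points past the block header at the usable data. */
	if (xv_malloc(pool, 100, &page, &offset, GFP_KERNEL)) {
		xv_destroy_pool(pool);
		return -ENOMEM;
	}

	/* The object may sit in a highmem page, so map it before touching it. */
	obj = kmap_atomic(page, KM_USER0) + offset;
	memset(obj, 0, 100);
	kunmap_atomic(obj, KM_USER0);

	xv_free(pool, page, offset);
	xv_destroy_pool(pool);
	return 0;
}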
drivers/staging/zram/zram_drv.c |
| 1 | /* |
| 2 | * Compressed RAM based swap device |
| 3 | * |
| 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
| 5 | * |
| 6 | * This code is released using a dual license strategy: BSD/GPL |
| 7 | * You can choose the licence that better fits your requirements. |
| 8 | * |
| 9 | * Released under the terms of 3-clause BSD License |
| 10 | * Released under the terms of GNU General Public License Version 2.0 |
| 11 | * |
| 12 | * Project home: http://compcache.googlecode.com |
| 13 | */ |
| 14 | |
| 15 | #define KMSG_COMPONENT "ramzswap" |
| 16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
| 17 | |
| 18 | #include <linux/module.h> |
| 19 | #include <linux/kernel.h> |
| 20 | #include <linux/bitops.h> |
| 21 | #include <linux/blkdev.h> |
| 22 | #include <linux/buffer_head.h> |
| 23 | #include <linux/device.h> |
| 24 | #include <linux/genhd.h> |
| 25 | #include <linux/highmem.h> |
| 26 | #include <linux/slab.h> |
| 27 | #include <linux/lzo.h> |
| 28 | #include <linux/string.h> |
| 29 | #include <linux/swap.h> |
| 30 | #include <linux/swapops.h> |
| 31 | #include <linux/vmalloc.h> |
| 32 | |
| 33 | #include "zram_drv.h" |
| 34 | |
| 35 | /* Globals */ |
| 36 | static int ramzswap_major; |
| 37 | static struct ramzswap *devices; |
| 38 | |
| 39 | /* Module params (documentation at end) */ |
| 40 | static unsigned int num_devices; |
| 41 | |
| 42 | static int rzs_test_flag(struct ramzswap *rzs, u32 index, |
| 43 | enum rzs_pageflags flag) |
| 44 | { |
| 45 | return rzs->table[index].flags & BIT(flag); |
| 46 | } |
| 47 | |
| 48 | static void rzs_set_flag(struct ramzswap *rzs, u32 index, |
| 49 | enum rzs_pageflags flag) |
| 50 | { |
| 51 | rzs->table[index].flags |= BIT(flag); |
| 52 | } |
| 53 | |
| 54 | static void rzs_clear_flag(struct ramzswap *rzs, u32 index, |
| 55 | enum rzs_pageflags flag) |
| 56 | { |
| 57 | rzs->table[index].flags &= ~BIT(flag); |
| 58 | } |
| 59 | |
| 60 | static int page_zero_filled(void *ptr) |
| 61 | { |
| 62 | unsigned int pos; |
| 63 | unsigned long *page; |
| 64 | |
| 65 | page = (unsigned long *)ptr; |
| 66 | |
| 67 | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { |
| 68 | if (page[pos]) |
| 69 | return 0; |
| 70 | } |
| 71 | |
| 72 | return 1; |
| 73 | } |
| 74 | |
| 75 | static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) |
| 76 | { |
| 77 | if (!rzs->disksize) { |
| 78 | pr_info( |
| 79 | "disk size not provided. You can use disksize_kb module " |
| 80 | "param to specify size.\nUsing default: (%u%% of RAM).\n", |
| 81 | default_disksize_perc_ram |
| 82 | ); |
| 83 | rzs->disksize = default_disksize_perc_ram * |
| 84 | (totalram_bytes / 100); |
| 85 | } |
| 86 | |
| 87 | if (rzs->disksize > 2 * (totalram_bytes)) { |
| 88 | pr_info( |
| 89 | "There is little point creating a ramzswap of greater than " |
| 90 | "twice the size of memory since we expect a 2:1 compression " |
| 91 | "ratio. Note that ramzswap uses about 0.1%% of the size of " |
| 92 | "the swap device when not in use so a huge ramzswap is " |
| 93 | "wasteful.\n" |
| 94 | "\tMemory Size: %zu kB\n" |
| 95 | "\tSize you selected: %zu kB\n" |
| 96 | "Continuing anyway ...\n", |
| 97 | totalram_bytes >> 10, rzs->disksize >> 10 |
| 98 | ); |
| 99 | } |
| 100 | |
| 101 | rzs->disksize &= PAGE_MASK; |
| 102 | } |
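
A quick worked example of the sizing above, assuming the default_disksize_perc_ram value of 25 defined in ramzswap_drv.h (an assumption here, since that header is not shown) and 4 KiB pages:

/*
 * totalram_bytes = 2UL << 30;               -- 2 GiB of RAM
 * disksize = 25 * (totalram_bytes / 100);   -- roughly 512 MiB
 * disksize &= PAGE_MASK;                    -- rounded down to a page multiple
 *
 * A disksize larger than 2 * totalram_bytes only triggers the warning;
 * the value is still accepted.
 */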
| 103 | |
| 104 | static void ramzswap_ioctl_get_stats(struct ramzswap *rzs, |
| 105 | struct ramzswap_ioctl_stats *s) |
| 106 | { |
| 107 | s->disksize = rzs->disksize; |
| 108 | |
| 109 | #if defined(CONFIG_RAMZSWAP_STATS) |
| 110 | { |
| 111 | struct ramzswap_stats *rs = &rzs->stats; |
| 112 | size_t succ_writes, mem_used; |
| 113 | unsigned int good_compress_perc = 0, no_compress_perc = 0; |
| 114 | |
| 115 | mem_used = xv_get_total_size_bytes(rzs->mem_pool) |
| 116 | + (rs->pages_expand << PAGE_SHIFT); |
| 117 | succ_writes = rzs_stat64_read(rzs, &rs->num_writes) - |
| 118 | rzs_stat64_read(rzs, &rs->failed_writes); |
| 119 | |
| 120 | if (succ_writes && rs->pages_stored) { |
| 121 | good_compress_perc = rs->good_compress * 100 |
| 122 | / rs->pages_stored; |
| 123 | no_compress_perc = rs->pages_expand * 100 |
| 124 | / rs->pages_stored; |
| 125 | } |
| 126 | |
| 127 | s->num_reads = rzs_stat64_read(rzs, &rs->num_reads); |
| 128 | s->num_writes = rzs_stat64_read(rzs, &rs->num_writes); |
| 129 | s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads); |
| 130 | s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes); |
| 131 | s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io); |
| 132 | s->notify_free = rzs_stat64_read(rzs, &rs->notify_free); |
| 133 | s->pages_zero = rs->pages_zero; |
| 134 | |
| 135 | s->good_compress_pct = good_compress_perc; |
| 136 | s->pages_expand_pct = no_compress_perc; |
| 137 | |
| 138 | s->pages_stored = rs->pages_stored; |
| 139 | s->pages_used = mem_used >> PAGE_SHIFT; |
| 140 | s->orig_data_size = rs->pages_stored << PAGE_SHIFT; |
| 141 | s->compr_data_size = rs->compr_size; |
| 142 | s->mem_used_total = mem_used; |
| 143 | } |
| 144 | #endif /* CONFIG_RAMZSWAP_STATS */ |
| 145 | } |
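
To make the two percentage fields concrete, a small worked example with purely illustrative numbers:

/*
 * pages_stored = 1000, good_compress = 700, pages_expand = 50
 *
 *   good_compress_pct = 700 * 100 / 1000 = 70   (pages compressed to <= PAGE_SIZE/2)
 *   pages_expand_pct  =  50 * 100 / 1000 = 5    (pages stored uncompressed)
 *
 * Both fields stay 0 if there were no successful writes or no pages stored.
 */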
| 146 | |
| 147 | static void ramzswap_free_page(struct ramzswap *rzs, size_t index) |
| 148 | { |
| 149 | u32 clen; |
| 150 | void *obj; |
| 151 | |
| 152 | struct page *page = rzs->table[index].page; |
| 153 | u32 offset = rzs->table[index].offset; |
| 154 | |
| 155 | if (unlikely(!page)) { |
| 156 | /* |
| 157 | * No memory is allocated for zero filled pages. |
| 158 | * Simply clear zero page flag. |
| 159 | */ |
| 160 | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
| 161 | rzs_clear_flag(rzs, index, RZS_ZERO); |
| 162 | rzs_stat_dec(&rzs->stats.pages_zero); |
| 163 | } |
| 164 | return; |
| 165 | } |
| 166 | |
| 167 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { |
| 168 | clen = PAGE_SIZE; |
| 169 | __free_page(page); |
| 170 | rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED); |
| 171 | rzs_stat_dec(&rzs->stats.pages_expand); |
| 172 | goto out; |
| 173 | } |
| 174 | |
| 175 | obj = kmap_atomic(page, KM_USER0) + offset; |
| 176 | clen = xv_get_object_size(obj) - sizeof(struct zobj_header); |
| 177 | kunmap_atomic(obj, KM_USER0); |
| 178 | |
| 179 | xv_free(rzs->mem_pool, page, offset); |
| 180 | if (clen <= PAGE_SIZE / 2) |
| 181 | rzs_stat_dec(&rzs->stats.good_compress); |
| 182 | |
| 183 | out: |
| 184 | rzs->stats.compr_size -= clen; |
| 185 | rzs_stat_dec(&rzs->stats.pages_stored); |
| 186 | |
| 187 | rzs->table[index].page = NULL; |
| 188 | rzs->table[index].offset = 0; |
| 189 | } |
| 190 | |
| 191 | static void handle_zero_page(struct page *page) |
| 192 | { |
| 193 | void *user_mem; |
| 194 | |
| 195 | user_mem = kmap_atomic(page, KM_USER0); |
| 196 | memset(user_mem, 0, PAGE_SIZE); |
| 197 | kunmap_atomic(user_mem, KM_USER0); |
| 198 | |
| 199 | flush_dcache_page(page); |
| 200 | } |
| 201 | |
| 202 | static void handle_uncompressed_page(struct ramzswap *rzs, |
| 203 | struct page *page, u32 index) |
| 204 | { |
| 205 | unsigned char *user_mem, *cmem; |
| 206 | |
| 207 | user_mem = kmap_atomic(page, KM_USER0); |
| 208 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
| 209 | rzs->table[index].offset; |
| 210 | |
| 211 | memcpy(user_mem, cmem, PAGE_SIZE); |
| 212 | kunmap_atomic(user_mem, KM_USER0); |
| 213 | kunmap_atomic(cmem, KM_USER1); |
| 214 | |
| 215 | flush_dcache_page(page); |
| 216 | } |
| 217 | |
| 218 | static int ramzswap_read(struct ramzswap *rzs, struct bio *bio) |
| 219 | { |
| 220 | |
| 221 | int i; |
| 222 | u32 index; |
| 223 | struct bio_vec *bvec; |
| 224 | |
| 225 | rzs_stat64_inc(rzs, &rzs->stats.num_reads); |
| 226 | |
| 227 | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
| 228 | bio_for_each_segment(bvec, bio, i) { |
| 229 | int ret; |
| 230 | size_t clen; |
| 231 | struct page *page; |
| 232 | struct zobj_header *zheader; |
| 233 | unsigned char *user_mem, *cmem; |
| 234 | |
| 235 | page = bvec->bv_page; |
| 236 | |
| 237 | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
| 238 | handle_zero_page(page); |
| 239 | continue; |
| 240 | } |
| 241 | |
| 242 | /* Requested page is not present in compressed area */ |
| 243 | if (unlikely(!rzs->table[index].page)) { |
| 244 | pr_debug("Read before write: sector=%lu, size=%u\n", |
| 245 | (ulong)(bio->bi_sector), bio->bi_size); |
| 246 | /* Do nothing */ |
| 247 | continue; |
| 248 | } |
| 249 | |
| 250 | /* Page is stored uncompressed since it's incompressible */ |
| 251 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { |
| 252 | handle_uncompressed_page(rzs, page, index); |
| 253 | continue; |
| 254 | } |
| 255 | |
| 256 | user_mem = kmap_atomic(page, KM_USER0); |
| 257 | clen = PAGE_SIZE; |
| 258 | |
| 259 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
| 260 | rzs->table[index].offset; |
| 261 | |
| 262 | ret = lzo1x_decompress_safe( |
| 263 | cmem + sizeof(*zheader), |
| 264 | xv_get_object_size(cmem) - sizeof(*zheader), |
| 265 | user_mem, &clen); |
| 266 | |
| 267 | kunmap_atomic(user_mem, KM_USER0); |
| 268 | kunmap_atomic(cmem, KM_USER1); |
| 269 | |
| 270 | /* Should NEVER happen. Return bio error if it does. */ |
| 271 | if (unlikely(ret != LZO_E_OK)) { |
| 272 | pr_err("Decompression failed! err=%d, page=%u\n", |
| 273 | ret, index); |
| 274 | rzs_stat64_inc(rzs, &rzs->stats.failed_reads); |
| 275 | goto out; |
| 276 | } |
| 277 | |
| 278 | flush_dcache_page(page); |
| 279 | index++; |
| 280 | } |
| 281 | |
| 282 | set_bit(BIO_UPTODATE, &bio->bi_flags); |
| 283 | bio_endio(bio, 0); |
| 284 | return 0; |
| 285 | |
| 286 | out: |
| 287 | bio_io_error(bio); |
| 288 | return 0; |
| 289 | } |
| 290 | |
| 291 | static int ramzswap_write(struct ramzswap *rzs, struct bio *bio) |
| 292 | { |
| 293 | int i; |
| 294 | u32 index; |
| 295 | struct bio_vec *bvec; |
| 296 | |
| 297 | rzs_stat64_inc(rzs, &rzs->stats.num_writes); |
| 298 | |
| 299 | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
| 300 | |
| 301 | bio_for_each_segment(bvec, bio, i) { |
| 302 | int ret; |
| 303 | u32 offset; |
| 304 | size_t clen; |
| 305 | struct zobj_header *zheader; |
| 306 | struct page *page, *page_store; |
| 307 | unsigned char *user_mem, *cmem, *src; |
| 308 | |
| 309 | page = bvec->bv_page; |
| 310 | src = rzs->compress_buffer; |
| 311 | |
| 312 | /* |
| 313 | * System overwrites unused sectors. Free memory associated |
| 314 | * with this sector now. |
| 315 | */ |
| 316 | if (rzs->table[index].page || |
| 317 | rzs_test_flag(rzs, index, RZS_ZERO)) |
| 318 | ramzswap_free_page(rzs, index); |
| 319 | |
| 320 | mutex_lock(&rzs->lock); |
| 321 | |
| 322 | user_mem = kmap_atomic(page, KM_USER0); |
| 323 | if (page_zero_filled(user_mem)) { |
| 324 | kunmap_atomic(user_mem, KM_USER0); |
| 325 | mutex_unlock(&rzs->lock); |
| 326 | rzs_stat_inc(&rzs->stats.pages_zero); |
| 327 | rzs_set_flag(rzs, index, RZS_ZERO); |
| 328 | continue; |
| 329 | } |
| 330 | |
| 331 | ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen, |
| 332 | rzs->compress_workmem); |
| 333 | |
| 334 | kunmap_atomic(user_mem, KM_USER0); |
| 335 | |
| 336 | if (unlikely(ret != LZO_E_OK)) { |
| 337 | mutex_unlock(&rzs->lock); |
| 338 | pr_err("Compression failed! err=%d\n", ret); |
| 339 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
| 340 | goto out; |
| 341 | } |
| 342 | |
| 343 | /* |
| 344 | * Page is incompressible. Store it as-is (uncompressed) |
| 345 | * since we do not want to return too many swap write |
| 346 | * errors which have the side effect of hanging the system. |
| 347 | */ |
| 348 | if (unlikely(clen > max_zpage_size)) { |
| 349 | clen = PAGE_SIZE; |
| 350 | page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); |
| 351 | if (unlikely(!page_store)) { |
| 352 | mutex_unlock(&rzs->lock); |
| 353 | pr_info("Error allocating memory for " |
| 354 | "incompressible page: %u\n", index); |
| 355 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
| 356 | goto out; |
| 357 | } |
| 358 | |
| 359 | offset = 0; |
| 360 | rzs_set_flag(rzs, index, RZS_UNCOMPRESSED); |
| 361 | rzs_stat_inc(&rzs->stats.pages_expand); |
| 362 | rzs->table[index].page = page_store; |
| 363 | src = kmap_atomic(page, KM_USER0); |
| 364 | goto memstore; |
| 365 | } |
| 366 | |
| 367 | if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader), |
| 368 | &rzs->table[index].page, &offset, |
| 369 | GFP_NOIO | __GFP_HIGHMEM)) { |
| 370 | mutex_unlock(&rzs->lock); |
| 371 | pr_info("Error allocating memory for compressed " |
| 372 | "page: %u, size=%zu\n", index, clen); |
| 373 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
| 374 | goto out; |
| 375 | } |
| 376 | |
| 377 | memstore: |
| 378 | rzs->table[index].offset = offset; |
| 379 | |
| 380 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
| 381 | rzs->table[index].offset; |
| 382 | |
| 383 | #if 0 |
| 384 | /* Back-reference needed for memory defragmentation */ |
| 385 | if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) { |
| 386 | zheader = (struct zobj_header *)cmem; |
| 387 | zheader->table_idx = index; |
| 388 | cmem += sizeof(*zheader); |
| 389 | } |
| 390 | #endif |
| 391 | |
| 392 | memcpy(cmem, src, clen); |
| 393 | |
| 394 | kunmap_atomic(cmem, KM_USER1); |
| 395 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) |
| 396 | kunmap_atomic(src, KM_USER0); |
| 397 | |
| 398 | /* Update stats */ |
| 399 | rzs->stats.compr_size += clen; |
| 400 | rzs_stat_inc(&rzs->stats.pages_stored); |
| 401 | if (clen <= PAGE_SIZE / 2) |
| 402 | rzs_stat_inc(&rzs->stats.good_compress); |
| 403 | |
| 404 | mutex_unlock(&rzs->lock); |
| 405 | index++; |
| 406 | } |
| 407 | |
| 408 | set_bit(BIO_UPTODATE, &bio->bi_flags); |
| 409 | bio_endio(bio, 0); |
| 410 | return 0; |
| 411 | |
| 412 | out: |
| 413 | bio_io_error(bio); |
| 414 | return 0; |
| 415 | } |
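
For readability, a short summary of the per-page decisions made in the write path above, expressed as a comment; the thresholds come directly from the code:

/*
 * For each bio segment (one page):
 *
 *   page_zero_filled(user_mem)  -> nothing stored, RZS_ZERO flag set
 *   clen <= max_zpage_size      -> compressed copy placed via xv_malloc()
 *   clen >  max_zpage_size      -> whole page stored uncompressed in a freshly
 *                                  allocated page, RZS_UNCOMPRESSED flag set
 *
 * Any compression or allocation failure ends the bio with an I/O error.
 */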
| 416 | |
| 417 | /* |
| 418 | * Check if request is within bounds and page aligned. |
| 419 | */ |
| 420 | static inline int valid_io_request(struct ramzswap *rzs, struct bio *bio) |
| 421 | { |
| 422 | if (unlikely( |
| 423 | (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) || |
| 424 | (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || |
| 425 | (bio->bi_size & (PAGE_SIZE - 1)))) { |
| 426 | |
| 427 | return 0; |
| 428 | } |
| 429 | |
| 430 | /* I/O request is valid */ |
| 431 | return 1; |
| 432 | } |
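
A few example requests against the checks above, assuming PAGE_SIZE == 4096 and 512-byte sectors (so SECTORS_PER_PAGE == 8 and SECTORS_PER_PAGE_SHIFT == 3):

/*
 *   bi_sector = 16, bi_size = 4096    -> accepted; maps to table index 16 >> 3 = 2
 *   bi_sector = 17, bi_size = 4096    -> rejected; sector not a multiple of 8
 *   bi_sector = 16, bi_size = 1024    -> rejected; size not a multiple of PAGE_SIZE
 *   bi_sector >= disksize >> 9        -> rejected; past end of device
 */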
| 433 | |
| 434 | /* |
| 435 | * Handler function for all ramzswap I/O requests. |
| 436 | */ |
| 437 | static int ramzswap_make_request(struct request_queue *queue, struct bio *bio) |
| 438 | { |
| 439 | int ret = 0; |
| 440 | struct ramzswap *rzs = queue->queuedata; |
| 441 | |
| 442 | if (unlikely(!rzs->init_done)) { |
| 443 | bio_io_error(bio); |
| 444 | return 0; |
| 445 | } |
| 446 | |
| 447 | if (!valid_io_request(rzs, bio)) { |
| 448 | rzs_stat64_inc(rzs, &rzs->stats.invalid_io); |
| 449 | bio_io_error(bio); |
| 450 | return 0; |
| 451 | } |
| 452 | |
| 453 | switch (bio_data_dir(bio)) { |
| 454 | case READ: |
| 455 | ret = ramzswap_read(rzs, bio); |
| 456 | break; |
| 457 | |
| 458 | case WRITE: |
| 459 | ret = ramzswap_write(rzs, bio); |
| 460 | break; |
| 461 | } |
| 462 | |
| 463 | return ret; |
| 464 | } |
| 465 | |
| 466 | static void reset_device(struct ramzswap *rzs) |
| 467 | { |
| 468 | size_t index; |
| 469 | |
| 470 | /* Do not accept any new I/O request */ |
| 471 | rzs->init_done = 0; |
| 472 | |
| 473 | /* Free various per-device buffers */ |
| 474 | kfree(rzs->compress_workmem); |
| 475 | free_pages((unsigned long)rzs->compress_buffer, 1); |
| 476 | |
| 477 | rzs->compress_workmem = NULL; |
| 478 | rzs->compress_buffer = NULL; |
| 479 | |
| 480 | /* Free all pages that are still in this ramzswap device */ |
| 481 | for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) { |
| 482 | struct page *page; |
| 483 | u16 offset; |
| 484 | |
| 485 | page = rzs->table[index].page; |
| 486 | offset = rzs->table[index].offset; |
| 487 | |
| 488 | if (!page) |
| 489 | continue; |
| 490 | |
| 491 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) |
| 492 | __free_page(page); |
| 493 | else |
| 494 | xv_free(rzs->mem_pool, page, offset); |
| 495 | } |
| 496 | |
| 497 | vfree(rzs->table); |
| 498 | rzs->table = NULL; |
| 499 | |
| 500 | xv_destroy_pool(rzs->mem_pool); |
| 501 | rzs->mem_pool = NULL; |
| 502 | |
| 503 | /* Reset stats */ |
| 504 | memset(&rzs->stats, 0, sizeof(rzs->stats)); |
| 505 | |
| 506 | rzs->disksize = 0; |
| 507 | } |
| 508 | |
| 509 | static int ramzswap_ioctl_init_device(struct ramzswap *rzs) |
| 510 | { |
| 511 | int ret; |
| 512 | size_t num_pages; |
| 513 | |
| 514 | if (rzs->init_done) { |
| 515 | pr_info("Device already initialized!\n"); |
| 516 | return -EBUSY; |
| 517 | } |
| 518 | |
| 519 | ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT); |
| 520 | |
| 521 | rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); |
| 522 | if (!rzs->compress_workmem) { |
| 523 | pr_err("Error allocating compressor working memory!\n"); |
| 524 | ret = -ENOMEM; |
| 525 | goto fail; |
| 526 | } |
| 527 | |
| 528 | rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); |
| 529 | if (!rzs->compress_buffer) { |
| 530 | pr_err("Error allocating compressor buffer space\n"); |
| 531 | ret = -ENOMEM; |
| 532 | goto fail; |
| 533 | } |
| 534 | |
| 535 | num_pages = rzs->disksize >> PAGE_SHIFT; |
| 536 | rzs->table = vmalloc(num_pages * sizeof(*rzs->table)); |
| 537 | if (!rzs->table) { |
| 538 | pr_err("Error allocating ramzswap address table\n"); |
| 539 | /* To prevent accessing table entries during cleanup */ |
| 540 | rzs->disksize = 0; |
| 541 | ret = -ENOMEM; |
| 542 | goto fail; |
| 543 | } |
| 544 | memset(rzs->table, 0, num_pages * sizeof(*rzs->table)); |
| 545 | |
| 546 | set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT); |
| 547 | |
| 548 | /* ramzswap devices sort of resemble non-rotational disks */ |
| 549 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue); |
| 550 | |
| 551 | rzs->mem_pool = xv_create_pool(); |
| 552 | if (!rzs->mem_pool) { |
| 553 | pr_err("Error creating memory pool\n"); |
| 554 | ret = -ENOMEM; |
| 555 | goto fail; |
| 556 | } |
| 557 | |
| 558 | rzs->init_done = 1; |
| 559 | |
| 560 | pr_debug("Initialization done!\n"); |
| 561 | return 0; |
| 562 | |
| 563 | fail: |
| 564 | reset_device(rzs); |
| 565 | |
| 566 | pr_err("Initialization failed: err=%d\n", ret); |
| 567 | return ret; |
| 568 | } |
| 569 | |
| 570 | static int ramzswap_ioctl_reset_device(struct ramzswap *rzs) |
| 571 | { |
| 572 | if (rzs->init_done) |
| 573 | reset_device(rzs); |
| 574 | |
| 575 | return 0; |
| 576 | } |
| 577 | |
| 578 | static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode, |
| 579 | unsigned int cmd, unsigned long arg) |
| 580 | { |
| 581 | int ret = 0; |
| 582 | size_t disksize_kb; |
| 583 | |
| 584 | struct ramzswap *rzs = bdev->bd_disk->private_data; |
| 585 | |
| 586 | switch (cmd) { |
| 587 | case RZSIO_SET_DISKSIZE_KB: |
| 588 | if (rzs->init_done) { |
| 589 | ret = -EBUSY; |
| 590 | goto out; |
| 591 | } |
| 592 | if (copy_from_user(&disksize_kb, (void *)arg, |
| 593 | _IOC_SIZE(cmd))) { |
| 594 | ret = -EFAULT; |
| 595 | goto out; |
| 596 | } |
| 597 | rzs->disksize = disksize_kb << 10; |
| 598 | pr_info("Disk size set to %zu kB\n", disksize_kb); |
| 599 | break; |
| 600 | |
| 601 | case RZSIO_GET_STATS: |
| 602 | { |
| 603 | struct ramzswap_ioctl_stats *stats; |
| 604 | if (!rzs->init_done) { |
| 605 | ret = -ENOTTY; |
| 606 | goto out; |
| 607 | } |
| 608 | stats = kzalloc(sizeof(*stats), GFP_KERNEL); |
| 609 | if (!stats) { |
| 610 | ret = -ENOMEM; |
| 611 | goto out; |
| 612 | } |
| 613 | ramzswap_ioctl_get_stats(rzs, stats); |
| 614 | if (copy_to_user((void *)arg, stats, sizeof(*stats))) { |
| 615 | kfree(stats); |
| 616 | ret = -EFAULT; |
| 617 | goto out; |
| 618 | } |
| 619 | kfree(stats); |
| 620 | break; |
| 621 | } |
| 622 | case RZSIO_INIT: |
| 623 | ret = ramzswap_ioctl_init_device(rzs); |
| 624 | break; |
| 625 | |
| 626 | case RZSIO_RESET: |
| 627 | /* Do not reset an active device! */ |
| 628 | if (bdev->bd_holders) { |
| 629 | ret = -EBUSY; |
| 630 | goto out; |
| 631 | } |
| 632 | |
| 633 | /* Make sure all pending I/O is finished */ |
| 634 | if (bdev) |
| 635 | fsync_bdev(bdev); |
| 636 | |
| 637 | ret = ramzswap_ioctl_reset_device(rzs); |
| 638 | break; |
| 639 | |
| 640 | default: |
| 641 | pr_info("Invalid ioctl %u\n", cmd); |
| 642 | ret = -ENOTTY; |
| 643 | } |
| 644 | |
| 645 | out: |
| 646 | return ret; |
| 647 | } |
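
A minimal userspace sketch of driving these ioctls, in the spirit of the rzscontrol utility. It assumes the RZSIO_* request codes are available from ramzswap_ioctl.h and that the device node is /dev/ramzswap0; both are assumptions, neither is defined in this file:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "ramzswap_ioctl.h"	/* assumed to provide the RZSIO_* definitions */

int main(void)
{
	size_t disksize_kb = 262144;	/* 256 MiB */
	int fd = open("/dev/ramzswap0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* The size must be set before RZSIO_INIT; afterwards it returns EBUSY. */
	if (ioctl(fd, RZSIO_SET_DISKSIZE_KB, &disksize_kb) < 0)
		perror("RZSIO_SET_DISKSIZE_KB");

	if (ioctl(fd, RZSIO_INIT) < 0)
		perror("RZSIO_INIT");

	close(fd);
	return 0;	/* the device can now be used with mkswap/swapon */
}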
| 648 | |
| 649 | void ramzswap_slot_free_notify(struct block_device *bdev, unsigned long index) |
| 650 | { |
| 651 | struct ramzswap *rzs; |
| 652 | |
| 653 | rzs = bdev->bd_disk->private_data; |
| 654 | ramzswap_free_page(rzs, index); |
| 655 | rzs_stat64_inc(rzs, &rzs->stats.notify_free); |
| 656 | } |
| 657 | |
| 658 | static const struct block_device_operations ramzswap_devops = { |
| 659 | .ioctl = ramzswap_ioctl, |
| 660 | .swap_slot_free_notify = ramzswap_slot_free_notify, |
| 661 | .owner = THIS_MODULE |
| 662 | }; |
| 663 | |
| 664 | static int create_device(struct ramzswap *rzs, int device_id) |
| 665 | { |
| 666 | int ret = 0; |
| 667 | |
| 668 | mutex_init(&rzs->lock); |
| 669 | spin_lock_init(&rzs->stat64_lock); |
| 670 | |
| 671 | rzs->queue = blk_alloc_queue(GFP_KERNEL); |
| 672 | if (!rzs->queue) { |
| 673 | pr_err("Error allocating disk queue for device %d\n", |
| 674 | device_id); |
| 675 | ret = -ENOMEM; |
| 676 | goto out; |
| 677 | } |
| 678 | |
| 679 | blk_queue_make_request(rzs->queue, ramzswap_make_request); |
| 680 | rzs->queue->queuedata = rzs; |
| 681 | |
| 682 | /* gendisk structure */ |
| 683 | rzs->disk = alloc_disk(1); |
| 684 | if (!rzs->disk) { |
| 685 | blk_cleanup_queue(rzs->queue); |
| 686 | pr_warning("Error allocating disk structure for device %d\n", |
| 687 | device_id); |
| 688 | ret = -ENOMEM; |
| 689 | goto out; |
| 690 | } |
| 691 | |
| 692 | rzs->disk->major = ramzswap_major; |
| 693 | rzs->disk->first_minor = device_id; |
| 694 | rzs->disk->fops = &ramzswap_devops; |
| 695 | rzs->disk->queue = rzs->queue; |
| 696 | rzs->disk->private_data = rzs; |
| 697 | snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id); |
| 698 | |
| 699 | /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */ |
| 700 | set_capacity(rzs->disk, 0); |
| 701 | |
| 702 | /* |
| 703 | * To ensure that we always get PAGE_SIZE-aligned |
| 704 | * and n*PAGE_SIZE-sized I/O requests. |
| 705 | */ |
| 706 | blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE); |
| 707 | blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE); |
| 708 | blk_queue_io_min(rzs->disk->queue, PAGE_SIZE); |
| 709 | blk_queue_io_opt(rzs->disk->queue, PAGE_SIZE); |
| 710 | |
| 711 | add_disk(rzs->disk); |
| 712 | |
| 713 | rzs->init_done = 0; |
| 714 | |
| 715 | out: |
| 716 | return ret; |
| 717 | } |
| 718 | |
| 719 | static void destroy_device(struct ramzswap *rzs) |
| 720 | { |
| 721 | if (rzs->disk) { |
| 722 | del_gendisk(rzs->disk); |
| 723 | put_disk(rzs->disk); |
| 724 | } |
| 725 | |
| 726 | if (rzs->queue) |
| 727 | blk_cleanup_queue(rzs->queue); |
| 728 | } |
| 729 | |
| 730 | static int __init ramzswap_init(void) |
| 731 | { |
| 732 | int ret, dev_id; |
| 733 | |
| 734 | if (num_devices > max_num_devices) { |
| 735 | pr_warning("Invalid value for num_devices: %u\n", |
| 736 | num_devices); |
| 737 | ret = -EINVAL; |
| 738 | goto out; |
| 739 | } |
| 740 | |
| 741 | ramzswap_major = register_blkdev(0, "ramzswap"); |
| 742 | if (ramzswap_major <= 0) { |
| 743 | pr_warning("Unable to get major number\n"); |
| 744 | ret = -EBUSY; |
| 745 | goto out; |
| 746 | } |
| 747 | |
| 748 | if (!num_devices) { |
| 749 | pr_info("num_devices not specified. Using default: 1\n"); |
| 750 | num_devices = 1; |
| 751 | } |
| 752 | |
| 753 | /* Allocate the device array and initialize each one */ |
| 754 | pr_info("Creating %u devices ...\n", num_devices); |
| 755 | devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL); |
| 756 | if (!devices) { |
| 757 | ret = -ENOMEM; |
| 758 | goto unregister; |
| 759 | } |
| 760 | |
| 761 | for (dev_id = 0; dev_id < num_devices; dev_id++) { |
| 762 | ret = create_device(&devices[dev_id], dev_id); |
| 763 | if (ret) |
| 764 | goto free_devices; |
| 765 | } |
| 766 | |
| 767 | return 0; |
| 768 | |
| 769 | free_devices: |
| 770 | while (dev_id) |
| 771 | destroy_device(&devices[--dev_id]); |
| 772 | unregister: |
| 773 | unregister_blkdev(ramzswap_major, "ramzswap"); |
| 774 | out: |
| 775 | return ret; |
| 776 | } |
| 777 | |
| 778 | static void __exit ramzswap_exit(void) |
| 779 | { |
| 780 | int i; |
| 781 | struct ramzswap *rzs; |
| 782 | |
| 783 | for (i = 0; i < num_devices; i++) { |
| 784 | rzs = &devices[i]; |
| 785 | |
| 786 | destroy_device(rzs); |
| 787 | if (rzs->init_done) |
| 788 | reset_device(rzs); |
| 789 | } |
| 790 | |
| 791 | unregister_blkdev(ramzswap_major, "ramzswap"); |
| 792 | |
| 793 | kfree(devices); |
| 794 | pr_debug("Cleanup done!\n"); |
| 795 | } |
| 796 | |
| 797 | module_param(num_devices, uint, 0); |
| 798 | MODULE_PARM_DESC(num_devices, "Number of ramzswap devices"); |
| 799 | |
| 800 | module_init(ramzswap_init); |
| 801 | module_exit(ramzswap_exit); |
| 802 | |
| 803 | MODULE_LICENSE("Dual BSD/GPL"); |
| 804 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); |
| 805 | MODULE_DESCRIPTION("Compressed RAM Based Swap Device"); |