/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
 * @chunk: chunk of interest
 * @bitmapp: output parameter for bitmap
 * @may_alloc: may allocate the array
 *
 * Returns pointer to array of pointers to struct page and bitmap,
 * both of which can be indexed with pcpu_page_idx().  The returned
 * array is cleared to zero and *@bitmapp is copied from
 * @chunk->populated.  Note that there is only one array and bitmap
 * and access exclusion is the caller's responsibility.
 *
 * CONTEXT:
 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
 * Otherwise, don't care.
 *
 * RETURNS:
 * Pointer to temp pages array on success, NULL on failure.
 */
static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
					       unsigned long **bitmapp,
					       bool may_alloc)
{
	static struct page **pages;
	static unsigned long *bitmap;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
	size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
			     sizeof(unsigned long);

	if (!pages || !bitmap) {
		if (may_alloc && !pages)
			pages = pcpu_mem_zalloc(pages_size);
		if (may_alloc && !bitmap)
			bitmap = pcpu_mem_zalloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}

	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);

	*bitmapp = bitmap;
	return pages;
}
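
/*
 * A minimal usage sketch, not part of the original file: because there
 * is only a single static pages array and bitmap, callers are expected
 * to serialize through pcpu_alloc_mutex (defined in mm/percpu.c, which
 * includes this file).  The helper name below is hypothetical.
 */
static struct page ** __maybe_unused
pcpu_get_pages_example(struct pcpu_chunk *chunk, unsigned long **bitmapp)
{
	/* access exclusion is the caller's responsibility */
	lockdep_assert_held(&pcpu_alloc_mutex);

	/* may_alloc == true: a GFP_KERNEL allocation may happen here */
	return pcpu_get_pages_and_bitmap(chunk, bitmapp, true);
}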

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep) {
				pcpu_free_pages(chunk, pages, populated,
						page_start, page_end);
				return -ENOMEM;
			}
		}
	}
	return 0;
}
/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue flush on the whole region at once rather than
 * doing it for each cpu.  This could be overkill but is more
 * scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @populated: populated bitmap
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, unsigned long *populated,
			     int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}

	bitmap_clear(populated, page_start, page_end - page_start);
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
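
/*
 * A minimal sketch, not part of the original file, of the ordering the
 * two flush helpers above bracket: flush the cache while the mapping
 * still exists, unmap, then flush the TLB (unless the area is handed
 * back to vmalloc, which purges lazily).  The function is hypothetical.
 */
static void __maybe_unused
pcpu_unmap_protocol_example(struct pcpu_chunk *chunk, struct page **pages,
			    unsigned long *populated,
			    int page_start, int page_end)
{
	pcpu_pre_unmap_flush(chunk, page_start, page_end);	/* cache */
	pcpu_unmap_pages(chunk, pages, populated, page_start, page_end);
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);	/* TLB */
}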

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @populated: populated bitmap
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting corresponding bits in
 * @chunk->populated bitmap and whatever is necessary for reverse
 * lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, unsigned long *populated,
			  int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;
	}

	/* mapping successful, link chunk and mark populated */
	for (i = page_start; i < page_end; i++) {
		for_each_possible_cpu(cpu)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
		__set_bit(i, populated);
	}

	return 0;

err:
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	return err;
}
/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
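
/*
 * The mapping-side counterpart, sketched for illustration and not part
 * of the original file: map first, then flush the cache once for the
 * whole region, as pcpu_map_pages() requires of its callers.  The
 * function name is hypothetical.
 */
static int __maybe_unused
pcpu_map_protocol_example(struct pcpu_chunk *chunk, struct page **pages,
			  unsigned long *populated,
			  int page_start, int page_end)
{
	int rc;

	rc = pcpu_map_pages(chunk, pages, populated, page_start, page_end);
	if (rc)
		return rc;	/* pcpu_map_pages() already unmapped */

	pcpu_post_map_flush(chunk, page_start, page_end);
	return 0;
}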

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @off: offset to the area to populate
 * @size: size of the area to populate in bytes
 *
 * For each cpu, populate and map the pages covering [@off, @off +
 * @size) into @chunk.  The area is cleared on return.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	int free_end = page_start, unmap_end = page_start;
	struct page **pages;
	unsigned long *populated;
	unsigned int cpu;
	int rs, re, rc;

	/* quick path, check whether all pages are already there */
	rs = page_start;
	pcpu_next_pop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		goto clear;

	/* need to allocate and map pages, this chunk can't be immutable */
	WARN_ON(chunk->immutable);

	pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
	if (!pages)
		return -ENOMEM;

	/* alloc and map */
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_free;
		free_end = re;
	}

	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_map_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_unmap;
		unmap_end = re;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
clear:
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
	return 0;

err_unmap:
	pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);
	pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
err_free:
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);
	return rc;
}
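
/*
 * A worked example of the index arithmetic above, not part of the
 * original file and assuming 4 KiB pages: an area at @off = 3000
 * spanning @size = 5000 bytes ends at byte 8000, so it covers unit
 * pages [0, 2): partial pages at either end are rounded outwards so
 * the whole area ends up backed.
 */
static void __maybe_unused pcpu_page_span_example(void)
{
	int off = 3000, size = 5000;		/* hypothetical area */
	int page_start = PFN_DOWN(off);		/* 3000 >> 12 == 0 */
	int page_end = PFN_UP(off + size);	/* (8000 + 4095) >> 12 == 2 */

	WARN_ON(page_start != 0 || page_end != 2);
}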

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @off: offset to the area to depopulate
 * @size: size of the area to depopulate in bytes
 *
 * For each cpu, depopulate and unmap the pages covering [@off, @off +
 * @size) from @chunk.  The cache is flushed before unmapping; the TLB
 * flush after unmapping is left to vmalloc's lazy purging.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	struct page **pages;
	unsigned long *populated;
	int rs, re;

	/* quick path, check whether it's empty already */
	rs = page_start;
	pcpu_next_unpop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		return;

	/* immutable chunks can't be depopulated */
	WARN_ON(chunk->immutable);

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (chunk && chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}
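/*
 * For context, a sketch of how this allocator is selected (paraphrased
 * from mm/percpu.c, which pulls in one of the two chunk allocator
 * implementations directly at build time):
 *
 *	#ifdef CONFIG_NEED_PER_CPU_KM
 *	#include "percpu-km.c"
 *	#else
 *	#include "percpu-vm.c"
 *	#endif
 */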