1 | /* |
2 | * Routines having to do with the 'struct sk_buff' memory handlers. |
3 | * |
4 | * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> |
5 | * Florian La Roche <rzsfl@rz.uni-sb.de> |
6 | * |
7 | * Fixes: |
8 | * Alan Cox : Fixed the worst of the load |
9 | * balancer bugs. |
10 | * Dave Platt : Interrupt stacking fix. |
11 | * Richard Kooijman : Timestamp fixes. |
12 | * Alan Cox : Changed buffer format. |
13 | * Alan Cox : destructor hook for AF_UNIX etc. |
14 | * Linus Torvalds : Better skb_clone. |
15 | * Alan Cox : Added skb_copy. |
16 | * Alan Cox : Added all the changed routines Linus |
17 | * only put in the headers |
18 | * Ray VanTassle : Fixed --skb->lock in free |
19 | * Alan Cox : skb_copy copy arp field |
20 | * Andi Kleen : slabified it. |
21 | * Robert Olsson : Removed skb_head_pool |
22 | * |
23 | * NOTE: |
24 | * The __skb_ routines should be called with interrupts |
25 | * disabled, or you better be *real* sure that the operation is atomic |
26 | * with respect to whatever list is being frobbed (e.g. via lock_sock() |
27 | * or via disabling bottom half handlers, etc). |
28 | * |
29 | * This program is free software; you can redistribute it and/or |
30 | * modify it under the terms of the GNU General Public License |
31 | * as published by the Free Software Foundation; either version |
32 | * 2 of the License, or (at your option) any later version. |
33 | */ |
34 | |
35 | /* |
36 | * The functions in this file will not compile correctly with gcc 2.4.x |
37 | */ |
38 | |
39 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
40 | |
41 | #include <linux/module.h> |
42 | #include <linux/types.h> |
43 | #include <linux/kernel.h> |
44 | #include <linux/kmemcheck.h> |
45 | #include <linux/mm.h> |
46 | #include <linux/interrupt.h> |
47 | #include <linux/in.h> |
48 | #include <linux/inet.h> |
49 | #include <linux/slab.h> |
50 | #include <linux/tcp.h> |
51 | #include <linux/udp.h> |
52 | #include <linux/netdevice.h> |
53 | #ifdef CONFIG_NET_CLS_ACT |
54 | #include <net/pkt_sched.h> |
55 | #endif |
56 | #include <linux/string.h> |
57 | #include <linux/skbuff.h> |
58 | #include <linux/splice.h> |
59 | #include <linux/cache.h> |
60 | #include <linux/rtnetlink.h> |
61 | #include <linux/init.h> |
62 | #include <linux/scatterlist.h> |
63 | #include <linux/errqueue.h> |
64 | #include <linux/prefetch.h> |
65 | |
66 | #include <net/protocol.h> |
67 | #include <net/dst.h> |
68 | #include <net/sock.h> |
69 | #include <net/checksum.h> |
70 | #include <net/ip6_checksum.h> |
71 | #include <net/xfrm.h> |
72 | |
73 | #include <asm/uaccess.h> |
74 | #include <trace/events/skb.h> |
75 | #include <linux/highmem.h> |
76 | |
77 | struct kmem_cache *skbuff_head_cache __read_mostly; |
78 | static struct kmem_cache *skbuff_fclone_cache __read_mostly; |
79 | |
80 | /** |
81 | * skb_panic - private function for out-of-line support |
82 | * @skb: buffer |
83 | * @sz: size |
84 | * @addr: address |
85 | * @msg: skb_over_panic or skb_under_panic |
86 | * |
87 | * Out-of-line support for skb_put() and skb_push(). |
88 | * Called via the wrapper skb_over_panic() or skb_under_panic(). |
89 | * Keep out of line to prevent kernel bloat. |
90 | * __builtin_return_address is not used because it is not always reliable. |
91 | */ |
92 | static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, |
93 | const char msg[]) |
94 | { |
95 | pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", |
96 | msg, addr, skb->len, sz, skb->head, skb->data, |
97 | (unsigned long)skb->tail, (unsigned long)skb->end, |
98 | skb->dev ? skb->dev->name : "<NULL>"); |
99 | BUG(); |
100 | } |
101 | |
102 | static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) |
103 | { |
104 | skb_panic(skb, sz, addr, __func__); |
105 | } |
106 | |
107 | static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) |
108 | { |
109 | skb_panic(skb, sz, addr, __func__); |
110 | } |
111 | |
112 | /* |
113 | * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells |
114 | * the caller if emergency pfmemalloc reserves are being used. If it is and |
115 | * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves |
116 | * may be used. Otherwise, the packet data may be discarded until enough |
117 | * memory is free. |
118 | */ |
119 | #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ |
120 | __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) |
121 | |
122 | static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, |
123 | unsigned long ip, bool *pfmemalloc) |
124 | { |
125 | void *obj; |
126 | bool ret_pfmemalloc = false; |
127 | |
128 | /* |
129 | * Try a regular allocation, when that fails and we're not entitled |
130 | * to the reserves, fail. |
131 | */ |
132 | obj = kmalloc_node_track_caller(size, |
133 | flags | __GFP_NOMEMALLOC | __GFP_NOWARN, |
134 | node); |
135 | if (obj || !(gfp_pfmemalloc_allowed(flags))) |
136 | goto out; |
137 | |
138 | /* Try again but now we are using pfmemalloc reserves */ |
139 | ret_pfmemalloc = true; |
140 | obj = kmalloc_node_track_caller(size, flags, node); |
141 | |
142 | out: |
143 | if (pfmemalloc) |
144 | *pfmemalloc = ret_pfmemalloc; |
145 | |
146 | return obj; |
147 | } |
148 | |
149 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
150 | * 'private' fields and also do memory statistics to find all the |
151 | * [BEEP] leaks. |
152 | * |
153 | */ |
154 | |
155 | struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) |
156 | { |
157 | struct sk_buff *skb; |
158 | |
159 | /* Get the HEAD */ |
160 | skb = kmem_cache_alloc_node(skbuff_head_cache, |
161 | gfp_mask & ~__GFP_DMA, node); |
162 | if (!skb) |
163 | goto out; |
164 | |
165 | /* |
166 | * Only clear those fields we need to clear, not those that we will |
167 | * actually initialise below. Hence, don't put any more fields after |
168 | * the tail pointer in struct sk_buff! |
169 | */ |
170 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
171 | skb->head = NULL; |
172 | skb->truesize = sizeof(struct sk_buff); |
173 | atomic_set(&skb->users, 1); |
174 | |
175 | skb->mac_header = (typeof(skb->mac_header))~0U; |
176 | out: |
177 | return skb; |
178 | } |
179 | |
180 | /** |
181 | * __alloc_skb - allocate a network buffer |
182 | * @size: size to allocate |
183 | * @gfp_mask: allocation mask |
184 | * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache |
185 | * instead of head cache and allocate a cloned (child) skb. |
186 | * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for |
187 | * allocations in case the data is required for writeback |
188 | * @node: numa node to allocate memory on |
189 | * |
190 | * Allocate a new &sk_buff. The returned buffer has no headroom and a |
191 | * tail room of at least size bytes. The object has a reference count |
192 | * of one. The return is the buffer. On a failure the return is %NULL. |
193 | * |
194 | * Buffers may only be allocated from interrupts using a @gfp_mask of |
195 | * %GFP_ATOMIC. |
196 | */ |
197 | struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, |
198 | int flags, int node) |
199 | { |
200 | struct kmem_cache *cache; |
201 | struct skb_shared_info *shinfo; |
202 | struct sk_buff *skb; |
203 | u8 *data; |
204 | bool pfmemalloc; |
205 | |
206 | cache = (flags & SKB_ALLOC_FCLONE) |
207 | ? skbuff_fclone_cache : skbuff_head_cache; |
208 | |
209 | if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) |
210 | gfp_mask |= __GFP_MEMALLOC; |
211 | |
212 | /* Get the HEAD */ |
213 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); |
214 | if (!skb) |
215 | goto out; |
216 | prefetchw(skb); |
217 | |
218 | /* We do our best to align skb_shared_info on a separate cache |
219 | * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives |
220 | * aligned memory blocks, unless SLUB/SLAB debug is enabled. |
221 | * Both skb->head and skb_shared_info are cache line aligned. |
222 | */ |
223 | size = SKB_DATA_ALIGN(size); |
224 | size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
225 | data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); |
226 | if (!data) |
227 | goto nodata; |
228 | /* kmalloc(size) might give us more room than requested. |
229 | * Put skb_shared_info exactly at the end of allocated zone, |
230 | * to allow max possible filling before reallocation. |
231 | */ |
232 | size = SKB_WITH_OVERHEAD(ksize(data)); |
233 | prefetchw(data + size); |
234 | |
235 | /* |
236 | * Only clear those fields we need to clear, not those that we will |
237 | * actually initialise below. Hence, don't put any more fields after |
238 | * the tail pointer in struct sk_buff! |
239 | */ |
240 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
241 | /* Account for allocated memory : skb + skb->head */ |
242 | skb->truesize = SKB_TRUESIZE(size); |
243 | skb->pfmemalloc = pfmemalloc; |
244 | atomic_set(&skb->users, 1); |
245 | skb->head = data; |
246 | skb->data = data; |
247 | skb_reset_tail_pointer(skb); |
248 | skb->end = skb->tail + size; |
249 | skb->mac_header = (typeof(skb->mac_header))~0U; |
250 | skb->transport_header = (typeof(skb->transport_header))~0U; |
251 | |
252 | /* make sure we initialize shinfo sequentially */ |
253 | shinfo = skb_shinfo(skb); |
254 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); |
255 | atomic_set(&shinfo->dataref, 1); |
256 | kmemcheck_annotate_variable(shinfo->destructor_arg); |
257 | |
258 | if (flags & SKB_ALLOC_FCLONE) { |
259 | struct sk_buff *child = skb + 1; |
260 | atomic_t *fclone_ref = (atomic_t *) (child + 1); |
261 | |
262 | kmemcheck_annotate_bitfield(child, flags1); |
263 | kmemcheck_annotate_bitfield(child, flags2); |
264 | skb->fclone = SKB_FCLONE_ORIG; |
265 | atomic_set(fclone_ref, 1); |
266 | |
267 | child->fclone = SKB_FCLONE_UNAVAILABLE; |
268 | child->pfmemalloc = pfmemalloc; |
269 | } |
270 | out: |
271 | return skb; |
272 | nodata: |
273 | kmem_cache_free(cache, skb); |
274 | skb = NULL; |
275 | goto out; |
276 | } |
277 | EXPORT_SYMBOL(__alloc_skb); |
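/*
 * Illustrative sketch (not part of this file): a typical caller goes through
 * the alloc_skb() wrapper, reserves headroom for its headers and then fills
 * in the payload.  hdr_len, payload and payload_len are hypothetical names
 * used only for this example.
 *
 *	struct sk_buff *skb = alloc_skb(hdr_len + payload_len, GFP_ATOMIC);
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, hdr_len);
 *	memcpy(skb_put(skb, payload_len), payload, payload_len);
 */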
278 | |
279 | /** |
280 | * build_skb - build a network buffer |
281 | * @data: data buffer provided by caller |
282 | * @frag_size: size of fragment, or 0 if head was kmalloced |
283 | * |
284 | * Allocate a new &sk_buff. Caller provides space holding head and |
285 | * skb_shared_info. @data must have been allocated by kmalloc() only if |
286 | * @frag_size is 0, otherwise data should come from the page allocator. |
287 | * The return is the new skb buffer. |
288 | * On a failure the return is %NULL, and @data is not freed. |
289 | * Notes: |
290 | * Before IO, the driver allocates only the data buffer where the NIC puts the incoming frame. |
291 | * The driver should add room at head (NET_SKB_PAD) and |
292 | * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)). |
293 | * After IO, the driver calls build_skb() to allocate the sk_buff and populate it |
294 | * before giving the packet to the stack. |
295 | * RX rings only contain data buffers, not full skbs. |
296 | */ |
297 | struct sk_buff *build_skb(void *data, unsigned int frag_size) |
298 | { |
299 | struct skb_shared_info *shinfo; |
300 | struct sk_buff *skb; |
301 | unsigned int size = frag_size ? : ksize(data); |
302 | |
303 | skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); |
304 | if (!skb) |
305 | return NULL; |
306 | |
307 | size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
308 | |
309 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
310 | skb->truesize = SKB_TRUESIZE(size); |
311 | skb->head_frag = frag_size != 0; |
312 | atomic_set(&skb->users, 1); |
313 | skb->head = data; |
314 | skb->data = data; |
315 | skb_reset_tail_pointer(skb); |
316 | skb->end = skb->tail + size; |
317 | skb->mac_header = (typeof(skb->mac_header))~0U; |
318 | skb->transport_header = (typeof(skb->transport_header))~0U; |
319 | |
320 | /* make sure we initialize shinfo sequentially */ |
321 | shinfo = skb_shinfo(skb); |
322 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); |
323 | atomic_set(&shinfo->dataref, 1); |
324 | kmemcheck_annotate_variable(shinfo->destructor_arg); |
325 | |
326 | return skb; |
327 | } |
328 | EXPORT_SYMBOL(build_skb); |
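/*
 * Illustrative sketch (not part of this file) of the RX pattern described in
 * the notes above: the driver lets the NIC DMA into a page fragment and only
 * attaches an sk_buff afterwards.  buf_len and pkt_len are hypothetical
 * driver-side names and error handling is abbreviated.
 *
 *	unsigned int frag_len = SKB_DATA_ALIGN(NET_SKB_PAD + buf_len) +
 *				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 *	void *buf = netdev_alloc_frag(frag_len);
 *
 *	... the NIC later DMAs a frame of pkt_len bytes at buf + NET_SKB_PAD ...
 *
 *	skb = build_skb(buf, frag_len);
 *	if (unlikely(!skb)) {
 *		put_page(virt_to_head_page(buf));
 *		return NULL;
 *	}
 *	skb_reserve(skb, NET_SKB_PAD);
 *	skb_put(skb, pkt_len);
 */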
329 | |
330 | struct netdev_alloc_cache { |
331 | struct page_frag frag; |
332 | * we maintain a pagecount bias, so that we don't dirty the cache line |
333 | * containing page->_count every time we allocate a fragment. |
334 | */ |
335 | unsigned int pagecnt_bias; |
336 | }; |
337 | static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); |
338 | |
339 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) |
340 | { |
341 | struct netdev_alloc_cache *nc; |
342 | void *data = NULL; |
343 | int order; |
344 | unsigned long flags; |
345 | |
346 | local_irq_save(flags); |
347 | nc = &__get_cpu_var(netdev_alloc_cache); |
348 | if (unlikely(!nc->frag.page)) { |
349 | refill: |
350 | for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { |
351 | gfp_t gfp = gfp_mask; |
352 | |
353 | if (order) |
354 | gfp |= __GFP_COMP | __GFP_NOWARN; |
355 | nc->frag.page = alloc_pages(gfp, order); |
356 | if (likely(nc->frag.page)) |
357 | break; |
358 | if (--order < 0) |
359 | goto end; |
360 | } |
361 | nc->frag.size = PAGE_SIZE << order; |
362 | recycle: |
363 | atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); |
364 | nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; |
365 | nc->frag.offset = 0; |
366 | } |
367 | |
368 | if (nc->frag.offset + fragsz > nc->frag.size) { |
369 | /* avoid unnecessary locked operations if possible */ |
370 | if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || |
371 | atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) |
372 | goto recycle; |
373 | goto refill; |
374 | } |
375 | |
376 | data = page_address(nc->frag.page) + nc->frag.offset; |
377 | nc->frag.offset += fragsz; |
378 | nc->pagecnt_bias--; |
379 | end: |
380 | local_irq_restore(flags); |
381 | return data; |
382 | } |
383 | |
384 | /** |
385 | * netdev_alloc_frag - allocate a page fragment |
386 | * @fragsz: fragment size |
387 | * |
388 | * Allocates a frag from a page for receive buffer. |
389 | * Uses GFP_ATOMIC allocations. |
390 | */ |
391 | void *netdev_alloc_frag(unsigned int fragsz) |
392 | { |
393 | return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); |
394 | } |
395 | EXPORT_SYMBOL(netdev_alloc_frag); |
396 | |
397 | /** |
398 | * __netdev_alloc_skb - allocate an skbuff for rx on a specific device |
399 | * @dev: network device to receive on |
400 | * @length: length to allocate |
401 | * @gfp_mask: get_free_pages mask, passed to alloc_skb |
402 | * |
403 | * Allocate a new &sk_buff and assign it a usage count of one. The |
404 | * buffer has unspecified headroom built in. Users should allocate |
405 | * the headroom they think they need without accounting for the |
406 | * built in space. The built in space is used for optimisations. |
407 | * |
408 | * %NULL is returned if there is no free memory. |
409 | */ |
410 | struct sk_buff *__netdev_alloc_skb(struct net_device *dev, |
411 | unsigned int length, gfp_t gfp_mask) |
412 | { |
413 | struct sk_buff *skb = NULL; |
414 | unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + |
415 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
416 | |
417 | if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { |
418 | void *data; |
419 | |
420 | if (sk_memalloc_socks()) |
421 | gfp_mask |= __GFP_MEMALLOC; |
422 | |
423 | data = __netdev_alloc_frag(fragsz, gfp_mask); |
424 | |
425 | if (likely(data)) { |
426 | skb = build_skb(data, fragsz); |
427 | if (unlikely(!skb)) |
428 | put_page(virt_to_head_page(data)); |
429 | } |
430 | } else { |
431 | skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, |
432 | SKB_ALLOC_RX, NUMA_NO_NODE); |
433 | } |
434 | if (likely(skb)) { |
435 | skb_reserve(skb, NET_SKB_PAD); |
436 | skb->dev = dev; |
437 | } |
438 | return skb; |
439 | } |
440 | EXPORT_SYMBOL(__netdev_alloc_skb); |
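/*
 * Illustrative sketch (not part of this file): a copy-based RX path using the
 * netdev_alloc_skb() wrapper.  rx_buf and pkt_len are hypothetical
 * driver-side names.
 *
 *	struct sk_buff *skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
 *
 *	if (unlikely(!skb))
 *		return;					drop; the ring buffer is reused
 *	skb_reserve(skb, NET_IP_ALIGN);			align the IP header
 *	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_receive_skb(skb);
 */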
441 | |
442 | void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, |
443 | int size, unsigned int truesize) |
444 | { |
445 | skb_fill_page_desc(skb, i, page, off, size); |
446 | skb->len += size; |
447 | skb->data_len += size; |
448 | skb->truesize += truesize; |
449 | } |
450 | EXPORT_SYMBOL(skb_add_rx_frag); |
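/*
 * Illustrative sketch (not part of this file): attaching a received page
 * fragment to an skb.  The page reference is donated to the skb, so a caller
 * that keeps using the page must take its own reference with get_page()
 * first.  page, offset and frag_len are hypothetical.
 *
 *	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 *			offset, frag_len, PAGE_SIZE);
 */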
451 | |
452 | void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, |
453 | unsigned int truesize) |
454 | { |
455 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
456 | |
457 | skb_frag_size_add(frag, size); |
458 | skb->len += size; |
459 | skb->data_len += size; |
460 | skb->truesize += truesize; |
461 | } |
462 | EXPORT_SYMBOL(skb_coalesce_rx_frag); |
463 | |
464 | static void skb_drop_list(struct sk_buff **listp) |
465 | { |
466 | kfree_skb_list(*listp); |
467 | *listp = NULL; |
468 | } |
469 | |
470 | static inline void skb_drop_fraglist(struct sk_buff *skb) |
471 | { |
472 | skb_drop_list(&skb_shinfo(skb)->frag_list); |
473 | } |
474 | |
475 | static void skb_clone_fraglist(struct sk_buff *skb) |
476 | { |
477 | struct sk_buff *list; |
478 | |
479 | skb_walk_frags(skb, list) |
480 | skb_get(list); |
481 | } |
482 | |
483 | static void skb_free_head(struct sk_buff *skb) |
484 | { |
485 | if (skb->head_frag) |
486 | put_page(virt_to_head_page(skb->head)); |
487 | else |
488 | kfree(skb->head); |
489 | } |
490 | |
491 | static void skb_release_data(struct sk_buff *skb) |
492 | { |
493 | if (!skb->cloned || |
494 | !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, |
495 | &skb_shinfo(skb)->dataref)) { |
496 | if (skb_shinfo(skb)->nr_frags) { |
497 | int i; |
498 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) |
499 | skb_frag_unref(skb, i); |
500 | } |
501 | |
502 | /* |
503 | * If the skb buffer is from userspace, we need to notify the caller |
504 | * that the lower-level device DMA has completed; |
505 | */ |
506 | if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { |
507 | struct ubuf_info *uarg; |
508 | |
509 | uarg = skb_shinfo(skb)->destructor_arg; |
510 | if (uarg->callback) |
511 | uarg->callback(uarg, true); |
512 | } |
513 | |
514 | if (skb_has_frag_list(skb)) |
515 | skb_drop_fraglist(skb); |
516 | |
517 | skb_free_head(skb); |
518 | } |
519 | } |
520 | |
521 | /* |
522 | * Free an skbuff by memory without cleaning the state. |
523 | */ |
524 | static void kfree_skbmem(struct sk_buff *skb) |
525 | { |
526 | struct sk_buff *other; |
527 | atomic_t *fclone_ref; |
528 | |
529 | switch (skb->fclone) { |
530 | case SKB_FCLONE_UNAVAILABLE: |
531 | kmem_cache_free(skbuff_head_cache, skb); |
532 | break; |
533 | |
534 | case SKB_FCLONE_ORIG: |
535 | fclone_ref = (atomic_t *) (skb + 2); |
536 | if (atomic_dec_and_test(fclone_ref)) |
537 | kmem_cache_free(skbuff_fclone_cache, skb); |
538 | break; |
539 | |
540 | case SKB_FCLONE_CLONE: |
541 | fclone_ref = (atomic_t *) (skb + 1); |
542 | other = skb - 1; |
543 | |
544 | /* The clone portion is available for |
545 | * fast-cloning again. |
546 | */ |
547 | skb->fclone = SKB_FCLONE_UNAVAILABLE; |
548 | |
549 | if (atomic_dec_and_test(fclone_ref)) |
550 | kmem_cache_free(skbuff_fclone_cache, other); |
551 | break; |
552 | } |
553 | } |
554 | |
555 | static void skb_release_head_state(struct sk_buff *skb) |
556 | { |
557 | skb_dst_drop(skb); |
558 | #ifdef CONFIG_XFRM |
559 | secpath_put(skb->sp); |
560 | #endif |
561 | if (skb->destructor) { |
562 | WARN_ON(in_irq()); |
563 | skb->destructor(skb); |
564 | } |
565 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
566 | nf_conntrack_put(skb->nfct); |
567 | #endif |
568 | #ifdef CONFIG_BRIDGE_NETFILTER |
569 | nf_bridge_put(skb->nf_bridge); |
570 | #endif |
571 | /* XXX: IS this still necessary? - JHS */ |
572 | #ifdef CONFIG_NET_SCHED |
573 | skb->tc_index = 0; |
574 | #ifdef CONFIG_NET_CLS_ACT |
575 | skb->tc_verd = 0; |
576 | #endif |
577 | #endif |
578 | } |
579 | |
580 | /* Free everything but the sk_buff shell. */ |
581 | static void skb_release_all(struct sk_buff *skb) |
582 | { |
583 | skb_release_head_state(skb); |
584 | if (likely(skb->head)) |
585 | skb_release_data(skb); |
586 | } |
587 | |
588 | /** |
589 | * __kfree_skb - private function |
590 | * @skb: buffer |
591 | * |
592 | * Free an sk_buff. Release anything attached to the buffer. |
593 | * Clean the state. This is an internal helper function. Users should |
594 | * always call kfree_skb. |
595 | */ |
596 | |
597 | void __kfree_skb(struct sk_buff *skb) |
598 | { |
599 | skb_release_all(skb); |
600 | kfree_skbmem(skb); |
601 | } |
602 | EXPORT_SYMBOL(__kfree_skb); |
603 | |
604 | /** |
605 | * kfree_skb - free an sk_buff |
606 | * @skb: buffer to free |
607 | * |
608 | * Drop a reference to the buffer and free it if the usage count has |
609 | * hit zero. |
610 | */ |
611 | void kfree_skb(struct sk_buff *skb) |
612 | { |
613 | if (unlikely(!skb)) |
614 | return; |
615 | if (likely(atomic_read(&skb->users) == 1)) |
616 | smp_rmb(); |
617 | else if (likely(!atomic_dec_and_test(&skb->users))) |
618 | return; |
619 | trace_kfree_skb(skb, __builtin_return_address(0)); |
620 | __kfree_skb(skb); |
621 | } |
622 | EXPORT_SYMBOL(kfree_skb); |
623 | |
624 | void kfree_skb_list(struct sk_buff *segs) |
625 | { |
626 | while (segs) { |
627 | struct sk_buff *next = segs->next; |
628 | |
629 | kfree_skb(segs); |
630 | segs = next; |
631 | } |
632 | } |
633 | EXPORT_SYMBOL(kfree_skb_list); |
634 | |
635 | /** |
636 | * skb_tx_error - report an sk_buff xmit error |
637 | * @skb: buffer that triggered an error |
638 | * |
639 | * Report xmit error if a device callback is tracking this skb. |
640 | * skb must be freed afterwards. |
641 | */ |
642 | void skb_tx_error(struct sk_buff *skb) |
643 | { |
644 | if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { |
645 | struct ubuf_info *uarg; |
646 | |
647 | uarg = skb_shinfo(skb)->destructor_arg; |
648 | if (uarg->callback) |
649 | uarg->callback(uarg, false); |
650 | skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; |
651 | } |
652 | } |
653 | EXPORT_SYMBOL(skb_tx_error); |
654 | |
655 | /** |
656 | * consume_skb - free an skbuff |
657 | * @skb: buffer to free |
658 | * |
659 | * Drop a ref to the buffer and free it if the usage count has hit zero. |
660 | * Functions identically to kfree_skb, but kfree_skb assumes that the frame |
661 | * is being dropped after a failure and notes that. |
662 | */ |
663 | void consume_skb(struct sk_buff *skb) |
664 | { |
665 | if (unlikely(!skb)) |
666 | return; |
667 | if (likely(atomic_read(&skb->users) == 1)) |
668 | smp_rmb(); |
669 | else if (likely(!atomic_dec_and_test(&skb->users))) |
670 | return; |
671 | trace_consume_skb(skb); |
672 | __kfree_skb(skb); |
673 | } |
674 | EXPORT_SYMBOL(consume_skb); |
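/*
 * Illustrative sketch (not part of this file): the difference from kfree_skb()
 * only matters for tracing; drop monitors hook the kfree_skb tracepoint, so a
 * TX completion path frees with consume_skb() while an error path uses
 * kfree_skb().  tx_failed is a hypothetical condition.
 *
 *	if (unlikely(tx_failed))
 *		kfree_skb(skb);		accounted as a drop
 *	else
 *		consume_skb(skb);	normal end of life
 */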
675 | |
676 | static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) |
677 | { |
678 | new->tstamp = old->tstamp; |
679 | new->dev = old->dev; |
680 | new->transport_header = old->transport_header; |
681 | new->network_header = old->network_header; |
682 | new->mac_header = old->mac_header; |
683 | new->inner_protocol = old->inner_protocol; |
684 | new->inner_transport_header = old->inner_transport_header; |
685 | new->inner_network_header = old->inner_network_header; |
686 | new->inner_mac_header = old->inner_mac_header; |
687 | skb_dst_copy(new, old); |
688 | skb_copy_hash(new, old); |
689 | new->ooo_okay = old->ooo_okay; |
690 | new->no_fcs = old->no_fcs; |
691 | new->encapsulation = old->encapsulation; |
692 | #ifdef CONFIG_XFRM |
693 | new->sp = secpath_get(old->sp); |
694 | #endif |
695 | memcpy(new->cb, old->cb, sizeof(old->cb)); |
696 | new->csum = old->csum; |
697 | new->local_df = old->local_df; |
698 | new->pkt_type = old->pkt_type; |
699 | new->ip_summed = old->ip_summed; |
700 | skb_copy_queue_mapping(new, old); |
701 | new->priority = old->priority; |
702 | #if IS_ENABLED(CONFIG_IP_VS) |
703 | new->ipvs_property = old->ipvs_property; |
704 | #endif |
705 | new->pfmemalloc = old->pfmemalloc; |
706 | new->protocol = old->protocol; |
707 | new->mark = old->mark; |
708 | new->skb_iif = old->skb_iif; |
709 | __nf_copy(new, old); |
710 | #ifdef CONFIG_NET_SCHED |
711 | new->tc_index = old->tc_index; |
712 | #ifdef CONFIG_NET_CLS_ACT |
713 | new->tc_verd = old->tc_verd; |
714 | #endif |
715 | #endif |
716 | new->vlan_proto = old->vlan_proto; |
717 | new->vlan_tci = old->vlan_tci; |
718 | |
719 | skb_copy_secmark(new, old); |
720 | |
721 | #ifdef CONFIG_NET_RX_BUSY_POLL |
722 | new->napi_id = old->napi_id; |
723 | #endif |
724 | } |
725 | |
726 | /* |
727 | * You should not add any new code to this function. Add it to |
728 | * __copy_skb_header above instead. |
729 | */ |
730 | static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) |
731 | { |
732 | #define C(x) n->x = skb->x |
733 | |
734 | n->next = n->prev = NULL; |
735 | n->sk = NULL; |
736 | __copy_skb_header(n, skb); |
737 | |
738 | C(len); |
739 | C(data_len); |
740 | C(mac_len); |
741 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; |
742 | n->cloned = 1; |
743 | n->nohdr = 0; |
744 | n->destructor = NULL; |
745 | C(tail); |
746 | C(end); |
747 | C(head); |
748 | C(head_frag); |
749 | C(data); |
750 | C(truesize); |
751 | atomic_set(&n->users, 1); |
752 | |
753 | atomic_inc(&(skb_shinfo(skb)->dataref)); |
754 | skb->cloned = 1; |
755 | |
756 | return n; |
757 | #undef C |
758 | } |
759 | |
760 | /** |
761 | * skb_morph - morph one skb into another |
762 | * @dst: the skb to receive the contents |
763 | * @src: the skb to supply the contents |
764 | * |
765 | * This is identical to skb_clone except that the target skb is |
766 | * supplied by the user. |
767 | * |
768 | * The target skb is returned upon exit. |
769 | */ |
770 | struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) |
771 | { |
772 | skb_release_all(dst); |
773 | return __skb_clone(dst, src); |
774 | } |
775 | EXPORT_SYMBOL_GPL(skb_morph); |
776 | |
777 | /** |
778 | * skb_copy_ubufs - copy userspace skb frags buffers to kernel |
779 | * @skb: the skb to modify |
780 | * @gfp_mask: allocation priority |
781 | * |
782 | * This must be called on SKBTX_DEV_ZEROCOPY skb. |
783 | * It will copy all frags into kernel and drop the reference |
784 | * to userspace pages. |
785 | * |
786 | * If this function is called from an interrupt, @gfp_mask must be |
787 | * %GFP_ATOMIC. |
788 | * |
789 | * Returns 0 on success or a negative error code on failure |
790 | * to allocate kernel memory to copy to. |
791 | */ |
792 | int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) |
793 | { |
794 | int i; |
795 | int num_frags = skb_shinfo(skb)->nr_frags; |
796 | struct page *page, *head = NULL; |
797 | struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; |
798 | |
799 | for (i = 0; i < num_frags; i++) { |
800 | u8 *vaddr; |
801 | skb_frag_t *f = &skb_shinfo(skb)->frags[i]; |
802 | |
803 | page = alloc_page(gfp_mask); |
804 | if (!page) { |
805 | while (head) { |
806 | struct page *next = (struct page *)page_private(head); |
807 | put_page(head); |
808 | head = next; |
809 | } |
810 | return -ENOMEM; |
811 | } |
812 | vaddr = kmap_atomic(skb_frag_page(f)); |
813 | memcpy(page_address(page), |
814 | vaddr + f->page_offset, skb_frag_size(f)); |
815 | kunmap_atomic(vaddr); |
816 | set_page_private(page, (unsigned long)head); |
817 | head = page; |
818 | } |
819 | |
820 | /* skb frags release userspace buffers */ |
821 | for (i = 0; i < num_frags; i++) |
822 | skb_frag_unref(skb, i); |
823 | |
824 | uarg->callback(uarg, false); |
825 | |
826 | /* skb frags point to kernel buffers */ |
827 | for (i = num_frags - 1; i >= 0; i--) { |
828 | __skb_fill_page_desc(skb, i, head, 0, |
829 | skb_shinfo(skb)->frags[i].size); |
830 | head = (struct page *)page_private(head); |
831 | } |
832 | |
833 | skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; |
834 | return 0; |
835 | } |
836 | EXPORT_SYMBOL_GPL(skb_copy_ubufs); |
837 | |
838 | /** |
839 | * skb_clone - duplicate an sk_buff |
840 | * @skb: buffer to clone |
841 | * @gfp_mask: allocation priority |
842 | * |
843 | * Duplicate an &sk_buff. The new one is not owned by a socket. Both |
844 | * copies share the same packet data but not structure. The new |
845 | * buffer has a reference count of 1. If the allocation fails the |
846 | * function returns %NULL otherwise the new buffer is returned. |
847 | * |
848 | * If this function is called from an interrupt, @gfp_mask must be |
849 | * %GFP_ATOMIC. |
850 | */ |
851 | |
852 | struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) |
853 | { |
854 | struct sk_buff *n; |
855 | |
856 | if (skb_orphan_frags(skb, gfp_mask)) |
857 | return NULL; |
858 | |
859 | n = skb + 1; |
860 | if (skb->fclone == SKB_FCLONE_ORIG && |
861 | n->fclone == SKB_FCLONE_UNAVAILABLE) { |
862 | atomic_t *fclone_ref = (atomic_t *) (n + 1); |
863 | n->fclone = SKB_FCLONE_CLONE; |
864 | atomic_inc(fclone_ref); |
865 | } else { |
866 | if (skb_pfmemalloc(skb)) |
867 | gfp_mask |= __GFP_MEMALLOC; |
868 | |
869 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); |
870 | if (!n) |
871 | return NULL; |
872 | |
873 | kmemcheck_annotate_bitfield(n, flags1); |
874 | kmemcheck_annotate_bitfield(n, flags2); |
875 | n->fclone = SKB_FCLONE_UNAVAILABLE; |
876 | } |
877 | |
878 | return __skb_clone(n, skb); |
879 | } |
880 | EXPORT_SYMBOL(skb_clone); |
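/*
 * Illustrative sketch (not part of this file): handing a frame to a second
 * consumer while keeping the original.  Both skbs share the packet data, so
 * neither side may modify it without taking a private copy first (see
 * skb_copy()/pskb_copy()).  deliver_copy() is a hypothetical consumer.
 *
 *	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 *
 *	if (nskb)
 *		deliver_copy(nskb);
 */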
881 | |
882 | static void skb_headers_offset_update(struct sk_buff *skb, int off) |
883 | { |
884 | /* Only adjust this if it actually is csum_start rather than csum */ |
885 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
886 | skb->csum_start += off; |
887 | /* {transport,network,mac}_header and tail are relative to skb->head */ |
888 | skb->transport_header += off; |
889 | skb->network_header += off; |
890 | if (skb_mac_header_was_set(skb)) |
891 | skb->mac_header += off; |
892 | skb->inner_transport_header += off; |
893 | skb->inner_network_header += off; |
894 | skb->inner_mac_header += off; |
895 | } |
896 | |
897 | static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) |
898 | { |
899 | __copy_skb_header(new, old); |
900 | |
901 | skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; |
902 | skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; |
903 | skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; |
904 | } |
905 | |
906 | static inline int skb_alloc_rx_flag(const struct sk_buff *skb) |
907 | { |
908 | if (skb_pfmemalloc(skb)) |
909 | return SKB_ALLOC_RX; |
910 | return 0; |
911 | } |
912 | |
913 | /** |
914 | * skb_copy - create private copy of an sk_buff |
915 | * @skb: buffer to copy |
916 | * @gfp_mask: allocation priority |
917 | * |
918 | * Make a copy of both an &sk_buff and its data. This is used when the |
919 | * caller wishes to modify the data and needs a private copy of the |
920 | * data to alter. Returns %NULL on failure or the pointer to the buffer |
921 | * on success. The returned buffer has a reference count of 1. |
922 | * |
923 | * As by-product this function converts non-linear &sk_buff to linear |
924 | * one, so that &sk_buff becomes completely private and caller is allowed |
925 | * to modify all the data of returned buffer. This means that this |
926 | * function is not recommended for use in circumstances when only |
927 | * header is going to be modified. Use pskb_copy() instead. |
928 | */ |
929 | |
930 | struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) |
931 | { |
932 | int headerlen = skb_headroom(skb); |
933 | unsigned int size = skb_end_offset(skb) + skb->data_len; |
934 | struct sk_buff *n = __alloc_skb(size, gfp_mask, |
935 | skb_alloc_rx_flag(skb), NUMA_NO_NODE); |
936 | |
937 | if (!n) |
938 | return NULL; |
939 | |
940 | /* Set the data pointer */ |
941 | skb_reserve(n, headerlen); |
942 | /* Set the tail pointer and length */ |
943 | skb_put(n, skb->len); |
944 | |
945 | if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) |
946 | BUG(); |
947 | |
948 | copy_skb_header(n, skb); |
949 | return n; |
950 | } |
951 | EXPORT_SYMBOL(skb_copy); |
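/*
 * Illustrative sketch (not part of this file): taking a fully private, linear
 * copy before rewriting packet contents.  If only the headers will be
 * modified, __pskb_copy() below is the cheaper choice.
 *
 *	struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);
 *
 *	if (!priv)
 *		return -ENOMEM;
 *	... priv->data may now be modified freely; the frags were
 *	    linearized into the new head ...
 */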
952 | |
953 | /** |
954 | * __pskb_copy - create copy of an sk_buff with private head. |
955 | * @skb: buffer to copy |
956 | * @headroom: headroom of new skb |
957 | * @gfp_mask: allocation priority |
958 | * |
959 | * Make a copy of both an &sk_buff and part of its data, located |
960 | * in header. Fragmented data remain shared. This is used when |
961 | * the caller wishes to modify only header of &sk_buff and needs |
962 | * private copy of the header to alter. Returns %NULL on failure |
963 | * or the pointer to the buffer on success. |
964 | * The returned buffer has a reference count of 1. |
965 | */ |
966 | |
967 | struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) |
968 | { |
969 | unsigned int size = skb_headlen(skb) + headroom; |
970 | struct sk_buff *n = __alloc_skb(size, gfp_mask, |
971 | skb_alloc_rx_flag(skb), NUMA_NO_NODE); |
972 | |
973 | if (!n) |
974 | goto out; |
975 | |
976 | /* Set the data pointer */ |
977 | skb_reserve(n, headroom); |
978 | /* Set the tail pointer and length */ |
979 | skb_put(n, skb_headlen(skb)); |
980 | /* Copy the bytes */ |
981 | skb_copy_from_linear_data(skb, n->data, n->len); |
982 | |
983 | n->truesize += skb->data_len; |
984 | n->data_len = skb->data_len; |
985 | n->len = skb->len; |
986 | |
987 | if (skb_shinfo(skb)->nr_frags) { |
988 | int i; |
989 | |
990 | if (skb_orphan_frags(skb, gfp_mask)) { |
991 | kfree_skb(n); |
992 | n = NULL; |
993 | goto out; |
994 | } |
995 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
996 | skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; |
997 | skb_frag_ref(skb, i); |
998 | } |
999 | skb_shinfo(n)->nr_frags = i; |
1000 | } |
1001 | |
1002 | if (skb_has_frag_list(skb)) { |
1003 | skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; |
1004 | skb_clone_fraglist(n); |
1005 | } |
1006 | |
1007 | copy_skb_header(n, skb); |
1008 | out: |
1009 | return n; |
1010 | } |
1011 | EXPORT_SYMBOL(__pskb_copy); |
1012 | |
1013 | /** |
1014 | * pskb_expand_head - reallocate header of &sk_buff |
1015 | * @skb: buffer to reallocate |
1016 | * @nhead: room to add at head |
1017 | * @ntail: room to add at tail |
1018 | * @gfp_mask: allocation priority |
1019 | * |
1020 | * Expands (or creates identical copy, if @nhead and @ntail are zero) |
1021 | * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have |
1022 | * reference count of 1. Returns zero in the case of success, or an error |
1023 | * if expansion failed. In the latter case, the &sk_buff is not changed. |
1024 | * |
1025 | * All the pointers pointing into skb header may change and must be |
1026 | * reloaded after call to this function. |
1027 | */ |
1028 | |
1029 | int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, |
1030 | gfp_t gfp_mask) |
1031 | { |
1032 | int i; |
1033 | u8 *data; |
1034 | int size = nhead + skb_end_offset(skb) + ntail; |
1035 | long off; |
1036 | |
1037 | BUG_ON(nhead < 0); |
1038 | |
1039 | if (skb_shared(skb)) |
1040 | BUG(); |
1041 | |
1042 | size = SKB_DATA_ALIGN(size); |
1043 | |
1044 | if (skb_pfmemalloc(skb)) |
1045 | gfp_mask |= __GFP_MEMALLOC; |
1046 | data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), |
1047 | gfp_mask, NUMA_NO_NODE, NULL); |
1048 | if (!data) |
1049 | goto nodata; |
1050 | size = SKB_WITH_OVERHEAD(ksize(data)); |
1051 | |
1052 | /* Copy only real data... and, alas, header. This should be |
1053 | * optimized for the cases when header is void. |
1054 | */ |
1055 | memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); |
1056 | |
1057 | memcpy((struct skb_shared_info *)(data + size), |
1058 | skb_shinfo(skb), |
1059 | offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); |
1060 | |
1061 | /* |
1062 | * If shinfo is shared we must drop the old head gracefully, but if it |
1063 | * is not we can just drop the old head and let the existing refcount |
1064 | * be, since all we did is relocate the values. |
1065 | */ |
1066 | if (skb_cloned(skb)) { |
1067 | /* copy this zero copy skb frags */ |
1068 | if (skb_orphan_frags(skb, gfp_mask)) |
1069 | goto nofrags; |
1070 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) |
1071 | skb_frag_ref(skb, i); |
1072 | |
1073 | if (skb_has_frag_list(skb)) |
1074 | skb_clone_fraglist(skb); |
1075 | |
1076 | skb_release_data(skb); |
1077 | } else { |
1078 | skb_free_head(skb); |
1079 | } |
1080 | off = (data + nhead) - skb->head; |
1081 | |
1082 | skb->head = data; |
1083 | skb->head_frag = 0; |
1084 | skb->data += off; |
1085 | #ifdef NET_SKBUFF_DATA_USES_OFFSET |
1086 | skb->end = size; |
1087 | off = nhead; |
1088 | #else |
1089 | skb->end = skb->head + size; |
1090 | #endif |
1091 | skb->tail += off; |
1092 | skb_headers_offset_update(skb, nhead); |
1093 | skb->cloned = 0; |
1094 | skb->hdr_len = 0; |
1095 | skb->nohdr = 0; |
1096 | atomic_set(&skb_shinfo(skb)->dataref, 1); |
1097 | return 0; |
1098 | |
1099 | nofrags: |
1100 | kfree(data); |
1101 | nodata: |
1102 | return -ENOMEM; |
1103 | } |
1104 | EXPORT_SYMBOL(pskb_expand_head); |
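/*
 * Illustrative sketch (not part of this file): making room to push an extra
 * encapsulation header of encap_len (hypothetical) bytes in front of a
 * possibly cloned skb; helpers such as skb_cow_head() wrap this same call.
 *
 *	if (skb_headroom(skb) < encap_len || skb_cloned(skb)) {
 *		if (pskb_expand_head(skb, SKB_DATA_ALIGN(encap_len), 0,
 *				     GFP_ATOMIC))
 *			goto drop;
 *	}
 *	hdr = skb_push(skb, encap_len);
 */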
1105 | |
1106 | /* Make private copy of skb with writable head and some headroom */ |
1107 | |
1108 | struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) |
1109 | { |
1110 | struct sk_buff *skb2; |
1111 | int delta = headroom - skb_headroom(skb); |
1112 | |
1113 | if (delta <= 0) |
1114 | skb2 = pskb_copy(skb, GFP_ATOMIC); |
1115 | else { |
1116 | skb2 = skb_clone(skb, GFP_ATOMIC); |
1117 | if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, |
1118 | GFP_ATOMIC)) { |
1119 | kfree_skb(skb2); |
1120 | skb2 = NULL; |
1121 | } |
1122 | } |
1123 | return skb2; |
1124 | } |
1125 | EXPORT_SYMBOL(skb_realloc_headroom); |
1126 | |
1127 | /** |
1128 | * skb_copy_expand - copy and expand sk_buff |
1129 | * @skb: buffer to copy |
1130 | * @newheadroom: new free bytes at head |
1131 | * @newtailroom: new free bytes at tail |
1132 | * @gfp_mask: allocation priority |
1133 | * |
1134 | * Make a copy of both an &sk_buff and its data and while doing so |
1135 | * allocate additional space. |
1136 | * |
1137 | * This is used when the caller wishes to modify the data and needs a |
1138 | * private copy of the data to alter as well as more space for new fields. |
1139 | * Returns %NULL on failure or the pointer to the buffer |
1140 | * on success. The returned buffer has a reference count of 1. |
1141 | * |
1142 | * You must pass %GFP_ATOMIC as the allocation priority if this function |
1143 | * is called from an interrupt. |
1144 | */ |
1145 | struct sk_buff *skb_copy_expand(const struct sk_buff *skb, |
1146 | int newheadroom, int newtailroom, |
1147 | gfp_t gfp_mask) |
1148 | { |
1149 | /* |
1150 | * Allocate the copy buffer |
1151 | */ |
1152 | struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, |
1153 | gfp_mask, skb_alloc_rx_flag(skb), |
1154 | NUMA_NO_NODE); |
1155 | int oldheadroom = skb_headroom(skb); |
1156 | int head_copy_len, head_copy_off; |
1157 | |
1158 | if (!n) |
1159 | return NULL; |
1160 | |
1161 | skb_reserve(n, newheadroom); |
1162 | |
1163 | /* Set the tail pointer and length */ |
1164 | skb_put(n, skb->len); |
1165 | |
1166 | head_copy_len = oldheadroom; |
1167 | head_copy_off = 0; |
1168 | if (newheadroom <= head_copy_len) |
1169 | head_copy_len = newheadroom; |
1170 | else |
1171 | head_copy_off = newheadroom - head_copy_len; |
1172 | |
1173 | /* Copy the linear header and data. */ |
1174 | if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, |
1175 | skb->len + head_copy_len)) |
1176 | BUG(); |
1177 | |
1178 | copy_skb_header(n, skb); |
1179 | |
1180 | skb_headers_offset_update(n, newheadroom - oldheadroom); |
1181 | |
1182 | return n; |
1183 | } |
1184 | EXPORT_SYMBOL(skb_copy_expand); |
1185 | |
1186 | /** |
1187 | * skb_pad - zero pad the tail of an skb |
1188 | * @skb: buffer to pad |
1189 | * @pad: space to pad |
1190 | * |
1191 | * Ensure that a buffer is followed by a padding area that is zero |
1192 | * filled. Used by network drivers which may DMA or transfer data |
1193 | * beyond the buffer end onto the wire. |
1194 | * |
1195 | * May return error in out of memory cases. The skb is freed on error. |
1196 | */ |
1197 | |
1198 | int skb_pad(struct sk_buff *skb, int pad) |
1199 | { |
1200 | int err; |
1201 | int ntail; |
1202 | |
1203 | /* If the skbuff is non-linear, tailroom is always zero. */ |
1204 | if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { |
1205 | memset(skb->data+skb->len, 0, pad); |
1206 | return 0; |
1207 | } |
1208 | |
1209 | ntail = skb->data_len + pad - (skb->end - skb->tail); |
1210 | if (likely(skb_cloned(skb) || ntail > 0)) { |
1211 | err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); |
1212 | if (unlikely(err)) |
1213 | goto free_skb; |
1214 | } |
1215 | |
1216 | /* FIXME: The use of this function with non-linear skb's really needs |
1217 | * to be audited. |
1218 | */ |
1219 | err = skb_linearize(skb); |
1220 | if (unlikely(err)) |
1221 | goto free_skb; |
1222 | |
1223 | memset(skb->data + skb->len, 0, pad); |
1224 | return 0; |
1225 | |
1226 | free_skb: |
1227 | kfree_skb(skb); |
1228 | return err; |
1229 | } |
1230 | EXPORT_SYMBOL(skb_pad); |
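/*
 * Illustrative sketch (not part of this file): an Ethernet driver padding
 * runt frames before DMA via the skb_padto() wrapper, which calls skb_pad()
 * and, like it, frees the skb on failure, so the caller must not touch the
 * skb afterwards.
 *
 *	if (skb_padto(skb, ETH_ZLEN))
 *		return NETDEV_TX_OK;	skb was already freed
 *	len = max_t(unsigned int, skb->len, ETH_ZLEN);
 */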
1231 | |
1232 | /** |
1233 | * pskb_put - add data to the tail of a potentially fragmented buffer |
1234 | * @skb: start of the buffer to use |
1235 | * @tail: tail fragment of the buffer to use |
1236 | * @len: amount of data to add |
1237 | * |
1238 | * This function extends the used data area of the potentially |
1239 | * fragmented buffer. @tail must be the last fragment of @skb -- or |
1240 | * @skb itself. If this would exceed the total buffer size the kernel |
1241 | * will panic. A pointer to the first byte of the extra data is |
1242 | * returned. |
1243 | */ |
1244 | |
1245 | unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) |
1246 | { |
1247 | if (tail != skb) { |
1248 | skb->data_len += len; |
1249 | skb->len += len; |
1250 | } |
1251 | return skb_put(tail, len); |
1252 | } |
1253 | EXPORT_SYMBOL_GPL(pskb_put); |
1254 | |
1255 | /** |
1256 | * skb_put - add data to a buffer |
1257 | * @skb: buffer to use |
1258 | * @len: amount of data to add |
1259 | * |
1260 | * This function extends the used data area of the buffer. If this would |
1261 | * exceed the total buffer size the kernel will panic. A pointer to the |
1262 | * first byte of the extra data is returned. |
1263 | */ |
1264 | unsigned char *skb_put(struct sk_buff *skb, unsigned int len) |
1265 | { |
1266 | unsigned char *tmp = skb_tail_pointer(skb); |
1267 | SKB_LINEAR_ASSERT(skb); |
1268 | skb->tail += len; |
1269 | skb->len += len; |
1270 | if (unlikely(skb->tail > skb->end)) |
1271 | skb_over_panic(skb, len, __builtin_return_address(0)); |
1272 | return tmp; |
1273 | } |
1274 | EXPORT_SYMBOL(skb_put); |
1275 | |
1276 | /** |
1277 | * skb_push - add data to the start of a buffer |
1278 | * @skb: buffer to use |
1279 | * @len: amount of data to add |
1280 | * |
1281 | * This function extends the used data area of the buffer at the buffer |
1282 | * start. If this would exceed the total buffer headroom the kernel will |
1283 | * panic. A pointer to the first byte of the extra data is returned. |
1284 | */ |
1285 | unsigned char *skb_push(struct sk_buff *skb, unsigned int len) |
1286 | { |
1287 | skb->data -= len; |
1288 | skb->len += len; |
1289 | if (unlikely(skb->data<skb->head)) |
1290 | skb_under_panic(skb, len, __builtin_return_address(0)); |
1291 | return skb->data; |
1292 | } |
1293 | EXPORT_SYMBOL(skb_push); |
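/*
 * Illustrative sketch (not part of this file): building a frame back to
 * front, payload first with skb_put() and then the header with skb_push().
 * struct myproto_hdr, payload and payload_len are hypothetical.
 *
 *	struct myproto_hdr *hdr;
 *
 *	skb_reserve(skb, sizeof(*hdr));
 *	memcpy(skb_put(skb, payload_len), payload, payload_len);
 *	hdr = (struct myproto_hdr *)skb_push(skb, sizeof(*hdr));
 *	hdr->len = htons(payload_len);
 */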
1294 | |
1295 | /** |
1296 | * skb_pull - remove data from the start of a buffer |
1297 | * @skb: buffer to use |
1298 | * @len: amount of data to remove |
1299 | * |
1300 | * This function removes data from the start of a buffer, returning |
1301 | * the memory to the headroom. A pointer to the next data in the buffer |
1302 | * is returned. Once the data has been pulled future pushes will overwrite |
1303 | * the old data. |
1304 | */ |
1305 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) |
1306 | { |
1307 | return skb_pull_inline(skb, len); |
1308 | } |
1309 | EXPORT_SYMBOL(skb_pull); |
1310 | |
1311 | /** |
1312 | * skb_trim - remove end from a buffer |
1313 | * @skb: buffer to alter |
1314 | * @len: new length |
1315 | * |
1316 | * Cut the length of a buffer down by removing data from the tail. If |
1317 | * the buffer is already under the length specified it is not modified. |
1318 | * The skb must be linear. |
1319 | */ |
1320 | void skb_trim(struct sk_buff *skb, unsigned int len) |
1321 | { |
1322 | if (skb->len > len) |
1323 | __skb_trim(skb, len); |
1324 | } |
1325 | EXPORT_SYMBOL(skb_trim); |
1326 | |
1327 | /* Trims skb to length len. It can change skb pointers. |
1328 | */ |
1329 | |
1330 | int ___pskb_trim(struct sk_buff *skb, unsigned int len) |
1331 | { |
1332 | struct sk_buff **fragp; |
1333 | struct sk_buff *frag; |
1334 | int offset = skb_headlen(skb); |
1335 | int nfrags = skb_shinfo(skb)->nr_frags; |
1336 | int i; |
1337 | int err; |
1338 | |
1339 | if (skb_cloned(skb) && |
1340 | unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) |
1341 | return err; |
1342 | |
1343 | i = 0; |
1344 | if (offset >= len) |
1345 | goto drop_pages; |
1346 | |
1347 | for (; i < nfrags; i++) { |
1348 | int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); |
1349 | |
1350 | if (end < len) { |
1351 | offset = end; |
1352 | continue; |
1353 | } |
1354 | |
1355 | skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); |
1356 | |
1357 | drop_pages: |
1358 | skb_shinfo(skb)->nr_frags = i; |
1359 | |
1360 | for (; i < nfrags; i++) |
1361 | skb_frag_unref(skb, i); |
1362 | |
1363 | if (skb_has_frag_list(skb)) |
1364 | skb_drop_fraglist(skb); |
1365 | goto done; |
1366 | } |
1367 | |
1368 | for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); |
1369 | fragp = &frag->next) { |
1370 | int end = offset + frag->len; |
1371 | |
1372 | if (skb_shared(frag)) { |
1373 | struct sk_buff *nfrag; |
1374 | |
1375 | nfrag = skb_clone(frag, GFP_ATOMIC); |
1376 | if (unlikely(!nfrag)) |
1377 | return -ENOMEM; |
1378 | |
1379 | nfrag->next = frag->next; |
1380 | consume_skb(frag); |
1381 | frag = nfrag; |
1382 | *fragp = frag; |
1383 | } |
1384 | |
1385 | if (end < len) { |
1386 | offset = end; |
1387 | continue; |
1388 | } |
1389 | |
1390 | if (end > len && |
1391 | unlikely((err = pskb_trim(frag, len - offset)))) |
1392 | return err; |
1393 | |
1394 | if (frag->next) |
1395 | skb_drop_list(&frag->next); |
1396 | break; |
1397 | } |
1398 | |
1399 | done: |
1400 | if (len > skb_headlen(skb)) { |
1401 | skb->data_len -= skb->len - len; |
1402 | skb->len = len; |
1403 | } else { |
1404 | skb->len = len; |
1405 | skb->data_len = 0; |
1406 | skb_set_tail_pointer(skb, len); |
1407 | } |
1408 | |
1409 | return 0; |
1410 | } |
1411 | EXPORT_SYMBOL(___pskb_trim); |
1412 | |
1413 | /** |
1414 | * __pskb_pull_tail - advance tail of skb header |
1415 | * @skb: buffer to reallocate |
1416 | * @delta: number of bytes to advance tail |
1417 | * |
1418 | * This function makes sense only on a fragmented &sk_buff: |
1419 | * it expands the header, moving its tail forward and copying the necessary |
1420 | * data from the fragmented part. |
1421 | * |
1422 | * &sk_buff MUST have reference count of 1. |
1423 | * |
1424 | * Returns %NULL (and &sk_buff does not change) if pull failed |
1425 | * or value of new tail of skb in the case of success. |
1426 | * |
1427 | * All the pointers pointing into skb header may change and must be |
1428 | * reloaded after call to this function. |
1429 | */ |
1430 | |
1431 | /* Moves tail of skb head forward, copying data from fragmented part, |
1432 | * when it is necessary. |
1433 | * 1. It may fail due to malloc failure. |
1434 | * 2. It may change skb pointers. |
1435 | * |
1436 | * It is pretty complicated. Luckily, it is called only in exceptional cases. |
1437 | */ |
1438 | unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) |
1439 | { |
1440 | /* If the skb does not have enough free space at the tail, get a new one |
1441 | * plus 128 bytes for future expansions. If we have enough |
1442 | * room at the tail, reallocate without expansion only if the skb is cloned. |
1443 | */ |
1444 | int i, k, eat = (skb->tail + delta) - skb->end; |
1445 | |
1446 | if (eat > 0 || skb_cloned(skb)) { |
1447 | if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, |
1448 | GFP_ATOMIC)) |
1449 | return NULL; |
1450 | } |
1451 | |
1452 | if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) |
1453 | BUG(); |
1454 | |
1455 | /* Optimization: no fragments, no reasons to preestimate |
1456 | * size of pulled pages. Superb. |
1457 | */ |
1458 | if (!skb_has_frag_list(skb)) |
1459 | goto pull_pages; |
1460 | |
1461 | /* Estimate size of pulled pages. */ |
1462 | eat = delta; |
1463 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
1464 | int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); |
1465 | |
1466 | if (size >= eat) |
1467 | goto pull_pages; |
1468 | eat -= size; |
1469 | } |
1470 | |
1471 | /* If we need to update the frag list, we are in trouble. |
1472 | * Certainly, it is possible to add an offset to the skb data, |
1473 | * but taking into account that pulling is expected to |
1474 | * be a very rare operation, it is worth fighting against |
1475 | * further bloating of the skb head and crucifying ourselves here instead. |
1476 | * Pure masochism, indeed. 8)8) |
1477 | */ |
1478 | if (eat) { |
1479 | struct sk_buff *list = skb_shinfo(skb)->frag_list; |
1480 | struct sk_buff *clone = NULL; |
1481 | struct sk_buff *insp = NULL; |
1482 | |
1483 | do { |
1484 | BUG_ON(!list); |
1485 | |
1486 | if (list->len <= eat) { |
1487 | /* Eaten as whole. */ |
1488 | eat -= list->len; |
1489 | list = list->next; |
1490 | insp = list; |
1491 | } else { |
1492 | /* Eaten partially. */ |
1493 | |
1494 | if (skb_shared(list)) { |
1495 | /* Sucks! We need to fork list. :-( */ |
1496 | clone = skb_clone(list, GFP_ATOMIC); |
1497 | if (!clone) |
1498 | return NULL; |
1499 | insp = list->next; |
1500 | list = clone; |
1501 | } else { |
1502 | /* This may be pulled without |
1503 | * problems. */ |
1504 | insp = list; |
1505 | } |
1506 | if (!pskb_pull(list, eat)) { |
1507 | kfree_skb(clone); |
1508 | return NULL; |
1509 | } |
1510 | break; |
1511 | } |
1512 | } while (eat); |
1513 | |
1514 | /* Free pulled out fragments. */ |
1515 | while ((list = skb_shinfo(skb)->frag_list) != insp) { |
1516 | skb_shinfo(skb)->frag_list = list->next; |
1517 | kfree_skb(list); |
1518 | } |
1519 | /* And insert new clone at head. */ |
1520 | if (clone) { |
1521 | clone->next = list; |
1522 | skb_shinfo(skb)->frag_list = clone; |
1523 | } |
1524 | } |
1525 | /* Success! Now we may commit changes to skb data. */ |
1526 | |
1527 | pull_pages: |
1528 | eat = delta; |
1529 | k = 0; |
1530 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
1531 | int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); |
1532 | |
1533 | if (size <= eat) { |
1534 | skb_frag_unref(skb, i); |
1535 | eat -= size; |
1536 | } else { |
1537 | skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; |
1538 | if (eat) { |
1539 | skb_shinfo(skb)->frags[k].page_offset += eat; |
1540 | skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); |
1541 | eat = 0; |
1542 | } |
1543 | k++; |
1544 | } |
1545 | } |
1546 | skb_shinfo(skb)->nr_frags = k; |
1547 | |
1548 | skb->tail += delta; |
1549 | skb->data_len -= delta; |
1550 | |
1551 | return skb_tail_pointer(skb); |
1552 | } |
1553 | EXPORT_SYMBOL(__pskb_pull_tail); |
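/*
 * Illustrative sketch (not part of this file): protocol code rarely calls
 * __pskb_pull_tail() directly; it usually goes through pskb_may_pull(), which
 * falls back to this function when the requested bytes are not yet in the
 * linear head.
 *
 *	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 *		goto drop;			packet too short
 *	iph = ip_hdr(skb);			now safe to dereference
 */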
1554 | |
1555 | /** |
1556 | * skb_copy_bits - copy bits from skb to kernel buffer |
1557 | * @skb: source skb |
1558 | * @offset: offset in source |
1559 | * @to: destination buffer |
1560 | * @len: number of bytes to copy |
1561 | * |
1562 | * Copy the specified number of bytes from the source skb to the |
1563 | * destination buffer. |
1564 | * |
1565 | * CAUTION ! : |
1566 | * If its prototype is ever changed, |
1567 | * check arch/{*}/net/{*}.S files, |
1568 | * since it is called from BPF assembly code. |
1569 | */ |
1570 | int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) |
1571 | { |
1572 | int start = skb_headlen(skb); |
1573 | struct sk_buff *frag_iter; |
1574 | int i, copy; |
1575 | |
1576 | if (offset > (int)skb->len - len) |
1577 | goto fault; |
1578 | |
1579 | /* Copy header. */ |
1580 | if ((copy = start - offset) > 0) { |
1581 | if (copy > len) |
1582 | copy = len; |
1583 | skb_copy_from_linear_data_offset(skb, offset, to, copy); |
1584 | if ((len -= copy) == 0) |
1585 | return 0; |
1586 | offset += copy; |
1587 | to += copy; |
1588 | } |
1589 | |
1590 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
1591 | int end; |
1592 | skb_frag_t *f = &skb_shinfo(skb)->frags[i]; |
1593 | |
1594 | WARN_ON(start > offset + len); |
1595 | |
1596 | end = start + skb_frag_size(f); |
1597 | if ((copy = end - offset) > 0) { |
1598 | u8 *vaddr; |
1599 | |
1600 | if (copy > len) |
1601 | copy = len; |
1602 | |
1603 | vaddr = kmap_atomic(skb_frag_page(f)); |
1604 | memcpy(to, |
1605 | vaddr + f->page_offset + offset - start, |
1606 | copy); |
1607 | kunmap_atomic(vaddr); |
1608 | |
1609 | if ((len -= copy) == 0) |
1610 | return 0; |
1611 | offset += copy; |
1612 | to += copy; |
1613 | } |
1614 | start = end; |
1615 | } |
1616 | |
1617 | skb_walk_frags(skb, frag_iter) { |
1618 | int end; |
1619 | |
1620 | WARN_ON(start > offset + len); |
1621 | |
1622 | end = start + frag_iter->len; |
1623 | if ((copy = end - offset) > 0) { |
1624 | if (copy > len) |
1625 | copy = len; |
1626 | if (skb_copy_bits(frag_iter, offset - start, to, copy)) |
1627 | goto fault; |
1628 | if ((len -= copy) == 0) |
1629 | return 0; |
1630 | offset += copy; |
1631 | to += copy; |
1632 | } |
1633 | start = end; |
1634 | } |
1635 | |
1636 | if (!len) |
1637 | return 0; |
1638 | |
1639 | fault: |
1640 | return -EFAULT; |
1641 | } |
1642 | EXPORT_SYMBOL(skb_copy_bits); |
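/*
 * Illustrative sketch (not part of this file): extracting a header from a
 * possibly non-linear skb into a stack buffer without modifying the skb.
 * "off" is a hypothetical offset of the UDP header within the packet.
 *
 *	struct udphdr uh;
 *
 *	if (skb_copy_bits(skb, off, &uh, sizeof(uh)) < 0)
 *		goto drop;
 */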
1643 | |
1644 | /* |
1645 | * Callback from splice_to_pipe(), if we need to release some pages |
1646 | * at the end of the spd in case we errored out while filling the pipe. |
1647 | */ |
1648 | static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) |
1649 | { |
1650 | put_page(spd->pages[i]); |
1651 | } |
1652 | |
1653 | static struct page *linear_to_page(struct page *page, unsigned int *len, |
1654 | unsigned int *offset, |
1655 | struct sock *sk) |
1656 | { |
1657 | struct page_frag *pfrag = sk_page_frag(sk); |
1658 | |
1659 | if (!sk_page_frag_refill(sk, pfrag)) |
1660 | return NULL; |
1661 | |
1662 | *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); |
1663 | |
1664 | memcpy(page_address(pfrag->page) + pfrag->offset, |
1665 | page_address(page) + *offset, *len); |
1666 | *offset = pfrag->offset; |
1667 | pfrag->offset += *len; |
1668 | |
1669 | return pfrag->page; |
1670 | } |
1671 | |
1672 | static bool spd_can_coalesce(const struct splice_pipe_desc *spd, |
1673 | struct page *page, |
1674 | unsigned int offset) |
1675 | { |
1676 | return spd->nr_pages && |
1677 | spd->pages[spd->nr_pages - 1] == page && |
1678 | (spd->partial[spd->nr_pages - 1].offset + |
1679 | spd->partial[spd->nr_pages - 1].len == offset); |
1680 | } |
1681 | |
1682 | /* |
1683 | * Fill page/offset/length into spd, if it can hold more pages. |
1684 | */ |
1685 | static bool spd_fill_page(struct splice_pipe_desc *spd, |
1686 | struct pipe_inode_info *pipe, struct page *page, |
1687 | unsigned int *len, unsigned int offset, |
1688 | bool linear, |
1689 | struct sock *sk) |
1690 | { |
1691 | if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) |
1692 | return true; |
1693 | |
1694 | if (linear) { |
1695 | page = linear_to_page(page, len, &offset, sk); |
1696 | if (!page) |
1697 | return true; |
1698 | } |
1699 | if (spd_can_coalesce(spd, page, offset)) { |
1700 | spd->partial[spd->nr_pages - 1].len += *len; |
1701 | return false; |
1702 | } |
1703 | get_page(page); |
1704 | spd->pages[spd->nr_pages] = page; |
1705 | spd->partial[spd->nr_pages].len = *len; |
1706 | spd->partial[spd->nr_pages].offset = offset; |
1707 | spd->nr_pages++; |
1708 | |
1709 | return false; |
1710 | } |
1711 | |
1712 | static bool __splice_segment(struct page *page, unsigned int poff, |
1713 | unsigned int plen, unsigned int *off, |
1714 | unsigned int *len, |
1715 | struct splice_pipe_desc *spd, bool linear, |
1716 | struct sock *sk, |
1717 | struct pipe_inode_info *pipe) |
1718 | { |
1719 | if (!*len) |
1720 | return true; |
1721 | |
1722 | /* skip this segment if already processed */ |
1723 | if (*off >= plen) { |
1724 | *off -= plen; |
1725 | return false; |
1726 | } |
1727 | |
1728 | /* ignore any bits we already processed */ |
1729 | poff += *off; |
1730 | plen -= *off; |
1731 | *off = 0; |
1732 | |
1733 | do { |
1734 | unsigned int flen = min(*len, plen); |
1735 | |
1736 | if (spd_fill_page(spd, pipe, page, &flen, poff, |
1737 | linear, sk)) |
1738 | return true; |
1739 | poff += flen; |
1740 | plen -= flen; |
1741 | *len -= flen; |
1742 | } while (*len && plen); |
1743 | |
1744 | return false; |
1745 | } |
1746 | |
1747 | /* |
1748 | * Map linear and fragment data from the skb to spd. It reports true if the |
1749 | * pipe is full or if we already spliced the requested length. |
1750 | */ |
1751 | static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, |
1752 | unsigned int *offset, unsigned int *len, |
1753 | struct splice_pipe_desc *spd, struct sock *sk) |
1754 | { |
1755 | int seg; |
1756 | |
	/* map the linear part:
1758 | * If skb->head_frag is set, this 'linear' part is backed by a |
1759 | * fragment, and if the head is not shared with any clones then |
1760 | * we can avoid a copy since we own the head portion of this page. |
1761 | */ |
1762 | if (__splice_segment(virt_to_page(skb->data), |
1763 | (unsigned long) skb->data & (PAGE_SIZE - 1), |
1764 | skb_headlen(skb), |
1765 | offset, len, spd, |
1766 | skb_head_is_locked(skb), |
1767 | sk, pipe)) |
1768 | return true; |
1769 | |
1770 | /* |
1771 | * then map the fragments |
1772 | */ |
1773 | for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { |
1774 | const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; |
1775 | |
1776 | if (__splice_segment(skb_frag_page(f), |
1777 | f->page_offset, skb_frag_size(f), |
1778 | offset, len, spd, false, sk, pipe)) |
1779 | return true; |
1780 | } |
1781 | |
1782 | return false; |
1783 | } |
1784 | |
1785 | /* |
1786 | * Map data from the skb to a pipe. Should handle both the linear part, |
1787 | * the fragments, and the frag list. It does NOT handle frag lists within |
1788 | * the frag list, if such a thing exists. We'd probably need to recurse to |
1789 | * handle that cleanly. |
1790 | */ |
1791 | int skb_splice_bits(struct sk_buff *skb, unsigned int offset, |
1792 | struct pipe_inode_info *pipe, unsigned int tlen, |
1793 | unsigned int flags) |
1794 | { |
1795 | struct partial_page partial[MAX_SKB_FRAGS]; |
1796 | struct page *pages[MAX_SKB_FRAGS]; |
1797 | struct splice_pipe_desc spd = { |
1798 | .pages = pages, |
1799 | .partial = partial, |
1800 | .nr_pages_max = MAX_SKB_FRAGS, |
1801 | .flags = flags, |
1802 | .ops = &nosteal_pipe_buf_ops, |
1803 | .spd_release = sock_spd_release, |
1804 | }; |
1805 | struct sk_buff *frag_iter; |
1806 | struct sock *sk = skb->sk; |
1807 | int ret = 0; |
1808 | |
1809 | /* |
1810 | * __skb_splice_bits() only fails if the output has no room left, |
1811 | * so no point in going over the frag_list for the error case. |
1812 | */ |
1813 | if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) |
1814 | goto done; |
1815 | else if (!tlen) |
1816 | goto done; |
1817 | |
1818 | /* |
1819 | * now see if we have a frag_list to map |
1820 | */ |
1821 | skb_walk_frags(skb, frag_iter) { |
1822 | if (!tlen) |
1823 | break; |
1824 | if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) |
1825 | break; |
1826 | } |
1827 | |
1828 | done: |
1829 | if (spd.nr_pages) { |
1830 | /* |
1831 | * Drop the socket lock, otherwise we have reverse |
1832 | * locking dependencies between sk_lock and i_mutex |
1833 | * here as compared to sendfile(). We enter here |
1834 | * with the socket lock held, and splice_to_pipe() will |
1835 | * grab the pipe inode lock. For sendfile() emulation, |
1836 | * we call into ->sendpage() with the i_mutex lock held |
1837 | * and networking will grab the socket lock. |
1838 | */ |
1839 | release_sock(sk); |
1840 | ret = splice_to_pipe(pipe, &spd); |
1841 | lock_sock(sk); |
1842 | } |
1843 | |
1844 | return ret; |
1845 | } |
1846 | |
1847 | /** |
1848 | * skb_store_bits - store bits from kernel buffer to skb |
1849 | * @skb: destination buffer |
1850 | * @offset: offset in destination |
1851 | * @from: source buffer |
1852 | * @len: number of bytes to copy |
1853 | * |
1854 | * Copy the specified number of bytes from the source buffer to the |
1855 | * destination skb. This function handles all the messy bits of |
1856 | * traversing fragment lists and such. |
1857 | */ |
1858 | |
1859 | int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) |
1860 | { |
1861 | int start = skb_headlen(skb); |
1862 | struct sk_buff *frag_iter; |
1863 | int i, copy; |
1864 | |
1865 | if (offset > (int)skb->len - len) |
1866 | goto fault; |
1867 | |
1868 | if ((copy = start - offset) > 0) { |
1869 | if (copy > len) |
1870 | copy = len; |
1871 | skb_copy_to_linear_data_offset(skb, offset, from, copy); |
1872 | if ((len -= copy) == 0) |
1873 | return 0; |
1874 | offset += copy; |
1875 | from += copy; |
1876 | } |
1877 | |
1878 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
1879 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
1880 | int end; |
1881 | |
1882 | WARN_ON(start > offset + len); |
1883 | |
1884 | end = start + skb_frag_size(frag); |
1885 | if ((copy = end - offset) > 0) { |
1886 | u8 *vaddr; |
1887 | |
1888 | if (copy > len) |
1889 | copy = len; |
1890 | |
1891 | vaddr = kmap_atomic(skb_frag_page(frag)); |
1892 | memcpy(vaddr + frag->page_offset + offset - start, |
1893 | from, copy); |
1894 | kunmap_atomic(vaddr); |
1895 | |
1896 | if ((len -= copy) == 0) |
1897 | return 0; |
1898 | offset += copy; |
1899 | from += copy; |
1900 | } |
1901 | start = end; |
1902 | } |
1903 | |
1904 | skb_walk_frags(skb, frag_iter) { |
1905 | int end; |
1906 | |
1907 | WARN_ON(start > offset + len); |
1908 | |
1909 | end = start + frag_iter->len; |
1910 | if ((copy = end - offset) > 0) { |
1911 | if (copy > len) |
1912 | copy = len; |
1913 | if (skb_store_bits(frag_iter, offset - start, |
1914 | from, copy)) |
1915 | goto fault; |
1916 | if ((len -= copy) == 0) |
1917 | return 0; |
1918 | offset += copy; |
1919 | from += copy; |
1920 | } |
1921 | start = end; |
1922 | } |
1923 | if (!len) |
1924 | return 0; |
1925 | |
1926 | fault: |
1927 | return -EFAULT; |
1928 | } |
1929 | EXPORT_SYMBOL(skb_store_bits); |
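
/*
 * Illustrative sketch (not part of this file; the helper and its caller are
 * hypothetical): skb_store_bits() and skb_copy_bits() let callers write and
 * read a byte range without caring whether it lives in the linear head, in
 * page frags or in a frag list. The skb data must be private (e.g. after
 * skb_cow()) before storing into it.
 *
 *	static int example_patch_bytes(struct sk_buff *skb, int offset)
 *	{
 *		u8 buf[4] = { 0xde, 0xad, 0xbe, 0xef };
 *		u8 check[4];
 *
 *		if (skb_store_bits(skb, offset, buf, sizeof(buf)))
 *			return -EFAULT;		// range falls outside the skb
 *		if (skb_copy_bits(skb, offset, check, sizeof(check)))
 *			return -EFAULT;
 *		return memcmp(buf, check, sizeof(check)) ? -EIO : 0;
 *	}
 */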
1930 | |
1931 | /* Checksum skb data. */ |
1932 | __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, |
1933 | __wsum csum, const struct skb_checksum_ops *ops) |
1934 | { |
1935 | int start = skb_headlen(skb); |
1936 | int i, copy = start - offset; |
1937 | struct sk_buff *frag_iter; |
1938 | int pos = 0; |
1939 | |
1940 | /* Checksum header. */ |
1941 | if (copy > 0) { |
1942 | if (copy > len) |
1943 | copy = len; |
1944 | csum = ops->update(skb->data + offset, copy, csum); |
1945 | if ((len -= copy) == 0) |
1946 | return csum; |
1947 | offset += copy; |
1948 | pos = copy; |
1949 | } |
1950 | |
1951 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
1952 | int end; |
1953 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
1954 | |
1955 | WARN_ON(start > offset + len); |
1956 | |
1957 | end = start + skb_frag_size(frag); |
1958 | if ((copy = end - offset) > 0) { |
1959 | __wsum csum2; |
1960 | u8 *vaddr; |
1961 | |
1962 | if (copy > len) |
1963 | copy = len; |
1964 | vaddr = kmap_atomic(skb_frag_page(frag)); |
1965 | csum2 = ops->update(vaddr + frag->page_offset + |
1966 | offset - start, copy, 0); |
1967 | kunmap_atomic(vaddr); |
1968 | csum = ops->combine(csum, csum2, pos, copy); |
1969 | if (!(len -= copy)) |
1970 | return csum; |
1971 | offset += copy; |
1972 | pos += copy; |
1973 | } |
1974 | start = end; |
1975 | } |
1976 | |
1977 | skb_walk_frags(skb, frag_iter) { |
1978 | int end; |
1979 | |
1980 | WARN_ON(start > offset + len); |
1981 | |
1982 | end = start + frag_iter->len; |
1983 | if ((copy = end - offset) > 0) { |
1984 | __wsum csum2; |
1985 | if (copy > len) |
1986 | copy = len; |
1987 | csum2 = __skb_checksum(frag_iter, offset - start, |
1988 | copy, 0, ops); |
1989 | csum = ops->combine(csum, csum2, pos, copy); |
1990 | if ((len -= copy) == 0) |
1991 | return csum; |
1992 | offset += copy; |
1993 | pos += copy; |
1994 | } |
1995 | start = end; |
1996 | } |
1997 | BUG_ON(len); |
1998 | |
1999 | return csum; |
2000 | } |
2001 | EXPORT_SYMBOL(__skb_checksum); |
2002 | |
2003 | __wsum skb_checksum(const struct sk_buff *skb, int offset, |
2004 | int len, __wsum csum) |
2005 | { |
2006 | const struct skb_checksum_ops ops = { |
2007 | .update = csum_partial_ext, |
2008 | .combine = csum_block_add_ext, |
2009 | }; |
2010 | |
2011 | return __skb_checksum(skb, offset, len, csum, &ops); |
2012 | } |
2013 | EXPORT_SYMBOL(skb_checksum); |
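
/*
 * Illustrative sketch (not part of this file): verifying a full Internet
 * checksum over an entire packet, which is essentially what
 * __skb_checksum_complete() does for packets without hardware checksums.
 *
 *	__wsum csum = skb_checksum(skb, 0, skb->len, 0);
 *
 *	if (csum_fold(csum))
 *		;	// non-zero folded sum: the packet checksum is bad
 */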
2014 | |
/* Both of the above in one bottle. */
2016 | |
2017 | __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, |
2018 | u8 *to, int len, __wsum csum) |
2019 | { |
2020 | int start = skb_headlen(skb); |
2021 | int i, copy = start - offset; |
2022 | struct sk_buff *frag_iter; |
2023 | int pos = 0; |
2024 | |
2025 | /* Copy header. */ |
2026 | if (copy > 0) { |
2027 | if (copy > len) |
2028 | copy = len; |
2029 | csum = csum_partial_copy_nocheck(skb->data + offset, to, |
2030 | copy, csum); |
2031 | if ((len -= copy) == 0) |
2032 | return csum; |
2033 | offset += copy; |
2034 | to += copy; |
2035 | pos = copy; |
2036 | } |
2037 | |
2038 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
2039 | int end; |
2040 | |
2041 | WARN_ON(start > offset + len); |
2042 | |
2043 | end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); |
2044 | if ((copy = end - offset) > 0) { |
2045 | __wsum csum2; |
2046 | u8 *vaddr; |
2047 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
2048 | |
2049 | if (copy > len) |
2050 | copy = len; |
2051 | vaddr = kmap_atomic(skb_frag_page(frag)); |
2052 | csum2 = csum_partial_copy_nocheck(vaddr + |
2053 | frag->page_offset + |
2054 | offset - start, to, |
2055 | copy, 0); |
2056 | kunmap_atomic(vaddr); |
2057 | csum = csum_block_add(csum, csum2, pos); |
2058 | if (!(len -= copy)) |
2059 | return csum; |
2060 | offset += copy; |
2061 | to += copy; |
2062 | pos += copy; |
2063 | } |
2064 | start = end; |
2065 | } |
2066 | |
2067 | skb_walk_frags(skb, frag_iter) { |
2068 | __wsum csum2; |
2069 | int end; |
2070 | |
2071 | WARN_ON(start > offset + len); |
2072 | |
2073 | end = start + frag_iter->len; |
2074 | if ((copy = end - offset) > 0) { |
2075 | if (copy > len) |
2076 | copy = len; |
2077 | csum2 = skb_copy_and_csum_bits(frag_iter, |
2078 | offset - start, |
2079 | to, copy, 0); |
2080 | csum = csum_block_add(csum, csum2, pos); |
2081 | if ((len -= copy) == 0) |
2082 | return csum; |
2083 | offset += copy; |
2084 | to += copy; |
2085 | pos += copy; |
2086 | } |
2087 | start = end; |
2088 | } |
2089 | BUG_ON(len); |
2090 | return csum; |
2091 | } |
2092 | EXPORT_SYMBOL(skb_copy_and_csum_bits); |
2093 | |
2094 | /** |
2095 | * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() |
2096 | * @from: source buffer |
2097 | * |
2098 | * Calculates the amount of linear headroom needed in the 'to' skb passed |
2099 | * into skb_zerocopy(). |
2100 | */ |
2101 | unsigned int |
2102 | skb_zerocopy_headlen(const struct sk_buff *from) |
2103 | { |
2104 | unsigned int hlen = 0; |
2105 | |
2106 | if (!from->head_frag || |
2107 | skb_headlen(from) < L1_CACHE_BYTES || |
2108 | skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) |
2109 | hlen = skb_headlen(from); |
2110 | |
2111 | if (skb_has_frag_list(from)) |
2112 | hlen = from->len; |
2113 | |
2114 | return hlen; |
2115 | } |
2116 | EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); |
2117 | |
2118 | /** |
2119 | * skb_zerocopy - Zero copy skb to skb |
2120 | * @to: destination buffer |
2121 | * @from: source buffer |
2122 | * @len: number of bytes to copy from source buffer |
2123 | * @hlen: size of linear headroom in destination buffer |
2124 | * |
 * Copies up to @len bytes from @from to @to by creating references
 * to the frags in the source buffer.
 *
 * The @hlen as calculated by skb_zerocopy_headlen() specifies the
 * headroom in the @to buffer.
2130 | * |
2131 | * Return value: |
2132 | * 0: everything is OK |
2133 | * -ENOMEM: couldn't orphan frags of @from due to lack of memory |
2134 | * -EFAULT: skb_copy_bits() found some problem with skb geometry |
2135 | */ |
2136 | int |
2137 | skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) |
2138 | { |
2139 | int i, j = 0; |
2140 | int plen = 0; /* length of skb->head fragment */ |
2141 | int ret; |
2142 | struct page *page; |
2143 | unsigned int offset; |
2144 | |
2145 | BUG_ON(!from->head_frag && !hlen); |
2146 | |
	/* don't bother with small payloads */
2148 | if (len <= skb_tailroom(to)) |
2149 | return skb_copy_bits(from, 0, skb_put(to, len), len); |
2150 | |
2151 | if (hlen) { |
2152 | ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); |
2153 | if (unlikely(ret)) |
2154 | return ret; |
2155 | len -= hlen; |
2156 | } else { |
2157 | plen = min_t(int, skb_headlen(from), len); |
2158 | if (plen) { |
2159 | page = virt_to_head_page(from->head); |
2160 | offset = from->data - (unsigned char *)page_address(page); |
2161 | __skb_fill_page_desc(to, 0, page, offset, plen); |
2162 | get_page(page); |
2163 | j = 1; |
2164 | len -= plen; |
2165 | } |
2166 | } |
2167 | |
2168 | to->truesize += len + plen; |
2169 | to->len += len + plen; |
2170 | to->data_len += len + plen; |
2171 | |
2172 | if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { |
2173 | skb_tx_error(from); |
2174 | return -ENOMEM; |
2175 | } |
2176 | |
2177 | for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { |
2178 | if (!len) |
2179 | break; |
2180 | skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; |
2181 | skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); |
2182 | len -= skb_shinfo(to)->frags[j].size; |
2183 | skb_frag_ref(to, j); |
2184 | j++; |
2185 | } |
2186 | skb_shinfo(to)->nr_frags = j; |
2187 | |
2188 | return 0; |
2189 | } |
2190 | EXPORT_SYMBOL_GPL(skb_zerocopy); |
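
/*
 * Illustrative sketch (not part of this file; error handling is
 * hypothetical): the usual pairing of skb_zerocopy_headlen() and
 * skb_zerocopy() when duplicating packet data without copying the frags.
 *
 *	unsigned int hlen = skb_zerocopy_headlen(from);
 *	struct sk_buff *to = alloc_skb(hlen, GFP_ATOMIC);
 *	int err;
 *
 *	if (!to)
 *		return -ENOMEM;
 *	err = skb_zerocopy(to, from, from->len, hlen);
 *	if (err) {
 *		kfree_skb(to);
 *		return err;
 *	}
 */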
2191 | |
2192 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) |
2193 | { |
2194 | __wsum csum; |
2195 | long csstart; |
2196 | |
2197 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
2198 | csstart = skb_checksum_start_offset(skb); |
2199 | else |
2200 | csstart = skb_headlen(skb); |
2201 | |
2202 | BUG_ON(csstart > skb_headlen(skb)); |
2203 | |
2204 | skb_copy_from_linear_data(skb, to, csstart); |
2205 | |
2206 | csum = 0; |
2207 | if (csstart != skb->len) |
2208 | csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, |
2209 | skb->len - csstart, 0); |
2210 | |
2211 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
2212 | long csstuff = csstart + skb->csum_offset; |
2213 | |
2214 | *((__sum16 *)(to + csstuff)) = csum_fold(csum); |
2215 | } |
2216 | } |
2217 | EXPORT_SYMBOL(skb_copy_and_csum_dev); |
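
/*
 * Illustrative sketch (not part of this file; the bounce buffer is
 * hypothetical): a PIO-style driver copying a CHECKSUM_PARTIAL frame into a
 * staging buffer while filling in the checksum on the fly.
 *
 *	u8 *buf = priv->tx_bounce;	// must hold at least skb->len bytes
 *
 *	skb_copy_and_csum_dev(skb, buf);
 *	// buf now holds the full frame with the checksum folded into place
 */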
2218 | |
2219 | /** |
2220 | * skb_dequeue - remove from the head of the queue |
2221 | * @list: list to dequeue from |
2222 | * |
2223 | * Remove the head of the list. The list lock is taken so the function |
2224 | * may be used safely with other locking list functions. The head item is |
2225 | * returned or %NULL if the list is empty. |
2226 | */ |
2227 | |
2228 | struct sk_buff *skb_dequeue(struct sk_buff_head *list) |
2229 | { |
2230 | unsigned long flags; |
2231 | struct sk_buff *result; |
2232 | |
2233 | spin_lock_irqsave(&list->lock, flags); |
2234 | result = __skb_dequeue(list); |
2235 | spin_unlock_irqrestore(&list->lock, flags); |
2236 | return result; |
2237 | } |
2238 | EXPORT_SYMBOL(skb_dequeue); |
2239 | |
2240 | /** |
2241 | * skb_dequeue_tail - remove from the tail of the queue |
2242 | * @list: list to dequeue from |
2243 | * |
2244 | * Remove the tail of the list. The list lock is taken so the function |
2245 | * may be used safely with other locking list functions. The tail item is |
2246 | * returned or %NULL if the list is empty. |
2247 | */ |
2248 | struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) |
2249 | { |
2250 | unsigned long flags; |
2251 | struct sk_buff *result; |
2252 | |
2253 | spin_lock_irqsave(&list->lock, flags); |
2254 | result = __skb_dequeue_tail(list); |
2255 | spin_unlock_irqrestore(&list->lock, flags); |
2256 | return result; |
2257 | } |
2258 | EXPORT_SYMBOL(skb_dequeue_tail); |
2259 | |
2260 | /** |
2261 | * skb_queue_purge - empty a list |
2262 | * @list: list to empty |
2263 | * |
2264 | * Delete all buffers on an &sk_buff list. Each buffer is removed from |
2265 | * the list and one reference dropped. This function takes the list |
2266 | * lock and is atomic with respect to other list locking functions. |
2267 | */ |
2268 | void skb_queue_purge(struct sk_buff_head *list) |
2269 | { |
2270 | struct sk_buff *skb; |
2271 | while ((skb = skb_dequeue(list)) != NULL) |
2272 | kfree_skb(skb); |
2273 | } |
2274 | EXPORT_SYMBOL(skb_queue_purge); |
2275 | |
2276 | /** |
2277 | * skb_queue_head - queue a buffer at the list head |
2278 | * @list: list to use |
2279 | * @newsk: buffer to queue |
2280 | * |
2281 | * Queue a buffer at the start of the list. This function takes the |
 * list lock and can be used safely with other locking &sk_buff functions.
2284 | * |
2285 | * A buffer cannot be placed on two lists at the same time. |
2286 | */ |
2287 | void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) |
2288 | { |
2289 | unsigned long flags; |
2290 | |
2291 | spin_lock_irqsave(&list->lock, flags); |
2292 | __skb_queue_head(list, newsk); |
2293 | spin_unlock_irqrestore(&list->lock, flags); |
2294 | } |
2295 | EXPORT_SYMBOL(skb_queue_head); |
2296 | |
2297 | /** |
2298 | * skb_queue_tail - queue a buffer at the list tail |
2299 | * @list: list to use |
2300 | * @newsk: buffer to queue |
2301 | * |
2302 | * Queue a buffer at the tail of the list. This function takes the |
 * list lock and can be used safely with other locking &sk_buff functions.
2305 | * |
2306 | * A buffer cannot be placed on two lists at the same time. |
2307 | */ |
2308 | void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) |
2309 | { |
2310 | unsigned long flags; |
2311 | |
2312 | spin_lock_irqsave(&list->lock, flags); |
2313 | __skb_queue_tail(list, newsk); |
2314 | spin_unlock_irqrestore(&list->lock, flags); |
2315 | } |
2316 | EXPORT_SYMBOL(skb_queue_tail); |
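
/*
 * Illustrative sketch (not part of this file; the queue and handler are
 * hypothetical): the locked queue helpers above combine into a simple
 * producer/consumer pattern that is safe from any context.
 *
 *	static struct sk_buff_head example_q;
 *
 *	skb_queue_head_init(&example_q);
 *	...
 *	skb_queue_tail(&example_q, skb);		// producer side
 *	...
 *	while ((skb = skb_dequeue(&example_q)) != NULL)
 *		example_handle(skb);			// consumer side
 */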
2317 | |
2318 | /** |
2319 | * skb_unlink - remove a buffer from a list |
2320 | * @skb: buffer to remove |
2321 | * @list: list to use |
2322 | * |
2323 | * Remove a packet from a list. The list locks are taken and this |
 * function is atomic with respect to other list locked calls.
2325 | * |
2326 | * You must know what list the SKB is on. |
2327 | */ |
2328 | void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) |
2329 | { |
2330 | unsigned long flags; |
2331 | |
2332 | spin_lock_irqsave(&list->lock, flags); |
2333 | __skb_unlink(skb, list); |
2334 | spin_unlock_irqrestore(&list->lock, flags); |
2335 | } |
2336 | EXPORT_SYMBOL(skb_unlink); |
2337 | |
2338 | /** |
2339 | * skb_append - append a buffer |
2340 | * @old: buffer to insert after |
2341 | * @newsk: buffer to insert |
2342 | * @list: list to use |
2343 | * |
2344 | * Place a packet after a given packet in a list. The list locks are taken |
2345 | * and this function is atomic with respect to other list locked calls. |
2346 | * A buffer cannot be placed on two lists at the same time. |
2347 | */ |
2348 | void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) |
2349 | { |
2350 | unsigned long flags; |
2351 | |
2352 | spin_lock_irqsave(&list->lock, flags); |
2353 | __skb_queue_after(list, old, newsk); |
2354 | spin_unlock_irqrestore(&list->lock, flags); |
2355 | } |
2356 | EXPORT_SYMBOL(skb_append); |
2357 | |
2358 | /** |
2359 | * skb_insert - insert a buffer |
2360 | * @old: buffer to insert before |
2361 | * @newsk: buffer to insert |
2362 | * @list: list to use |
2363 | * |
2364 | * Place a packet before a given packet in a list. The list locks are |
2365 | * taken and this function is atomic with respect to other list locked |
2366 | * calls. |
2367 | * |
2368 | * A buffer cannot be placed on two lists at the same time. |
2369 | */ |
2370 | void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) |
2371 | { |
2372 | unsigned long flags; |
2373 | |
2374 | spin_lock_irqsave(&list->lock, flags); |
2375 | __skb_insert(newsk, old->prev, old, list); |
2376 | spin_unlock_irqrestore(&list->lock, flags); |
2377 | } |
2378 | EXPORT_SYMBOL(skb_insert); |
2379 | |
2380 | static inline void skb_split_inside_header(struct sk_buff *skb, |
2381 | struct sk_buff* skb1, |
2382 | const u32 len, const int pos) |
2383 | { |
2384 | int i; |
2385 | |
2386 | skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), |
2387 | pos - len); |
2388 | /* And move data appendix as is. */ |
2389 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) |
2390 | skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; |
2391 | |
2392 | skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; |
2393 | skb_shinfo(skb)->nr_frags = 0; |
2394 | skb1->data_len = skb->data_len; |
2395 | skb1->len += skb1->data_len; |
2396 | skb->data_len = 0; |
2397 | skb->len = len; |
2398 | skb_set_tail_pointer(skb, len); |
2399 | } |
2400 | |
2401 | static inline void skb_split_no_header(struct sk_buff *skb, |
2402 | struct sk_buff* skb1, |
2403 | const u32 len, int pos) |
2404 | { |
2405 | int i, k = 0; |
2406 | const int nfrags = skb_shinfo(skb)->nr_frags; |
2407 | |
2408 | skb_shinfo(skb)->nr_frags = 0; |
2409 | skb1->len = skb1->data_len = skb->len - len; |
2410 | skb->len = len; |
2411 | skb->data_len = len - pos; |
2412 | |
2413 | for (i = 0; i < nfrags; i++) { |
2414 | int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); |
2415 | |
2416 | if (pos + size > len) { |
2417 | skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; |
2418 | |
2419 | if (pos < len) { |
/* Split frag.
 * We have two options in this case:
 * 1. Move the whole frag to the second
 *    part, if possible. E.g. this approach
 *    is mandatory for TUX, where splitting
 *    is expensive.
 * 2. Split the frag accurately at @len.
 *    That is what we do here.
 */
2428 | skb_frag_ref(skb, i); |
2429 | skb_shinfo(skb1)->frags[0].page_offset += len - pos; |
2430 | skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); |
2431 | skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); |
2432 | skb_shinfo(skb)->nr_frags++; |
2433 | } |
2434 | k++; |
2435 | } else |
2436 | skb_shinfo(skb)->nr_frags++; |
2437 | pos += size; |
2438 | } |
2439 | skb_shinfo(skb1)->nr_frags = k; |
2440 | } |
2441 | |
2442 | /** |
2443 | * skb_split - Split fragmented skb to two parts at length len. |
2444 | * @skb: the buffer to split |
2445 | * @skb1: the buffer to receive the second part |
2446 | * @len: new length for skb |
2447 | */ |
2448 | void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) |
2449 | { |
2450 | int pos = skb_headlen(skb); |
2451 | |
2452 | skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; |
2453 | if (len < pos) /* Split line is inside header. */ |
2454 | skb_split_inside_header(skb, skb1, len, pos); |
2455 | else /* Second chunk has no header, nothing to copy. */ |
2456 | skb_split_no_header(skb, skb1, len, pos); |
2457 | } |
2458 | EXPORT_SYMBOL(skb_split); |
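
/*
 * Illustrative sketch (not part of this file; assumes @skb is private, i.e.
 * not cloned or shared): splitting a buffer at @len, TCP-fragment style.
 * The second buffer needs enough tailroom for whatever part of the linear
 * header ends up in it (at most skb_headlen(skb)).
 *
 *	static struct sk_buff *example_split(struct sk_buff *skb, u32 len)
 *	{
 *		struct sk_buff *rest = alloc_skb(skb_headlen(skb), GFP_ATOMIC);
 *
 *		if (!rest)
 *			return NULL;
 *		skb_split(skb, rest, len);	// skb keeps [0, len), rest gets the tail
 *		return rest;
 *	}
 */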
2459 | |
2460 | /* Shifting from/to a cloned skb is a no-go. |
2461 | * |
2462 | * Caller cannot keep skb_shinfo related pointers past calling here! |
2463 | */ |
2464 | static int skb_prepare_for_shift(struct sk_buff *skb) |
2465 | { |
2466 | return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
2467 | } |
2468 | |
2469 | /** |
2470 | * skb_shift - Shifts paged data partially from skb to another |
2471 | * @tgt: buffer into which tail data gets added |
2472 | * @skb: buffer from which the paged data comes from |
2473 | * @shiftlen: shift up to this many bytes |
2474 | * |
 * Attempts to shift up to shiftlen worth of bytes, which may be less than
 * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
 * It is up to the caller to free skb if everything was shifted.
 *
 * If @tgt runs out of frags, the whole operation is aborted.
 *
 * @skb must contain nothing but paged data, while @tgt is allowed to
 * carry non-paged data as well.
2483 | * |
2484 | * TODO: full sized shift could be optimized but that would need |
2485 | * specialized skb free'er to handle frags without up-to-date nr_frags. |
2486 | */ |
2487 | int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) |
2488 | { |
2489 | int from, to, merge, todo; |
2490 | struct skb_frag_struct *fragfrom, *fragto; |
2491 | |
2492 | BUG_ON(shiftlen > skb->len); |
2493 | BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ |
2494 | |
2495 | todo = shiftlen; |
2496 | from = 0; |
2497 | to = skb_shinfo(tgt)->nr_frags; |
2498 | fragfrom = &skb_shinfo(skb)->frags[from]; |
2499 | |
2500 | /* Actual merge is delayed until the point when we know we can |
2501 | * commit all, so that we don't have to undo partial changes |
2502 | */ |
2503 | if (!to || |
2504 | !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), |
2505 | fragfrom->page_offset)) { |
2506 | merge = -1; |
2507 | } else { |
2508 | merge = to - 1; |
2509 | |
2510 | todo -= skb_frag_size(fragfrom); |
2511 | if (todo < 0) { |
2512 | if (skb_prepare_for_shift(skb) || |
2513 | skb_prepare_for_shift(tgt)) |
2514 | return 0; |
2515 | |
2516 | /* All previous frag pointers might be stale! */ |
2517 | fragfrom = &skb_shinfo(skb)->frags[from]; |
2518 | fragto = &skb_shinfo(tgt)->frags[merge]; |
2519 | |
2520 | skb_frag_size_add(fragto, shiftlen); |
2521 | skb_frag_size_sub(fragfrom, shiftlen); |
2522 | fragfrom->page_offset += shiftlen; |
2523 | |
2524 | goto onlymerged; |
2525 | } |
2526 | |
2527 | from++; |
2528 | } |
2529 | |
2530 | /* Skip full, not-fitting skb to avoid expensive operations */ |
2531 | if ((shiftlen == skb->len) && |
2532 | (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) |
2533 | return 0; |
2534 | |
2535 | if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) |
2536 | return 0; |
2537 | |
2538 | while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { |
2539 | if (to == MAX_SKB_FRAGS) |
2540 | return 0; |
2541 | |
2542 | fragfrom = &skb_shinfo(skb)->frags[from]; |
2543 | fragto = &skb_shinfo(tgt)->frags[to]; |
2544 | |
2545 | if (todo >= skb_frag_size(fragfrom)) { |
2546 | *fragto = *fragfrom; |
2547 | todo -= skb_frag_size(fragfrom); |
2548 | from++; |
2549 | to++; |
2550 | |
2551 | } else { |
2552 | __skb_frag_ref(fragfrom); |
2553 | fragto->page = fragfrom->page; |
2554 | fragto->page_offset = fragfrom->page_offset; |
2555 | skb_frag_size_set(fragto, todo); |
2556 | |
2557 | fragfrom->page_offset += todo; |
2558 | skb_frag_size_sub(fragfrom, todo); |
2559 | todo = 0; |
2560 | |
2561 | to++; |
2562 | break; |
2563 | } |
2564 | } |
2565 | |
2566 | /* Ready to "commit" this state change to tgt */ |
2567 | skb_shinfo(tgt)->nr_frags = to; |
2568 | |
2569 | if (merge >= 0) { |
2570 | fragfrom = &skb_shinfo(skb)->frags[0]; |
2571 | fragto = &skb_shinfo(tgt)->frags[merge]; |
2572 | |
2573 | skb_frag_size_add(fragto, skb_frag_size(fragfrom)); |
2574 | __skb_frag_unref(fragfrom); |
2575 | } |
2576 | |
2577 | /* Reposition in the original skb */ |
2578 | to = 0; |
2579 | while (from < skb_shinfo(skb)->nr_frags) |
2580 | skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; |
2581 | skb_shinfo(skb)->nr_frags = to; |
2582 | |
2583 | BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); |
2584 | |
2585 | onlymerged: |
/* Most likely the tgt won't ever need its checksum anymore; the skb,
 * on the other hand, might need it if it has to be retransmitted.
2588 | */ |
2589 | tgt->ip_summed = CHECKSUM_PARTIAL; |
2590 | skb->ip_summed = CHECKSUM_PARTIAL; |
2591 | |
2592 | /* Yak, is it really working this way? Some helper please? */ |
2593 | skb->len -= shiftlen; |
2594 | skb->data_len -= shiftlen; |
2595 | skb->truesize -= shiftlen; |
2596 | tgt->len += shiftlen; |
2597 | tgt->data_len += shiftlen; |
2598 | tgt->truesize += shiftlen; |
2599 | |
2600 | return shiftlen; |
2601 | } |
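
/*
 * Illustrative sketch (not part of this file): how a caller such as the TCP
 * SACK processing code consumes skb_shift(). Remember that @skb may only
 * carry paged data here.
 *
 *	int shifted = skb_shift(tgt, skb, shiftlen);
 *
 *	if (!shifted)
 *		;	// nothing moved: tgt was full or a COW failed
 *	else if (shifted == skb->len)
 *		;	// everything moved: caller unlinks and frees skb
 *	else
 *		;	// partial move: skb->len was reduced by 'shifted' bytes
 */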
2602 | |
2603 | /** |
2604 | * skb_prepare_seq_read - Prepare a sequential read of skb data |
2605 | * @skb: the buffer to read |
2606 | * @from: lower offset of data to be read |
2607 | * @to: upper offset of data to be read |
2608 | * @st: state variable |
2609 | * |
2610 | * Initializes the specified state variable. Must be called before |
2611 | * invoking skb_seq_read() for the first time. |
2612 | */ |
2613 | void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, |
2614 | unsigned int to, struct skb_seq_state *st) |
2615 | { |
2616 | st->lower_offset = from; |
2617 | st->upper_offset = to; |
2618 | st->root_skb = st->cur_skb = skb; |
2619 | st->frag_idx = st->stepped_offset = 0; |
2620 | st->frag_data = NULL; |
2621 | } |
2622 | EXPORT_SYMBOL(skb_prepare_seq_read); |
2623 | |
2624 | /** |
2625 | * skb_seq_read - Sequentially read skb data |
2626 | * @consumed: number of bytes consumed by the caller so far |
2627 | * @data: destination pointer for data to be returned |
2628 | * @st: state variable |
2629 | * |
2630 | * Reads a block of skb data at @consumed relative to the |
2631 | * lower offset specified to skb_prepare_seq_read(). Assigns |
2632 | * the head of the data block to @data and returns the length |
2633 | * of the block or 0 if the end of the skb data or the upper |
2634 | * offset has been reached. |
2635 | * |
2636 | * The caller is not required to consume all of the data |
2637 | * returned, i.e. @consumed is typically set to the number |
2638 | * of bytes already consumed and the next call to |
2639 | * skb_seq_read() will return the remaining part of the block. |
2640 | * |
 * Note 1: The size of each block of data returned can be arbitrary;
 *         this limitation is the cost of zerocopy sequential
 *         reads of potentially non-linear data.
2644 | * |
2645 | * Note 2: Fragment lists within fragments are not implemented |
2646 | * at the moment, state->root_skb could be replaced with |
2647 | * a stack for this purpose. |
2648 | */ |
2649 | unsigned int skb_seq_read(unsigned int consumed, const u8 **data, |
2650 | struct skb_seq_state *st) |
2651 | { |
2652 | unsigned int block_limit, abs_offset = consumed + st->lower_offset; |
2653 | skb_frag_t *frag; |
2654 | |
2655 | if (unlikely(abs_offset >= st->upper_offset)) { |
2656 | if (st->frag_data) { |
2657 | kunmap_atomic(st->frag_data); |
2658 | st->frag_data = NULL; |
2659 | } |
2660 | return 0; |
2661 | } |
2662 | |
2663 | next_skb: |
2664 | block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; |
2665 | |
2666 | if (abs_offset < block_limit && !st->frag_data) { |
2667 | *data = st->cur_skb->data + (abs_offset - st->stepped_offset); |
2668 | return block_limit - abs_offset; |
2669 | } |
2670 | |
2671 | if (st->frag_idx == 0 && !st->frag_data) |
2672 | st->stepped_offset += skb_headlen(st->cur_skb); |
2673 | |
2674 | while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { |
2675 | frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; |
2676 | block_limit = skb_frag_size(frag) + st->stepped_offset; |
2677 | |
2678 | if (abs_offset < block_limit) { |
2679 | if (!st->frag_data) |
2680 | st->frag_data = kmap_atomic(skb_frag_page(frag)); |
2681 | |
2682 | *data = (u8 *) st->frag_data + frag->page_offset + |
2683 | (abs_offset - st->stepped_offset); |
2684 | |
2685 | return block_limit - abs_offset; |
2686 | } |
2687 | |
2688 | if (st->frag_data) { |
2689 | kunmap_atomic(st->frag_data); |
2690 | st->frag_data = NULL; |
2691 | } |
2692 | |
2693 | st->frag_idx++; |
2694 | st->stepped_offset += skb_frag_size(frag); |
2695 | } |
2696 | |
2697 | if (st->frag_data) { |
2698 | kunmap_atomic(st->frag_data); |
2699 | st->frag_data = NULL; |
2700 | } |
2701 | |
2702 | if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { |
2703 | st->cur_skb = skb_shinfo(st->root_skb)->frag_list; |
2704 | st->frag_idx = 0; |
2705 | goto next_skb; |
2706 | } else if (st->cur_skb->next) { |
2707 | st->cur_skb = st->cur_skb->next; |
2708 | st->frag_idx = 0; |
2709 | goto next_skb; |
2710 | } |
2711 | |
2712 | return 0; |
2713 | } |
2714 | EXPORT_SYMBOL(skb_seq_read); |
2715 | |
2716 | /** |
2717 | * skb_abort_seq_read - Abort a sequential read of skb data |
2718 | * @st: state variable |
2719 | * |
 * Must be called if the sequential read was abandoned before
 * skb_seq_read() returned 0, to release any mapped fragment.
2722 | */ |
2723 | void skb_abort_seq_read(struct skb_seq_state *st) |
2724 | { |
2725 | if (st->frag_data) |
2726 | kunmap_atomic(st->frag_data); |
2727 | } |
2728 | EXPORT_SYMBOL(skb_abort_seq_read); |
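
/*
 * Illustrative sketch (not part of this file): the canonical
 * prepare/read/abort cycle for walking an skb without linearizing it.
 *
 *	struct skb_seq_state st;
 *	const u8 *data;
 *	unsigned int consumed = 0, avail;
 *
 *	skb_prepare_seq_read(skb, 0, skb->len, &st);
 *	while ((avail = skb_seq_read(consumed, &data, &st)) != 0) {
 *		// process 'avail' bytes starting at 'data'
 *		consumed += avail;
 *	}
 *	// If the walk is abandoned before skb_seq_read() returns 0,
 *	// skb_abort_seq_read(&st) must be called to drop the kmap.
 */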
2729 | |
2730 | #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) |
2731 | |
2732 | static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, |
2733 | struct ts_config *conf, |
2734 | struct ts_state *state) |
2735 | { |
2736 | return skb_seq_read(offset, text, TS_SKB_CB(state)); |
2737 | } |
2738 | |
2739 | static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) |
2740 | { |
2741 | skb_abort_seq_read(TS_SKB_CB(state)); |
2742 | } |
2743 | |
2744 | /** |
2745 | * skb_find_text - Find a text pattern in skb data |
2746 | * @skb: the buffer to look in |
2747 | * @from: search offset |
2748 | * @to: search limit |
2749 | * @config: textsearch configuration |
2750 | * @state: uninitialized textsearch state variable |
2751 | * |
2752 | * Finds a pattern in the skb data according to the specified |
2753 | * textsearch configuration. Use textsearch_next() to retrieve |
2754 | * subsequent occurrences of the pattern. Returns the offset |
2755 | * to the first occurrence or UINT_MAX if no match was found. |
2756 | */ |
2757 | unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, |
2758 | unsigned int to, struct ts_config *config, |
2759 | struct ts_state *state) |
2760 | { |
2761 | unsigned int ret; |
2762 | |
2763 | config->get_next_block = skb_ts_get_next_block; |
2764 | config->finish = skb_ts_finish; |
2765 | |
2766 | skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); |
2767 | |
2768 | ret = textsearch_find(config, state); |
2769 | return (ret <= to - from ? ret : UINT_MAX); |
2770 | } |
2771 | EXPORT_SYMBOL(skb_find_text); |
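
/*
 * Illustrative sketch (not part of this file; the pattern is hypothetical):
 * searching an skb with a textsearch configuration, as the string-matching
 * classifiers do.
 *
 *	struct ts_config *conf;
 *	struct ts_state state;
 *	unsigned int pos;
 *
 *	conf = textsearch_prepare("kmp", "needle", 6, GFP_KERNEL, TS_AUTOLOAD);
 *	if (IS_ERR(conf))
 *		return PTR_ERR(conf);
 *	pos = skb_find_text(skb, 0, skb->len, conf, &state);
 *	textsearch_destroy(conf);
 *	// pos is UINT_MAX when "needle" does not occur in the searched range
 */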
2772 | |
2773 | /** |
 * skb_append_datato_frags - append the user data to a skb
 * @sk: sock structure
 * @skb: skb structure to be appended with user data.
 * @getfrag: callback function to be used for getting the user data
 * @from: pointer to user message iov
 * @length: length of the iov message
 *
 * Description: This procedure appends the user data to the fragment part
 * of the skb. If any page allocation fails, this procedure returns -ENOMEM.
2783 | */ |
2784 | int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, |
2785 | int (*getfrag)(void *from, char *to, int offset, |
2786 | int len, int odd, struct sk_buff *skb), |
2787 | void *from, int length) |
2788 | { |
2789 | int frg_cnt = skb_shinfo(skb)->nr_frags; |
2790 | int copy; |
2791 | int offset = 0; |
2792 | int ret; |
	struct page_frag *pfrag = &current->task_frag;
2794 | |
2795 | do { |
2796 | /* Return error if we don't have space for new frag */ |
2797 | if (frg_cnt >= MAX_SKB_FRAGS) |
2798 | return -EMSGSIZE; |
2799 | |
2800 | if (!sk_page_frag_refill(sk, pfrag)) |
2801 | return -ENOMEM; |
2802 | |
2803 | /* copy the user data to page */ |
2804 | copy = min_t(int, length, pfrag->size - pfrag->offset); |
2805 | |
2806 | ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, |
2807 | offset, copy, 0, skb); |
2808 | if (ret < 0) |
2809 | return -EFAULT; |
2810 | |
2811 | /* copy was successful so update the size parameters */ |
2812 | skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, |
2813 | copy); |
2814 | frg_cnt++; |
2815 | pfrag->offset += copy; |
2816 | get_page(pfrag->page); |
2817 | |
2818 | skb->truesize += copy; |
2819 | atomic_add(copy, &sk->sk_wmem_alloc); |
2820 | skb->len += copy; |
2821 | skb->data_len += copy; |
2822 | offset += copy; |
2823 | length -= copy; |
2824 | |
2825 | } while (length > 0); |
2826 | |
2827 | return 0; |
2828 | } |
2829 | EXPORT_SYMBOL(skb_append_datato_frags); |
2830 | |
2831 | /** |
2832 | * skb_pull_rcsum - pull skb and update receive checksum |
2833 | * @skb: buffer to update |
2834 | * @len: length of data pulled |
2835 | * |
2836 | * This function performs an skb_pull on the packet and updates |
2837 | * the CHECKSUM_COMPLETE checksum. It should be used on |
2838 | * receive path processing instead of skb_pull unless you know |
2839 | * that the checksum difference is zero (e.g., a valid IP header) |
2840 | * or you are setting ip_summed to CHECKSUM_NONE. |
2841 | */ |
2842 | unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) |
2843 | { |
2844 | BUG_ON(len > skb->len); |
2845 | skb->len -= len; |
2846 | BUG_ON(skb->len < skb->data_len); |
2847 | skb_postpull_rcsum(skb, skb->data, len); |
2848 | return skb->data += len; |
2849 | } |
2850 | EXPORT_SYMBOL_GPL(skb_pull_rcsum); |
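
/*
 * Illustrative sketch (not part of this file): stripping an encapsulation
 * header on receive while keeping a CHECKSUM_COMPLETE value correct, the
 * way the VLAN receive path strips its 4-byte tag.
 *
 *	// VLAN_HLEN comes from <linux/if_vlan.h>
 *	if (!pskb_may_pull(skb, VLAN_HLEN))
 *		goto drop;
 *	skb_pull_rcsum(skb, VLAN_HLEN);
 */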
2851 | |
2852 | /** |
2853 | * skb_segment - Perform protocol segmentation on skb. |
2854 | * @head_skb: buffer to segment |
2855 | * @features: features for the output path (see dev->features) |
2856 | * |
2857 | * This function performs segmentation on the given skb. It returns |
2858 | * a pointer to the first in a list of new skbs for the segments. |
2859 | * In case of error it returns ERR_PTR(err). |
2860 | */ |
2861 | struct sk_buff *skb_segment(struct sk_buff *head_skb, |
2862 | netdev_features_t features) |
2863 | { |
2864 | struct sk_buff *segs = NULL; |
2865 | struct sk_buff *tail = NULL; |
2866 | struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; |
2867 | skb_frag_t *frag = skb_shinfo(head_skb)->frags; |
2868 | unsigned int mss = skb_shinfo(head_skb)->gso_size; |
2869 | unsigned int doffset = head_skb->data - skb_mac_header(head_skb); |
2870 | struct sk_buff *frag_skb = head_skb; |
2871 | unsigned int offset = doffset; |
2872 | unsigned int tnl_hlen = skb_tnl_header_len(head_skb); |
2873 | unsigned int headroom; |
2874 | unsigned int len; |
2875 | __be16 proto; |
2876 | bool csum; |
2877 | int sg = !!(features & NETIF_F_SG); |
2878 | int nfrags = skb_shinfo(head_skb)->nr_frags; |
2879 | int err = -ENOMEM; |
2880 | int i = 0; |
2881 | int pos; |
2882 | int dummy; |
2883 | |
2884 | proto = skb_network_protocol(head_skb, &dummy); |
2885 | if (unlikely(!proto)) |
2886 | return ERR_PTR(-EINVAL); |
2887 | |
2888 | csum = !!can_checksum_protocol(features, proto); |
2889 | __skb_push(head_skb, doffset); |
2890 | headroom = skb_headroom(head_skb); |
2891 | pos = skb_headlen(head_skb); |
2892 | |
2893 | do { |
2894 | struct sk_buff *nskb; |
2895 | skb_frag_t *nskb_frag; |
2896 | int hsize; |
2897 | int size; |
2898 | |
2899 | len = head_skb->len - offset; |
2900 | if (len > mss) |
2901 | len = mss; |
2902 | |
2903 | hsize = skb_headlen(head_skb) - offset; |
2904 | if (hsize < 0) |
2905 | hsize = 0; |
2906 | if (hsize > len || !sg) |
2907 | hsize = len; |
2908 | |
2909 | if (!hsize && i >= nfrags && skb_headlen(list_skb) && |
2910 | (skb_headlen(list_skb) == len || sg)) { |
2911 | BUG_ON(skb_headlen(list_skb) > len); |
2912 | |
2913 | i = 0; |
2914 | nfrags = skb_shinfo(list_skb)->nr_frags; |
2915 | frag = skb_shinfo(list_skb)->frags; |
2916 | frag_skb = list_skb; |
2917 | pos += skb_headlen(list_skb); |
2918 | |
2919 | while (pos < offset + len) { |
2920 | BUG_ON(i >= nfrags); |
2921 | |
2922 | size = skb_frag_size(frag); |
2923 | if (pos + size > offset + len) |
2924 | break; |
2925 | |
2926 | i++; |
2927 | pos += size; |
2928 | frag++; |
2929 | } |
2930 | |
2931 | nskb = skb_clone(list_skb, GFP_ATOMIC); |
2932 | list_skb = list_skb->next; |
2933 | |
2934 | if (unlikely(!nskb)) |
2935 | goto err; |
2936 | |
2937 | if (unlikely(pskb_trim(nskb, len))) { |
2938 | kfree_skb(nskb); |
2939 | goto err; |
2940 | } |
2941 | |
2942 | hsize = skb_end_offset(nskb); |
2943 | if (skb_cow_head(nskb, doffset + headroom)) { |
2944 | kfree_skb(nskb); |
2945 | goto err; |
2946 | } |
2947 | |
2948 | nskb->truesize += skb_end_offset(nskb) - hsize; |
2949 | skb_release_head_state(nskb); |
2950 | __skb_push(nskb, doffset); |
2951 | } else { |
2952 | nskb = __alloc_skb(hsize + doffset + headroom, |
2953 | GFP_ATOMIC, skb_alloc_rx_flag(head_skb), |
2954 | NUMA_NO_NODE); |
2955 | |
2956 | if (unlikely(!nskb)) |
2957 | goto err; |
2958 | |
2959 | skb_reserve(nskb, headroom); |
2960 | __skb_put(nskb, doffset); |
2961 | } |
2962 | |
2963 | if (segs) |
2964 | tail->next = nskb; |
2965 | else |
2966 | segs = nskb; |
2967 | tail = nskb; |
2968 | |
2969 | __copy_skb_header(nskb, head_skb); |
2970 | nskb->mac_len = head_skb->mac_len; |
2971 | |
2972 | skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); |
2973 | |
2974 | skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, |
2975 | nskb->data - tnl_hlen, |
2976 | doffset + tnl_hlen); |
2977 | |
2978 | if (nskb->len == len + doffset) |
2979 | goto perform_csum_check; |
2980 | |
2981 | if (!sg) { |
2982 | nskb->ip_summed = CHECKSUM_NONE; |
2983 | nskb->csum = skb_copy_and_csum_bits(head_skb, offset, |
2984 | skb_put(nskb, len), |
2985 | len, 0); |
2986 | continue; |
2987 | } |
2988 | |
2989 | nskb_frag = skb_shinfo(nskb)->frags; |
2990 | |
2991 | skb_copy_from_linear_data_offset(head_skb, offset, |
2992 | skb_put(nskb, hsize), hsize); |
2993 | |
2994 | skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & |
2995 | SKBTX_SHARED_FRAG; |
2996 | |
2997 | while (pos < offset + len) { |
2998 | if (i >= nfrags) { |
2999 | BUG_ON(skb_headlen(list_skb)); |
3000 | |
3001 | i = 0; |
3002 | nfrags = skb_shinfo(list_skb)->nr_frags; |
3003 | frag = skb_shinfo(list_skb)->frags; |
3004 | frag_skb = list_skb; |
3005 | |
3006 | BUG_ON(!nfrags); |
3007 | |
3008 | list_skb = list_skb->next; |
3009 | } |
3010 | |
3011 | if (unlikely(skb_shinfo(nskb)->nr_frags >= |
3012 | MAX_SKB_FRAGS)) { |
3013 | net_warn_ratelimited( |
3014 | "skb_segment: too many frags: %u %u\n", |
3015 | pos, mss); |
3016 | goto err; |
3017 | } |
3018 | |
3019 | if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) |
3020 | goto err; |
3021 | |
3022 | *nskb_frag = *frag; |
3023 | __skb_frag_ref(nskb_frag); |
3024 | size = skb_frag_size(nskb_frag); |
3025 | |
3026 | if (pos < offset) { |
3027 | nskb_frag->page_offset += offset - pos; |
3028 | skb_frag_size_sub(nskb_frag, offset - pos); |
3029 | } |
3030 | |
3031 | skb_shinfo(nskb)->nr_frags++; |
3032 | |
3033 | if (pos + size <= offset + len) { |
3034 | i++; |
3035 | frag++; |
3036 | pos += size; |
3037 | } else { |
3038 | skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); |
3039 | goto skip_fraglist; |
3040 | } |
3041 | |
3042 | nskb_frag++; |
3043 | } |
3044 | |
3045 | skip_fraglist: |
3046 | nskb->data_len = len - hsize; |
3047 | nskb->len += nskb->data_len; |
3048 | nskb->truesize += nskb->data_len; |
3049 | |
3050 | perform_csum_check: |
3051 | if (!csum) { |
3052 | nskb->csum = skb_checksum(nskb, doffset, |
3053 | nskb->len - doffset, 0); |
3054 | nskb->ip_summed = CHECKSUM_NONE; |
3055 | } |
3056 | } while ((offset += len) < head_skb->len); |
3057 | |
3058 | return segs; |
3059 | |
3060 | err: |
3061 | kfree_skb_list(segs); |
3062 | return ERR_PTR(err); |
3063 | } |
3064 | EXPORT_SYMBOL_GPL(skb_segment); |
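
/*
 * Illustrative sketch (not part of this file; callers normally reach this
 * through skb_gso_segment(), and 'features' is hypothetical): consuming the
 * segment list produced by skb_segment().
 *
 *	struct sk_buff *segs, *seg;
 *
 *	segs = skb_segment(skb, features);
 *	if (IS_ERR(segs))
 *		return PTR_ERR(segs);
 *	consume_skb(skb);		// the original is no longer needed
 *	while (segs) {
 *		seg = segs;
 *		segs = segs->next;
 *		seg->next = NULL;
 *		// hand 'seg' to the lower layer / transmit path
 *	}
 */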
3065 | |
3066 | int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) |
3067 | { |
3068 | struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); |
3069 | unsigned int offset = skb_gro_offset(skb); |
3070 | unsigned int headlen = skb_headlen(skb); |
3071 | struct sk_buff *nskb, *lp, *p = *head; |
3072 | unsigned int len = skb_gro_len(skb); |
3073 | unsigned int delta_truesize; |
3074 | unsigned int headroom; |
3075 | |
3076 | if (unlikely(p->len + len >= 65536)) |
3077 | return -E2BIG; |
3078 | |
3079 | lp = NAPI_GRO_CB(p)->last; |
3080 | pinfo = skb_shinfo(lp); |
3081 | |
3082 | if (headlen <= offset) { |
3083 | skb_frag_t *frag; |
3084 | skb_frag_t *frag2; |
3085 | int i = skbinfo->nr_frags; |
3086 | int nr_frags = pinfo->nr_frags + i; |
3087 | |
3088 | if (nr_frags > MAX_SKB_FRAGS) |
3089 | goto merge; |
3090 | |
3091 | offset -= headlen; |
3092 | pinfo->nr_frags = nr_frags; |
3093 | skbinfo->nr_frags = 0; |
3094 | |
3095 | frag = pinfo->frags + nr_frags; |
3096 | frag2 = skbinfo->frags + i; |
3097 | do { |
3098 | *--frag = *--frag2; |
3099 | } while (--i); |
3100 | |
3101 | frag->page_offset += offset; |
3102 | skb_frag_size_sub(frag, offset); |
3103 | |
		/* all fragments' truesize: remove (head size + sk_buff) */
3105 | delta_truesize = skb->truesize - |
3106 | SKB_TRUESIZE(skb_end_offset(skb)); |
3107 | |
3108 | skb->truesize -= skb->data_len; |
3109 | skb->len -= skb->data_len; |
3110 | skb->data_len = 0; |
3111 | |
3112 | NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; |
3113 | goto done; |
3114 | } else if (skb->head_frag) { |
3115 | int nr_frags = pinfo->nr_frags; |
3116 | skb_frag_t *frag = pinfo->frags + nr_frags; |
3117 | struct page *page = virt_to_head_page(skb->head); |
3118 | unsigned int first_size = headlen - offset; |
3119 | unsigned int first_offset; |
3120 | |
3121 | if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) |
3122 | goto merge; |
3123 | |
3124 | first_offset = skb->data - |
3125 | (unsigned char *)page_address(page) + |
3126 | offset; |
3127 | |
3128 | pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; |
3129 | |
3130 | frag->page.p = page; |
3131 | frag->page_offset = first_offset; |
3132 | skb_frag_size_set(frag, first_size); |
3133 | |
3134 | memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); |
		/* We don't need to clear skbinfo->nr_frags here */
3136 | |
3137 | delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); |
3138 | NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; |
3139 | goto done; |
3140 | } |
3141 | if (pinfo->frag_list) |
3142 | goto merge; |
3143 | if (skb_gro_len(p) != pinfo->gso_size) |
3144 | return -E2BIG; |
3145 | |
3146 | headroom = skb_headroom(p); |
3147 | nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); |
3148 | if (unlikely(!nskb)) |
3149 | return -ENOMEM; |
3150 | |
3151 | __copy_skb_header(nskb, p); |
3152 | nskb->mac_len = p->mac_len; |
3153 | |
3154 | skb_reserve(nskb, headroom); |
3155 | __skb_put(nskb, skb_gro_offset(p)); |
3156 | |
3157 | skb_set_mac_header(nskb, skb_mac_header(p) - p->data); |
3158 | skb_set_network_header(nskb, skb_network_offset(p)); |
3159 | skb_set_transport_header(nskb, skb_transport_offset(p)); |
3160 | |
3161 | __skb_pull(p, skb_gro_offset(p)); |
3162 | memcpy(skb_mac_header(nskb), skb_mac_header(p), |
3163 | p->data - skb_mac_header(p)); |
3164 | |
3165 | skb_shinfo(nskb)->frag_list = p; |
3166 | skb_shinfo(nskb)->gso_size = pinfo->gso_size; |
3167 | pinfo->gso_size = 0; |
3168 | skb_header_release(p); |
3169 | NAPI_GRO_CB(nskb)->last = p; |
3170 | |
3171 | nskb->data_len += p->len; |
3172 | nskb->truesize += p->truesize; |
3173 | nskb->len += p->len; |
3174 | |
3175 | *head = nskb; |
3176 | nskb->next = p->next; |
3177 | p->next = NULL; |
3178 | |
3179 | p = nskb; |
3180 | |
3181 | merge: |
3182 | delta_truesize = skb->truesize; |
3183 | if (offset > headlen) { |
3184 | unsigned int eat = offset - headlen; |
3185 | |
3186 | skbinfo->frags[0].page_offset += eat; |
3187 | skb_frag_size_sub(&skbinfo->frags[0], eat); |
3188 | skb->data_len -= eat; |
3189 | skb->len -= eat; |
3190 | offset = headlen; |
3191 | } |
3192 | |
3193 | __skb_pull(skb, offset); |
3194 | |
3195 | if (NAPI_GRO_CB(p)->last == p) |
3196 | skb_shinfo(p)->frag_list = skb; |
3197 | else |
3198 | NAPI_GRO_CB(p)->last->next = skb; |
3199 | NAPI_GRO_CB(p)->last = skb; |
3200 | skb_header_release(skb); |
3201 | lp = p; |
3202 | |
3203 | done: |
3204 | NAPI_GRO_CB(p)->count++; |
3205 | p->data_len += len; |
3206 | p->truesize += delta_truesize; |
3207 | p->len += len; |
3208 | if (lp != p) { |
3209 | lp->data_len += len; |
3210 | lp->truesize += delta_truesize; |
3211 | lp->len += len; |
3212 | } |
3213 | NAPI_GRO_CB(skb)->same_flow = 1; |
3214 | return 0; |
3215 | } |
3216 | EXPORT_SYMBOL_GPL(skb_gro_receive); |
3217 | |
3218 | void __init skb_init(void) |
3219 | { |
3220 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", |
3221 | sizeof(struct sk_buff), |
3222 | 0, |
3223 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
3224 | NULL); |
3225 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", |
3226 | (2*sizeof(struct sk_buff)) + |
3227 | sizeof(atomic_t), |
3228 | 0, |
3229 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
3230 | NULL); |
3231 | } |
3232 | |
3233 | /** |
3234 | * skb_to_sgvec - Fill a scatter-gather list from a socket buffer |
3235 | * @skb: Socket buffer containing the buffers to be mapped |
3236 | * @sg: The scatter-gather list to map into |
3237 | * @offset: The offset into the buffer's contents to start mapping |
3238 | * @len: Length of buffer space to be mapped |
3239 | * |
3240 | * Fill the specified scatter-gather list with mappings/pointers into a |
3241 | * region of the buffer space attached to a socket buffer. |
3242 | */ |
3243 | static int |
3244 | __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) |
3245 | { |
3246 | int start = skb_headlen(skb); |
3247 | int i, copy = start - offset; |
3248 | struct sk_buff *frag_iter; |
3249 | int elt = 0; |
3250 | |
3251 | if (copy > 0) { |
3252 | if (copy > len) |
3253 | copy = len; |
3254 | sg_set_buf(sg, skb->data + offset, copy); |
3255 | elt++; |
3256 | if ((len -= copy) == 0) |
3257 | return elt; |
3258 | offset += copy; |
3259 | } |
3260 | |
3261 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
3262 | int end; |
3263 | |
3264 | WARN_ON(start > offset + len); |
3265 | |
3266 | end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); |
3267 | if ((copy = end - offset) > 0) { |
3268 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
3269 | |
3270 | if (copy > len) |
3271 | copy = len; |
3272 | sg_set_page(&sg[elt], skb_frag_page(frag), copy, |
3273 | frag->page_offset+offset-start); |
3274 | elt++; |
3275 | if (!(len -= copy)) |
3276 | return elt; |
3277 | offset += copy; |
3278 | } |
3279 | start = end; |
3280 | } |
3281 | |
3282 | skb_walk_frags(skb, frag_iter) { |
3283 | int end; |
3284 | |
3285 | WARN_ON(start > offset + len); |
3286 | |
3287 | end = start + frag_iter->len; |
3288 | if ((copy = end - offset) > 0) { |
3289 | if (copy > len) |
3290 | copy = len; |
3291 | elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, |
3292 | copy); |
3293 | if ((len -= copy) == 0) |
3294 | return elt; |
3295 | offset += copy; |
3296 | } |
3297 | start = end; |
3298 | } |
3299 | BUG_ON(len); |
3300 | return elt; |
3301 | } |
3302 | |
/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
 * given sglist without marking the sg entry that contains the last skb data
 * as the end. So the caller can manipulate the sg list at will when appending
 * new data after the first call, without calling sg_unmark_end to expand the
 * sg list.
3307 | * |
3308 | * Scenario to use skb_to_sgvec_nomark: |
3309 | * 1. sg_init_table |
3310 | * 2. skb_to_sgvec_nomark(payload1) |
3311 | * 3. skb_to_sgvec_nomark(payload2) |
3312 | * |
3313 | * This is equivalent to: |
3314 | * 1. sg_init_table |
3315 | * 2. skb_to_sgvec(payload1) |
3316 | * 3. sg_unmark_end |
3317 | * 4. skb_to_sgvec(payload2) |
3318 | * |
 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
 * is preferable.
3321 | */ |
3322 | int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, |
3323 | int offset, int len) |
3324 | { |
3325 | return __skb_to_sgvec(skb, sg, offset, len); |
3326 | } |
3327 | EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); |
3328 | |
3329 | int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) |
3330 | { |
3331 | int nsg = __skb_to_sgvec(skb, sg, offset, len); |
3332 | |
3333 | sg_mark_end(&sg[nsg - 1]); |
3334 | |
3335 | return nsg; |
3336 | } |
3337 | EXPORT_SYMBOL_GPL(skb_to_sgvec); |
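
/*
 * Illustrative sketch (not part of this file; assumes the skb has no frag
 * list, so head plus MAX_SKB_FRAGS entries are enough): mapping an skb for
 * the crypto layer.
 *
 *	struct scatterlist sg[MAX_SKB_FRAGS + 1];
 *	int nsg;
 *
 *	sg_init_table(sg, ARRAY_SIZE(sg));
 *	nsg = skb_to_sgvec(skb, sg, 0, skb->len);
 *	// sg[0..nsg-1] now describes every byte of the skb
 */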
3338 | |
3339 | /** |
3340 | * skb_cow_data - Check that a socket buffer's data buffers are writable |
3341 | * @skb: The socket buffer to check. |
3342 | * @tailbits: Amount of trailing space to be added |
3343 | * @trailer: Returned pointer to the skb where the @tailbits space begins |
3344 | * |
3345 | * Make sure that the data buffers attached to a socket buffer are |
3346 | * writable. If they are not, private copies are made of the data buffers |
3347 | * and the socket buffer is set to use these instead. |
3348 | * |
3349 | * If @tailbits is given, make sure that there is space to write @tailbits |
3350 | * bytes of data beyond current end of socket buffer. @trailer will be |
3351 | * set to point to the skb in which this space begins. |
3352 | * |
3353 | * The number of scatterlist elements required to completely map the |
3354 | * COW'd and extended socket buffer will be returned. |
3355 | */ |
3356 | int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) |
3357 | { |
3358 | int copyflag; |
3359 | int elt; |
3360 | struct sk_buff *skb1, **skb_p; |
3361 | |
3362 | /* If skb is cloned or its head is paged, reallocate |
3363 | * head pulling out all the pages (pages are considered not writable |
3364 | * at the moment even if they are anonymous). |
3365 | */ |
3366 | if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && |
3367 | __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) |
3368 | return -ENOMEM; |
3369 | |
3370 | /* Easy case. Most of packets will go this way. */ |
3371 | if (!skb_has_frag_list(skb)) { |
		/* A bit of trouble: not enough space for the trailer.
		 * This should not happen when the stack is tuned to generate
		 * good frames. OK, on a miss we reallocate and reserve even
		 * more space; 128 bytes is fair. */
3376 | |
3377 | if (skb_tailroom(skb) < tailbits && |
3378 | pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) |
3379 | return -ENOMEM; |
3380 | |
3381 | /* Voila! */ |
3382 | *trailer = skb; |
3383 | return 1; |
3384 | } |
3385 | |
	/* Misery. We are in trouble, going to mince fragments... */
3387 | |
3388 | elt = 1; |
3389 | skb_p = &skb_shinfo(skb)->frag_list; |
3390 | copyflag = 0; |
3391 | |
3392 | while ((skb1 = *skb_p) != NULL) { |
3393 | int ntail = 0; |
3394 | |
3395 | /* The fragment is partially pulled by someone, |
3396 | * this can happen on input. Copy it and everything |
3397 | * after it. */ |
3398 | |
3399 | if (skb_shared(skb1)) |
3400 | copyflag = 1; |
3401 | |
3402 | /* If the skb is the last, worry about trailer. */ |
3403 | |
3404 | if (skb1->next == NULL && tailbits) { |
3405 | if (skb_shinfo(skb1)->nr_frags || |
3406 | skb_has_frag_list(skb1) || |
3407 | skb_tailroom(skb1) < tailbits) |
3408 | ntail = tailbits + 128; |
3409 | } |
3410 | |
3411 | if (copyflag || |
3412 | skb_cloned(skb1) || |
3413 | ntail || |
3414 | skb_shinfo(skb1)->nr_frags || |
3415 | skb_has_frag_list(skb1)) { |
3416 | struct sk_buff *skb2; |
3417 | |
3418 | /* Fuck, we are miserable poor guys... */ |
3419 | if (ntail == 0) |
3420 | skb2 = skb_copy(skb1, GFP_ATOMIC); |
3421 | else |
3422 | skb2 = skb_copy_expand(skb1, |
3423 | skb_headroom(skb1), |
3424 | ntail, |
3425 | GFP_ATOMIC); |
3426 | if (unlikely(skb2 == NULL)) |
3427 | return -ENOMEM; |
3428 | |
3429 | if (skb1->sk) |
3430 | skb_set_owner_w(skb2, skb1->sk); |
3431 | |
3432 | /* Looking around. Are we still alive? |
3433 | * OK, link new skb, drop old one */ |
3434 | |
3435 | skb2->next = skb1->next; |
3436 | *skb_p = skb2; |
3437 | kfree_skb(skb1); |
3438 | skb1 = skb2; |
3439 | } |
3440 | elt++; |
3441 | *trailer = skb1; |
3442 | skb_p = &skb1->next; |
3443 | } |
3444 | |
3445 | return elt; |
3446 | } |
3447 | EXPORT_SYMBOL_GPL(skb_cow_data); |
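
/*
 * Illustrative sketch (not part of this file; 'trailer_len' is hypothetical):
 * the IPsec-style pattern of making an skb writable and sizing a
 * scatterlist from the return value before mapping it with skb_to_sgvec().
 *
 *	struct sk_buff *trailer;
 *	int nfrags;
 *
 *	nfrags = skb_cow_data(skb, trailer_len, &trailer);
 *	if (nfrags < 0)
 *		return nfrags;
 *	// skb is now writable end to end, 'trailer' has trailer_len bytes of
 *	// tailroom, and an nfrags-entry scatterlist suffices for skb_to_sgvec()
 */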
3448 | |
3449 | static void sock_rmem_free(struct sk_buff *skb) |
3450 | { |
3451 | struct sock *sk = skb->sk; |
3452 | |
3453 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); |
3454 | } |
3455 | |
3456 | /* |
 * Note: We don't mem-charge error packets (no sk_forward_alloc changes)
3458 | */ |
3459 | int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) |
3460 | { |
3461 | if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= |
3462 | (unsigned int)sk->sk_rcvbuf) |
3463 | return -ENOMEM; |
3464 | |
3465 | skb_orphan(skb); |
3466 | skb->sk = sk; |
3467 | skb->destructor = sock_rmem_free; |
3468 | atomic_add(skb->truesize, &sk->sk_rmem_alloc); |
3469 | |
3470 | /* before exiting rcu section, make sure dst is refcounted */ |
3471 | skb_dst_force(skb); |
3472 | |
3473 | skb_queue_tail(&sk->sk_error_queue, skb); |
3474 | if (!sock_flag(sk, SOCK_DEAD)) |
3475 | sk->sk_data_ready(sk); |
3476 | return 0; |
3477 | } |
3478 | EXPORT_SYMBOL(sock_queue_err_skb); |
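
/* Editor's illustration (not part of the original file): a minimal sketch
 * of how a producer of error-queue messages might use sock_queue_err_skb().
 * The errno and origin values are arbitrary example choices; note that on
 * failure the caller still owns the skb and must free it.
 *
 *	serr = SKB_EXT_ERR(skb);
 *	memset(serr, 0, sizeof(*serr));
 *	serr->ee.ee_errno = EHOSTUNREACH;
 *	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
 *
 *	if (sock_queue_err_skb(sk, skb))
 *		kfree_skb(skb);
 */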
3479 | |
3480 | void skb_tstamp_tx(struct sk_buff *orig_skb, |
3481 | struct skb_shared_hwtstamps *hwtstamps) |
3482 | { |
3483 | struct sock *sk = orig_skb->sk; |
3484 | struct sock_exterr_skb *serr; |
3485 | struct sk_buff *skb; |
3486 | int err; |
3487 | |
3488 | if (!sk) |
3489 | return; |
3490 | |
3491 | if (hwtstamps) { |
3492 | *skb_hwtstamps(orig_skb) = |
3493 | *hwtstamps; |
3494 | } else { |
3495 | /* |
3496 | 		 * no hardware timestamps are available, |
3497 | 		 * so keep the shared tx_flags and only |
3498 | 		 * store a software timestamp |
3499 | */ |
3500 | orig_skb->tstamp = ktime_get_real(); |
3501 | } |
3502 | |
3503 | skb = skb_clone(orig_skb, GFP_ATOMIC); |
3504 | if (!skb) |
3505 | return; |
3506 | |
3507 | serr = SKB_EXT_ERR(skb); |
3508 | memset(serr, 0, sizeof(*serr)); |
3509 | serr->ee.ee_errno = ENOMSG; |
3510 | serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; |
3511 | |
3512 | err = sock_queue_err_skb(sk, skb); |
3513 | |
3514 | if (err) |
3515 | kfree_skb(skb); |
3516 | } |
3517 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); |
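
/* Editor's illustration (not part of the original file): a driver TX
 * completion path might report a hardware timestamp roughly like this,
 * assuming hw_ns holds the NIC's nanosecond timestamp for the frame and
 * that the driver set SKBTX_IN_PROGRESS when it queued the transmission.
 *
 *	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
 *		struct skb_shared_hwtstamps hwts = { };
 *
 *		hwts.hwtstamp = ns_to_ktime(hw_ns);
 *		skb_tstamp_tx(skb, &hwts);	// clones skb onto sk_error_queue
 *	}
 *	dev_kfree_skb_any(skb);		// the original skb is still ours to free
 */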
3518 | |
3519 | void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) |
3520 | { |
3521 | struct sock *sk = skb->sk; |
3522 | struct sock_exterr_skb *serr; |
3523 | int err; |
3524 | |
3525 | skb->wifi_acked_valid = 1; |
3526 | skb->wifi_acked = acked; |
3527 | |
3528 | serr = SKB_EXT_ERR(skb); |
3529 | memset(serr, 0, sizeof(*serr)); |
3530 | serr->ee.ee_errno = ENOMSG; |
3531 | serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; |
3532 | |
3533 | err = sock_queue_err_skb(sk, skb); |
3534 | if (err) |
3535 | kfree_skb(skb); |
3536 | } |
3537 | EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); |
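
/* Editor's illustration (not part of the original file): a wireless driver
 * or stack reporting TX status might call this only when status reporting
 * was requested for the skb; was_acked is an assumed variable. Unlike
 * skb_tstamp_tx(), this consumes the skb (it is queued on the owning
 * socket's error queue or freed).
 *
 *	if (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
 *		skb_complete_wifi_ack(skb, was_acked);
 *	else
 *		dev_kfree_skb_any(skb);
 */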
3538 | |
3539 | |
3540 | /** |
3541 | * skb_partial_csum_set - set up and verify partial csum values for packet |
3542 | * @skb: the skb to set |
3543 | * @start: the number of bytes after skb->data to start checksumming. |
3544 | * @off: the offset from start to place the checksum. |
3545 | * |
3546 | * For untrusted partially-checksummed packets, we need to make sure the values |
3547 | * for skb->csum_start and skb->csum_offset are valid so we don't oops. |
3548 | * |
3549 | * This function checks and sets those values and skb->ip_summed: if this |
3550 | * returns false you should drop the packet. |
3551 | */ |
3552 | bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) |
3553 | { |
3554 | if (unlikely(start > skb_headlen(skb)) || |
3555 | unlikely((int)start + off > skb_headlen(skb) - 2)) { |
3556 | net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", |
3557 | start, off, skb_headlen(skb)); |
3558 | return false; |
3559 | } |
3560 | skb->ip_summed = CHECKSUM_PARTIAL; |
3561 | skb->csum_start = skb_headroom(skb) + start; |
3562 | skb->csum_offset = off; |
3563 | skb_set_transport_header(skb, start); |
3564 | return true; |
3565 | } |
3566 | EXPORT_SYMBOL_GPL(skb_partial_csum_set); |
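
/* Editor's illustration (not part of the original file): this is roughly
 * how a virtio-style receive path validates a guest-supplied partial
 * checksum request; hdr is assumed to point at a struct virtio_net_hdr
 * copied from untrusted guest memory.
 *
 *	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 *		if (!skb_partial_csum_set(skb, hdr->csum_start,
 *					  hdr->csum_offset))
 *			goto drop;	// offsets don't fit the linear head
 *	}
 */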
3567 | |
3568 | static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, |
3569 | unsigned int max) |
3570 | { |
3571 | if (skb_headlen(skb) >= len) |
3572 | return 0; |
3573 | |
3574 | 	/* If we need to pull up, then pull up to the max so we |
3575 | 	 * won't need to do it again. |
3576 | */ |
3577 | if (max > skb->len) |
3578 | max = skb->len; |
3579 | |
3580 | if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) |
3581 | return -ENOMEM; |
3582 | |
3583 | if (skb_headlen(skb) < len) |
3584 | return -EPROTO; |
3585 | |
3586 | return 0; |
3587 | } |
3588 | |
3589 | #define MAX_TCP_HDR_LEN (15 * 4) |
3590 | |
3591 | static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, |
3592 | typeof(IPPROTO_IP) proto, |
3593 | unsigned int off) |
3594 | { |
3595 | switch (proto) { |
3596 | int err; |
3597 | |
3598 | case IPPROTO_TCP: |
3599 | err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), |
3600 | off + MAX_TCP_HDR_LEN); |
3601 | if (!err && !skb_partial_csum_set(skb, off, |
3602 | offsetof(struct tcphdr, |
3603 | check))) |
3604 | err = -EPROTO; |
3605 | return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; |
3606 | |
3607 | case IPPROTO_UDP: |
3608 | err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), |
3609 | off + sizeof(struct udphdr)); |
3610 | if (!err && !skb_partial_csum_set(skb, off, |
3611 | offsetof(struct udphdr, |
3612 | check))) |
3613 | err = -EPROTO; |
3614 | return err ? ERR_PTR(err) : &udp_hdr(skb)->check; |
3615 | } |
3616 | |
3617 | return ERR_PTR(-EPROTO); |
3618 | } |
3619 | |
3620 | /* This value should be large enough to cover a tagged ethernet header plus |
3621 | * maximally sized IP and TCP or UDP headers. |
3622 | */ |
3623 | #define MAX_IP_HDR_LEN 128 |
3624 | |
3625 | static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) |
3626 | { |
3627 | unsigned int off; |
3628 | bool fragment; |
3629 | __sum16 *csum; |
3630 | int err; |
3631 | |
3632 | fragment = false; |
3633 | |
3634 | err = skb_maybe_pull_tail(skb, |
3635 | sizeof(struct iphdr), |
3636 | MAX_IP_HDR_LEN); |
3637 | if (err < 0) |
3638 | goto out; |
3639 | |
3640 | if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) |
3641 | fragment = true; |
3642 | |
3643 | off = ip_hdrlen(skb); |
3644 | |
3645 | err = -EPROTO; |
3646 | |
3647 | if (fragment) |
3648 | goto out; |
3649 | |
3650 | csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); |
3651 | if (IS_ERR(csum)) |
3652 | return PTR_ERR(csum); |
3653 | |
3654 | if (recalculate) |
3655 | *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, |
3656 | ip_hdr(skb)->daddr, |
3657 | skb->len - off, |
3658 | ip_hdr(skb)->protocol, 0); |
3659 | err = 0; |
3660 | |
3661 | out: |
3662 | return err; |
3663 | } |
3664 | |
3665 | /* This value should be large enough to cover a tagged ethernet header plus |
3666 | * an IPv6 header, all options, and a maximal TCP or UDP header. |
3667 | */ |
3668 | #define MAX_IPV6_HDR_LEN 256 |
3669 | |
3670 | #define OPT_HDR(type, skb, off) \ |
3671 | (type *)(skb_network_header(skb) + (off)) |
3672 | |
3673 | static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) |
3674 | { |
3675 | int err; |
3676 | u8 nexthdr; |
3677 | unsigned int off; |
3678 | unsigned int len; |
3679 | bool fragment; |
3680 | bool done; |
3681 | __sum16 *csum; |
3682 | |
3683 | fragment = false; |
3684 | done = false; |
3685 | |
3686 | off = sizeof(struct ipv6hdr); |
3687 | |
3688 | err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); |
3689 | if (err < 0) |
3690 | goto out; |
3691 | |
3692 | nexthdr = ipv6_hdr(skb)->nexthdr; |
3693 | |
3694 | len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); |
3695 | while (off <= len && !done) { |
3696 | switch (nexthdr) { |
3697 | case IPPROTO_DSTOPTS: |
3698 | case IPPROTO_HOPOPTS: |
3699 | case IPPROTO_ROUTING: { |
3700 | struct ipv6_opt_hdr *hp; |
3701 | |
3702 | err = skb_maybe_pull_tail(skb, |
3703 | off + |
3704 | sizeof(struct ipv6_opt_hdr), |
3705 | MAX_IPV6_HDR_LEN); |
3706 | if (err < 0) |
3707 | goto out; |
3708 | |
3709 | hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); |
3710 | nexthdr = hp->nexthdr; |
3711 | off += ipv6_optlen(hp); |
3712 | break; |
3713 | } |
3714 | case IPPROTO_AH: { |
3715 | struct ip_auth_hdr *hp; |
3716 | |
3717 | err = skb_maybe_pull_tail(skb, |
3718 | off + |
3719 | sizeof(struct ip_auth_hdr), |
3720 | MAX_IPV6_HDR_LEN); |
3721 | if (err < 0) |
3722 | goto out; |
3723 | |
3724 | hp = OPT_HDR(struct ip_auth_hdr, skb, off); |
3725 | nexthdr = hp->nexthdr; |
3726 | off += ipv6_authlen(hp); |
3727 | break; |
3728 | } |
3729 | case IPPROTO_FRAGMENT: { |
3730 | struct frag_hdr *hp; |
3731 | |
3732 | err = skb_maybe_pull_tail(skb, |
3733 | off + |
3734 | sizeof(struct frag_hdr), |
3735 | MAX_IPV6_HDR_LEN); |
3736 | if (err < 0) |
3737 | goto out; |
3738 | |
3739 | hp = OPT_HDR(struct frag_hdr, skb, off); |
3740 | |
3741 | if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) |
3742 | fragment = true; |
3743 | |
3744 | nexthdr = hp->nexthdr; |
3745 | off += sizeof(struct frag_hdr); |
3746 | break; |
3747 | } |
3748 | default: |
3749 | done = true; |
3750 | break; |
3751 | } |
3752 | } |
3753 | |
3754 | err = -EPROTO; |
3755 | |
3756 | if (!done || fragment) |
3757 | goto out; |
3758 | |
3759 | csum = skb_checksum_setup_ip(skb, nexthdr, off); |
3760 | if (IS_ERR(csum)) |
3761 | return PTR_ERR(csum); |
3762 | |
3763 | if (recalculate) |
3764 | *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
3765 | &ipv6_hdr(skb)->daddr, |
3766 | skb->len - off, nexthdr, 0); |
3767 | err = 0; |
3768 | |
3769 | out: |
3770 | return err; |
3771 | } |
3772 | |
3773 | /** |
3774 | * skb_checksum_setup - set up partial checksum offset |
3775 | * @skb: the skb to set up |
3776 | * @recalculate: if true the pseudo-header checksum will be recalculated |
3777 | */ |
3778 | int skb_checksum_setup(struct sk_buff *skb, bool recalculate) |
3779 | { |
3780 | int err; |
3781 | |
3782 | switch (skb->protocol) { |
3783 | case htons(ETH_P_IP): |
3784 | err = skb_checksum_setup_ipv4(skb, recalculate); |
3785 | break; |
3786 | |
3787 | case htons(ETH_P_IPV6): |
3788 | err = skb_checksum_setup_ipv6(skb, recalculate); |
3789 | break; |
3790 | |
3791 | default: |
3792 | err = -EPROTO; |
3793 | break; |
3794 | } |
3795 | |
3796 | return err; |
3797 | } |
3798 | EXPORT_SYMBOL(skb_checksum_setup); |
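
/* Editor's illustration (not part of the original file): a backend driver
 * that receives CHECKSUM_PARTIAL packets from an untrusted peer (e.g. a
 * guest) might validate and, if requested, recompute the pseudo-header
 * checksum before handing the skb to the stack. recalculate_csum is an
 * assumed flag for this sketch.
 *
 *	err = skb_checksum_setup(skb, recalculate_csum);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;	// -EPROTO for unsupported or malformed headers
 *	}
 *	netif_receive_skb(skb);
 */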
3799 | |
3800 | void __skb_warn_lro_forwarding(const struct sk_buff *skb) |
3801 | { |
3802 | net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", |
3803 | skb->dev->name); |
3804 | } |
3805 | EXPORT_SYMBOL(__skb_warn_lro_forwarding); |
3806 | |
3807 | void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) |
3808 | { |
3809 | if (head_stolen) { |
3810 | skb_release_head_state(skb); |
3811 | kmem_cache_free(skbuff_head_cache, skb); |
3812 | } else { |
3813 | __kfree_skb(skb); |
3814 | } |
3815 | } |
3816 | EXPORT_SYMBOL(kfree_skb_partial); |
3817 | |
3818 | /** |
3819 | * skb_try_coalesce - try to merge skb to prior one |
3820 | * @to: prior buffer |
3821 | * @from: buffer to add |
3822 | * @fragstolen: pointer to boolean |
3823 | * @delta_truesize: how much more was allocated than was requested |
3824 | */ |
3825 | bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, |
3826 | bool *fragstolen, int *delta_truesize) |
3827 | { |
3828 | int i, delta, len = from->len; |
3829 | |
3830 | *fragstolen = false; |
3831 | |
3832 | if (skb_cloned(to)) |
3833 | return false; |
3834 | |
3835 | if (len <= skb_tailroom(to)) { |
3836 | BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); |
3837 | *delta_truesize = 0; |
3838 | return true; |
3839 | } |
3840 | |
3841 | if (skb_has_frag_list(to) || skb_has_frag_list(from)) |
3842 | return false; |
3843 | |
3844 | if (skb_headlen(from) != 0) { |
3845 | struct page *page; |
3846 | unsigned int offset; |
3847 | |
3848 | if (skb_shinfo(to)->nr_frags + |
3849 | skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) |
3850 | return false; |
3851 | |
3852 | if (skb_head_is_locked(from)) |
3853 | return false; |
3854 | |
3855 | delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); |
3856 | |
3857 | page = virt_to_head_page(from->head); |
3858 | offset = from->data - (unsigned char *)page_address(page); |
3859 | |
3860 | skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, |
3861 | page, offset, skb_headlen(from)); |
3862 | *fragstolen = true; |
3863 | } else { |
3864 | if (skb_shinfo(to)->nr_frags + |
3865 | skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) |
3866 | return false; |
3867 | |
3868 | delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); |
3869 | } |
3870 | |
3871 | WARN_ON_ONCE(delta < len); |
3872 | |
3873 | memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, |
3874 | skb_shinfo(from)->frags, |
3875 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); |
3876 | skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; |
3877 | |
3878 | if (!skb_cloned(from)) |
3879 | skb_shinfo(from)->nr_frags = 0; |
3880 | |
3881 | /* if the skb is not cloned this does nothing |
3882 | * since we set nr_frags to 0. |
3883 | */ |
3884 | for (i = 0; i < skb_shinfo(from)->nr_frags; i++) |
3885 | skb_frag_ref(from, i); |
3886 | |
3887 | to->truesize += delta; |
3888 | to->len += len; |
3889 | to->data_len += len; |
3890 | |
3891 | *delta_truesize = delta; |
3892 | return true; |
3893 | } |
3894 | EXPORT_SYMBOL(skb_try_coalesce); |
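
/* Editor's illustration (not part of the original file): a receive-queue
 * sketch combining skb_try_coalesce() with kfree_skb_partial() above.
 * Here tail is the last skb already on the queue and queue is an
 * sk_buff_head, both assumed; memory accounting of delta is left out.
 *
 *	bool fragstolen;
 *	int delta;
 *
 *	if (tail && skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
 *		// the data now lives in tail; when fragstolen, the head data
 *		// is referenced by tail, so only the sk_buff itself is freed
 *		kfree_skb_partial(skb, fragstolen);
 *	} else {
 *		__skb_queue_tail(queue, skb);
 *	}
 */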
3895 | |
3896 | /** |
3897 | * skb_scrub_packet - scrub an skb |
3898 | * |
3899 | * @skb: buffer to clean |
3900 | * @xnet: packet is crossing netns |
3901 | * |
3902 |  * skb_scrub_packet can be used after encapsulating a packet into, or |
3903 |  * decapsulating it from, a tunnel. Some information has to be cleared |
3904 |  * during these operations. |
3905 | * skb_scrub_packet can also be used to clean a skb before injecting it in |
3906 | * another namespace (@xnet == true). We have to clear all information in the |
3907 | * skb that could impact namespace isolation. |
3908 | */ |
3909 | void skb_scrub_packet(struct sk_buff *skb, bool xnet) |
3910 | { |
3911 | if (xnet) |
3912 | skb_orphan(skb); |
3913 | skb->tstamp.tv64 = 0; |
3914 | skb->pkt_type = PACKET_HOST; |
3915 | skb->skb_iif = 0; |
3916 | skb->local_df = 0; |
3917 | skb_dst_drop(skb); |
3918 | skb->mark = 0; |
3919 | secpath_reset(skb); |
3920 | nf_reset(skb); |
3921 | nf_reset_trace(skb); |
3922 | } |
3923 | EXPORT_SYMBOL_GPL(skb_scrub_packet); |
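
/* Editor's illustration (not part of the original file): tunnel receive
 * paths typically scrub a decapsulated packet, requesting the full
 * cross-netns cleanup only when the packet is about to change network
 * namespaces; dest_net names the destination namespace and is an
 * assumption of this sketch.
 *
 *	// xnet is true when the packet leaves its current netns
 *	skb_scrub_packet(skb, !net_eq(dev_net(skb->dev), dest_net));
 */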
3924 | |
3925 | /** |
3926 | * skb_gso_transport_seglen - Return length of individual segments of a gso packet |
3927 | * |
3928 | * @skb: GSO skb |
3929 | * |
3930 | * skb_gso_transport_seglen is used to determine the real size of the |
3931 | * individual segments, including Layer4 headers (TCP/UDP). |
3932 | * |
3933 | * The MAC/L2 or network (IP, IPv6) headers are not accounted for. |
3934 | */ |
3935 | unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) |
3936 | { |
3937 | const struct skb_shared_info *shinfo = skb_shinfo(skb); |
3938 | |
3939 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) |
3940 | return tcp_hdrlen(skb) + shinfo->gso_size; |
3941 | |
3942 | /* UFO sets gso_size to the size of the fragmentation |
3943 | * payload, i.e. the size of the L4 (UDP) header is already |
3944 | * accounted for. |
3945 | */ |
3946 | return shinfo->gso_size; |
3947 | } |
3948 | EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); |
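
/* Editor's worked example (not part of the original file): for a TCP GSO
 * packet built against a 1500-byte Ethernet MTU with TCP timestamps,
 * gso_size (the MSS) is 1448 and tcp_hdrlen() is 32, so this function
 * returns 1448 + 32 = 1480 bytes per segment; adding the 20-byte IPv4
 * header gives the 1500 bytes that actually go on the wire. A caller
 * checking whether the segments of a GSO packet fit a link MTU might
 * therefore do something like the following, where mtu and the drop
 * label are assumptions of this sketch:
 *
 *	unsigned int hdr_len = skb_transport_header(skb) - skb_network_header(skb);
 *
 *	if (skb_gso_transport_seglen(skb) + hdr_len > mtu)
 *		goto drop;	// at least one segment would exceed the MTU
 */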
3949 |