/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Alan Cox	:	Split from ip.c, see ip_input.c for history.
 *		David S. Miller :	Begin massive cleanup...
 *		Andi Kleen	:	Add sysctls.
 *		xxxx		:	Overlapfrag bug.
 *		Ultima		:	ip_expire() kernel panic.
 *		Bill Hawes	:	Frag accounting and evictor fixes.
 *		John McDonald	:	0 length frag bug.
 *		Alexey Kuznetsov:	SMP races, threading, cleanup.
 *		Patrick McHardy :	LRU queue of frag heads for evictor.
 */

#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/route.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/inet_frag.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>

/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
 * as well. Or notify me, at least. --ANK
 */

static int sysctl_ipfrag_max_dist __read_mostly = 64;

struct ipfrag_skb_cb
{
	struct inet_skb_parm	h;
	int			offset;
};

#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))
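/*
 * Illustrative sketch (not part of the original file): FRAG_CB()
 * overlays ipfrag_skb_cb on the skb control buffer, so every queued
 * fragment can carry its byte offset without a separate allocation.
 * A minimal, hypothetical use, assuming cb is large enough:
 */
#if 0	/* example only, never compiled */
static void example_stash_offset(struct sk_buff *skb, int offset)
{
	/* the overlay must fit in the skb scratch area */
	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
	FRAG_CB(skb)->offset = offset;	/* read back during chain insert */
}
#endif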

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
	struct inet_frag_queue q;

	u32		user;
	__be32		saddr;
	__be32		daddr;
	__be16		id;
	u8		protocol;
	int		iif;
	unsigned int	rid;
	struct inet_peer *peer;
};

static struct inet_frags ip4_frags;

int ip_frag_nqueues(struct net *net)
{
	return net->ipv4.frags.nqueues;
}

int ip_frag_mem(struct net *net)
{
	return atomic_read(&net->ipv4.frags.mem);
}

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev);

struct ip4_create_arg {
	struct iphdr *iph;
	u32 user;
};

static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
	return jhash_3words((__force u32)id << 16 | prot,
			    (__force u32)saddr, (__force u32)daddr,
			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
}
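/*
 * Sketch (illustrative, not in the original file): all fragments of one
 * datagram share (id, saddr, daddr, protocol), so ipqhashfn() sends
 * them to the same bucket no matter what frag_off says.
 */
#if 0	/* example only, never compiled */
static bool example_same_bucket(const struct iphdr *a, const struct iphdr *b)
{
	return ipqhashfn(a->id, a->saddr, a->daddr, a->protocol) ==
	       ipqhashfn(b->id, b->saddr, b->daddr, b->protocol);
}
#endif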

static unsigned int ip4_hashfn(struct inet_frag_queue *q)
{
	struct ipq *ipq;

	ipq = container_of(q, struct ipq, q);
	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}

static int ip4_frag_match(struct inet_frag_queue *q, void *a)
{
	struct ipq *qp;
	struct ip4_create_arg *arg = a;

	qp = container_of(q, struct ipq, q);
	return (qp->id == arg->iph->id &&
		qp->saddr == arg->iph->saddr &&
		qp->daddr == arg->iph->daddr &&
		qp->protocol == arg->iph->protocol &&
		qp->user == arg->user);
}

/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct netns_frags *nf,
				      struct sk_buff *skb, int *work)
{
	if (work)
		*work -= skb->truesize;
	atomic_sub(skb->truesize, &nf->mem);
	kfree_skb(skb);
}

static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
	struct ipq *qp = container_of(q, struct ipq, q);
	struct ip4_create_arg *arg = a;

	qp->protocol = arg->iph->protocol;
	qp->id = arg->iph->id;
	qp->saddr = arg->iph->saddr;
	qp->daddr = arg->iph->daddr;
	qp->user = arg->user;
	qp->peer = sysctl_ipfrag_max_dist ?
		inet_getpeer(arg->iph->saddr, 1) : NULL;
}

static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
{
	struct ipq *qp;

	qp = container_of(q, struct ipq, q);
	if (qp->peer)
		inet_putpeer(qp->peer);
}


/* Destruction primitives. */

static __inline__ void ipq_put(struct ipq *ipq)
{
	inet_frag_put(&ipq->q, &ip4_frags);
}

/* Kill ipq entry. It is not destroyed immediately,
 * because the caller (and possibly others) still holds a reference.
 */
static void ipq_kill(struct ipq *ipq)
{
	inet_frag_kill(&ipq->q, &ip4_frags);
}

/* Memory limiting on fragments. The evictor trashes the oldest
 * fragment queues until we are back under the low threshold.
 */
static void ip_evictor(struct net *net)
{
	int evicted;

	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
	if (evicted)
		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}

/*
 * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
 */
static void ip_expire(unsigned long arg)
{
	struct ipq *qp;
	struct net *net;

	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
	net = container_of(qp->q.net, struct net, ipv4.frags);

	spin_lock(&qp->q.lock);

	if (qp->q.last_in & INET_FRAG_COMPLETE)
		goto out;

	ipq_kill(qp);

	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);

	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
		struct sk_buff *head = qp->q.fragments;

		rcu_read_lock();
		head->dev = dev_get_by_index_rcu(net, qp->iif);
		if (!head->dev)
			goto out_rcu_unlock;

		/*
		 * Only search the routing table for the head fragment
		 * when the defrag timeout fires at the PRE_ROUTING hook.
		 */
		if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) {
			const struct iphdr *iph = ip_hdr(head);
			int err = ip_route_input(head, iph->daddr, iph->saddr,
						 iph->tos, head->dev);
			if (unlikely(err))
				goto out_rcu_unlock;

			/*
			 * Only an end host needs to send an ICMP
			 * "Fragment Reassembly Timeout" message, per RFC792.
			 */
			if (skb_rtable(head)->rt_type != RTN_LOCAL)
				goto out_rcu_unlock;

		}

		/* Send an ICMP "Fragment Reassembly Timeout" message. */
		icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
out_rcu_unlock:
		rcu_read_unlock();
	}
out:
	spin_unlock(&qp->q.lock);
	ipq_put(qp);
}

/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create a new one if nothing is found.
 */
static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
	struct inet_frag_queue *q;
	struct ip4_create_arg arg;
	unsigned int hash;

	arg.iph = iph;
	arg.user = user;

	read_lock(&ip4_frags.lock);
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);

	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
	if (q == NULL)
		goto out_nomem;

	return container_of(q, struct ipq, q);

out_nomem:
	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
	return NULL;
}

/* Is the fragment too far ahead to be part of ipq? */
static inline int ip_frag_too_far(struct ipq *qp)
{
	struct inet_peer *peer = qp->peer;
	unsigned int max = sysctl_ipfrag_max_dist;
	unsigned int start, end;

	int rc;

	if (!peer || !max)
		return 0;

	start = qp->rid;
	end = atomic_inc_return(&peer->rid);
	qp->rid = end;

	rc = qp->q.fragments && (end - start) > max;

	if (rc) {
		struct net *net;

		net = container_of(qp->q.net, struct net, ipv4.frags);
		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
	}

	return rc;
}
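/*
 * Worked example (illustrative): peer->rid counts every fragment seen
 * from this source address; qp->rid remembers the count at this queue's
 * last fragment. Unsigned subtraction keeps the distance correct across
 * counter wrap, e.g. start = 0xfffffff0, end = 0x50 gives
 * end - start = 0x60 = 96 > 64, so the queue is judged stale.
 */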

static int ip_frag_reinit(struct ipq *qp)
{
	struct sk_buff *fp;

	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
		atomic_inc(&qp->q.refcnt);
		return -ETIMEDOUT;
	}

	fp = qp->q.fragments;
	do {
		struct sk_buff *xp = fp->next;
		frag_kfree_skb(qp->q.net, fp, NULL);
		fp = xp;
	} while (fp);

	qp->q.last_in = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.fragments = NULL;
	qp->iif = 0;

	return 0;
}

/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int flags, offset;
	int ihl, end;
	int err = -ENOENT;

	if (qp->q.last_in & INET_FRAG_COMPLETE)
		goto err;

	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
		ipq_kill(qp);
		goto err;
	}

	offset = ntohs(ip_hdr(skb)->frag_off);
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	offset <<= 3;		/* offset is in 8-byte chunks */
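	/* Example (illustrative): frag_off = 0x2000 | 185 means MF is set
	 * and the payload starts at byte 185 * 8 = 1480, i.e. the second
	 * fragment of a 1500-byte-MTU chain with a 20-byte IP header. */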
	ihl = ip_hdrlen(skb);

	/* Determine the position of this fragment. */
	end = offset + skb->len - ihl;
	err = -EINVAL;

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have a different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto err;
		qp->q.last_in |= INET_FRAG_LAST_IN;
		qp->q.len = end;
	} else {
		if (end&7) {
			end &= ~7;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.last_in & INET_FRAG_LAST_IN)
				goto err;
			qp->q.len = end;
		}
	}
	if (end == offset)
		goto err;

	err = -ENOMEM;
	if (pskb_pull(skb, ihl) == NULL)
		goto err;

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto err;

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	prev = NULL;
	for (next = qp->q.fragments; next != NULL; next = next->next) {
		if (FRAG_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
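	/* Worked example (illustrative): if prev covers bytes [0, 1480)
	 * and this fragment claims [1472, 2952), then i = 8 below; we
	 * pull 8 bytes off our head and queue [1480, 2952) instead,
	 * preferring the data that is already in the chain. */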
	if (prev) {
		int i = (FRAG_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			offset += i;
			err = -EINVAL;
			if (end <= offset)
				goto err;
			err = -ENOMEM;
			if (!pskb_pull(skb, i))
				goto err;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	err = -ENOMEM;

	while (next && FRAG_CB(next)->offset < end) {
		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */

		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			if (!pskb_pull(next, i))
				goto err;
			FRAG_CB(next)->offset += i;
			qp->q.meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			struct sk_buff *free_it = next;

			/* Old fragment is completely overridden with
			 * the new one; drop it.
			 */
			next = next->next;

			if (prev)
				prev->next = next;
			else
				qp->q.fragments = next;

			qp->q.meat -= free_it->len;
			frag_kfree_skb(qp->q.net, free_it, NULL);
		}
	}

	FRAG_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		qp->q.fragments = skb;

	dev = skb->dev;
	if (dev) {
		qp->iif = dev->ifindex;
		skb->dev = NULL;
	}
	qp->q.stamp = skb->tstamp;
	qp->q.meat += skb->len;
	atomic_add(skb->truesize, &qp->q.net->mem);
	if (offset == 0)
		qp->q.last_in |= INET_FRAG_FIRST_IN;

	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len)
		return ip_frag_reasm(qp, prev, dev);

	write_lock(&ip4_frags.lock);
	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
	write_unlock(&ip4_frags.lock);
	return -EINPROGRESS;

err:
	kfree_skb(skb);
	return err;
}


/* Build a new IP datagram from all its fragments. */

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev)
{
	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
	struct iphdr *iph;
	struct sk_buff *fp, *head = qp->q.fragments;
	int len;
	int ihlen;
	int err;

	ipq_kill(qp);

	/* Make the one we just received the head. */
	if (prev) {
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);
		if (!fp)
			goto out_nomem;

		fp->next = head->next;
		prev->next = fp;

		skb_morph(head, qp->q.fragments);
		head->next = qp->q.fragments->next;

		kfree_skb(qp->q.fragments);
		qp->q.fragments = head;
	}

	WARN_ON(head == NULL);
	WARN_ON(FRAG_CB(head)->offset != 0);

	/* Allocate a new buffer for the datagram. */
	ihlen = ip_hdrlen(head);
	len = ihlen + qp->q.len;

	err = -E2BIG;
	if (len > 65535)
		goto out_oversize;

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		goto out_nomem;

	/* If the first fragment is fragmented itself, we split
	 * it into two chunks: the first with the data and paged part
	 * and the second holding only fragments. */
	if (skb_has_frags(head)) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
			goto out_nomem;
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_frag_list_init(head);
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		atomic_add(clone->truesize, &qp->q.net->mem);
	}

	skb_shinfo(head)->frag_list = head->next;
	skb_push(head, head->data - skb_network_header(head));
	atomic_sub(head->truesize, &qp->q.net->mem);

	for (fp = head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
		atomic_sub(fp->truesize, &qp->q.net->mem);
	}

	head->next = NULL;
	head->dev = dev;
	head->tstamp = qp->q.stamp;

	iph = ip_hdr(head);
	iph->frag_off = 0;
	iph->tot_len = htons(len);
	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
	qp->q.fragments = NULL;
	return 0;

out_nomem:
	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
			"queue %p\n", qp);
	err = -ENOMEM;
	goto out_fail;
out_oversize:
	if (net_ratelimit())
		printk(KERN_INFO "Oversized IP packet from %pI4.\n",
			&qp->saddr);
out_fail:
	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
	return err;
}

/* Process an incoming IP datagram fragment. */
int ip_defrag(struct sk_buff *skb, u32 user)
{
	struct ipq *qp;
	struct net *net;

	net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);

	/* Start by cleaning up the memory. */
	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
		ip_evictor(net);

	/* Lookup (or create) queue header */
	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
		int ret;

		spin_lock(&qp->q.lock);

		ret = ip_frag_queue(qp, skb);

		spin_unlock(&qp->q.lock);
		ipq_put(qp);
		return ret;
	}

	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -ENOMEM;
}
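/*
 * Usage sketch (illustrative, not part of this file): a caller such as
 * the local-delivery path hands every fragment to ip_defrag() and only
 * continues once 0 is returned with the reassembled skb; a non-zero
 * return means the skb was consumed (queued or dropped).
 * example_deliver() and example_process() are hypothetical names.
 */
#if 0	/* example only, never compiled */
static int example_deliver(struct sk_buff *skb)
{
	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;	/* consumed; do not touch skb */
	}
	return example_process(skb);	/* complete datagram from here on */
}
#endif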

#ifdef CONFIG_SYSCTL
static int zero;

static struct ctl_table ip4_frags_ns_ctl_table[] = {
	{
		.procname	= "ipfrag_high_thresh",
		.data		= &init_net.ipv4.frags.high_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ipfrag_low_thresh",
		.data		= &init_net.ipv4.frags.low_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ipfrag_time",
		.data		= &init_net.ipv4.frags.timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};

static struct ctl_table ip4_frags_ctl_table[] = {
	{
		.procname	= "ipfrag_secret_interval",
		.data		= &ip4_frags.secret_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "ipfrag_max_dist",
		.data		= &sysctl_ipfrag_max_dist,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero
	},
	{ }
};
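/*
 * Both tables appear under /proc/sys/net/ipv4/: the ns table above is
 * per network namespace, while this one is global and, being registered
 * with register_net_sysctl_rotable(), read-only outside init_net.
 */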

static int __net_init ip4_frags_ns_ctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip4_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
		if (table == NULL)
			goto err_alloc;

		table[0].data = &net->ipv4.frags.high_thresh;
		table[1].data = &net->ipv4.frags.low_thresh;
		table[2].data = &net->ipv4.frags.timeout;
	}

	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
	if (hdr == NULL)
		goto err_reg;

	net->ipv4.frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
	struct ctl_table *table;

	table = net->ipv4.frags_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.frags_hdr);
	kfree(table);
}

static void ip4_frags_ctl_register(void)
{
	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
}
#else
static inline int ip4_frags_ns_ctl_register(struct net *net)
{
	return 0;
}

static inline void ip4_frags_ns_ctl_unregister(struct net *net)
{
}

static inline void ip4_frags_ctl_register(void)
{
}
#endif

static int __net_init ipv4_frags_init_net(struct net *net)
{
	/*
	 * Fragment cache limits. We will commit 256K at one time. Should we
	 * cross that limit we will prune down to 192K. This should cope with
	 * even the most extreme cases without allowing an attacker to
	 * measurably harm machine performance.
	 */
	net->ipv4.frags.high_thresh = 256 * 1024;
	net->ipv4.frags.low_thresh = 192 * 1024;
	/*
	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
	 * RFC791 is wrong in proposing to prolong the timer by the TTL on
	 * each fragment arrival.
	 */
	net->ipv4.frags.timeout = IP_FRAG_TIME;

	inet_frags_init_net(&net->ipv4.frags);

	return ip4_frags_ns_ctl_register(net);
}

static void __net_exit ipv4_frags_exit_net(struct net *net)
{
	ip4_frags_ns_ctl_unregister(net);
	inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
}

static struct pernet_operations ip4_frags_ops = {
	.init = ipv4_frags_init_net,
	.exit = ipv4_frags_exit_net,
};

void __init ipfrag_init(void)
{
	ip4_frags_ctl_register();
	register_pernet_subsys(&ip4_frags_ops);
	ip4_frags.hashfn = ip4_hashfn;
	ip4_frags.constructor = ip4_frag_init;
	ip4_frags.destructor = ip4_frag_free;
	ip4_frags.skb_free = NULL;
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.match = ip4_frag_match;
	ip4_frags.frag_expire = ip_expire;
	ip4_frags.secret_interval = 10 * 60 * HZ;
	inet_frags_init(&ip4_frags);
}

EXPORT_SYMBOL(ip_defrag);