Root/
1 | /* |
2 | * net/dccp/ipv4.c |
3 | * |
4 | * An implementation of the DCCP protocol |
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> |
6 | * |
7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. |
11 | */ |
12 | |
13 | #include <linux/dccp.h> |
14 | #include <linux/icmp.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/module.h> |
17 | #include <linux/skbuff.h> |
18 | #include <linux/random.h> |
19 | |
20 | #include <net/icmp.h> |
21 | #include <net/inet_common.h> |
22 | #include <net/inet_hashtables.h> |
23 | #include <net/inet_sock.h> |
24 | #include <net/protocol.h> |
25 | #include <net/sock.h> |
26 | #include <net/timewait_sock.h> |
27 | #include <net/tcp_states.h> |
28 | #include <net/xfrm.h> |
29 | |
30 | #include "ackvec.h" |
31 | #include "ccid.h" |
32 | #include "dccp.h" |
33 | #include "feat.h" |
34 | |
35 | /* |
36 | * The per-net dccp.v4_ctl_sk control socket is used to respond to |
37 | * out-of-the-blue (OOTB) packets. It is created when each network |
38 | * namespace is initialised. |
39 | */ |
40 | |
41 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
42 | { |
43 | struct inet_sock *inet = inet_sk(sk); |
44 | struct dccp_sock *dp = dccp_sk(sk); |
45 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; |
46 | struct rtable *rt; |
47 | __be32 daddr, nexthop; |
48 | int tmp; |
49 | int err; |
50 | |
51 | dp->dccps_role = DCCP_ROLE_CLIENT; |
52 | |
53 | if (addr_len < sizeof(struct sockaddr_in)) |
54 | return -EINVAL; |
55 | |
56 | if (usin->sin_family != AF_INET) |
57 | return -EAFNOSUPPORT; |
58 | |
59 | nexthop = daddr = usin->sin_addr.s_addr; |
60 | if (inet->opt != NULL && inet->opt->srr) { |
61 | if (daddr == 0) |
62 | return -EINVAL; |
63 | nexthop = inet->opt->faddr; |
64 | } |
65 | |
66 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, |
67 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
68 | IPPROTO_DCCP, |
69 | inet->inet_sport, usin->sin_port, sk, 1); |
70 | if (tmp < 0) |
71 | return tmp; |
72 | |
73 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { |
74 | ip_rt_put(rt); |
75 | return -ENETUNREACH; |
76 | } |
77 | |
78 | if (inet->opt == NULL || !inet->opt->srr) |
79 | daddr = rt->rt_dst; |
80 | |
81 | if (inet->inet_saddr == 0) |
82 | inet->inet_saddr = rt->rt_src; |
83 | inet->inet_rcv_saddr = inet->inet_saddr; |
84 | |
85 | inet->inet_dport = usin->sin_port; |
86 | inet->inet_daddr = daddr; |
87 | |
88 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
89 | if (inet->opt != NULL) |
90 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; |
91 | /* |
92 | * Socket identity is still unknown (sport may be zero). |
93 | * However we set state to DCCP_REQUESTING and not releasing socket |
94 | * lock select source port, enter ourselves into the hash tables and |
95 | * complete initialization after this. |
96 | */ |
97 | dccp_set_state(sk, DCCP_REQUESTING); |
98 | err = inet_hash_connect(&dccp_death_row, sk); |
99 | if (err != 0) |
100 | goto failure; |
101 | |
102 | err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, |
103 | inet->inet_dport, sk); |
104 | if (err != 0) |
105 | goto failure; |
106 | |
107 | /* OK, now commit destination to socket. */ |
108 | sk_setup_caps(sk, &rt->dst); |
109 | |
110 | dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, |
111 | inet->inet_daddr, |
112 | inet->inet_sport, |
113 | inet->inet_dport); |
114 | inet->inet_id = dp->dccps_iss ^ jiffies; |
115 | |
116 | err = dccp_connect(sk); |
117 | rt = NULL; |
118 | if (err != 0) |
119 | goto failure; |
120 | out: |
121 | return err; |
122 | failure: |
123 | /* |
124 | * This unhashes the socket and releases the local port, if necessary. |
125 | */ |
126 | dccp_set_state(sk, DCCP_CLOSED); |
127 | ip_rt_put(rt); |
128 | sk->sk_route_caps = 0; |
129 | inet->inet_dport = 0; |
130 | goto out; |
131 | } |
132 | |
133 | EXPORT_SYMBOL_GPL(dccp_v4_connect); |
134 | |
135 | /* |
136 | * This routine does path mtu discovery as defined in RFC1191. |
137 | */ |
138 | static inline void dccp_do_pmtu_discovery(struct sock *sk, |
139 | const struct iphdr *iph, |
140 | u32 mtu) |
141 | { |
142 | struct dst_entry *dst; |
143 | const struct inet_sock *inet = inet_sk(sk); |
144 | const struct dccp_sock *dp = dccp_sk(sk); |
145 | |
146 | /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs |
147 | * send out by Linux are always < 576bytes so they should go through |
148 | * unfragmented). |
149 | */ |
150 | if (sk->sk_state == DCCP_LISTEN) |
151 | return; |
152 | |
153 | /* We don't check in the destentry if pmtu discovery is forbidden |
154 | * on this route. We just assume that no packet_to_big packets |
155 | * are send back when pmtu discovery is not active. |
156 | * There is a small race when the user changes this flag in the |
157 | * route, but I think that's acceptable. |
158 | */ |
159 | if ((dst = __sk_dst_check(sk, 0)) == NULL) |
160 | return; |
161 | |
162 | dst->ops->update_pmtu(dst, mtu); |
163 | |
164 | /* Something is about to be wrong... Remember soft error |
165 | * for the case, if this connection will not able to recover. |
166 | */ |
167 | if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) |
168 | sk->sk_err_soft = EMSGSIZE; |
169 | |
170 | mtu = dst_mtu(dst); |
171 | |
172 | if (inet->pmtudisc != IP_PMTUDISC_DONT && |
173 | inet_csk(sk)->icsk_pmtu_cookie > mtu) { |
174 | dccp_sync_mss(sk, mtu); |
175 | |
176 | /* |
177 | * From RFC 4340, sec. 14.1: |
178 | * |
179 | * DCCP-Sync packets are the best choice for upward |
180 | * probing, since DCCP-Sync probes do not risk application |
181 | * data loss. |
182 | */ |
183 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); |
184 | } /* else let the usual retransmit timer handle it */ |
185 | } |
186 | |
187 | /* |
188 | * This routine is called by the ICMP module when it gets some sort of error |
189 | * condition. If err < 0 then the socket should be closed and the error |
190 | * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. |
191 | * After adjustment header points to the first 8 bytes of the tcp header. We |
192 | * need to find the appropriate port. |
193 | * |
194 | * The locking strategy used here is very "optimistic". When someone else |
195 | * accesses the socket the ICMP is just dropped and for some paths there is no |
196 | * check at all. A more general error queue to queue errors for later handling |
197 | * is probably better. |
198 | */ |
199 | static void dccp_v4_err(struct sk_buff *skb, u32 info) |
200 | { |
201 | const struct iphdr *iph = (struct iphdr *)skb->data; |
202 | const u8 offset = iph->ihl << 2; |
203 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); |
204 | struct dccp_sock *dp; |
205 | struct inet_sock *inet; |
206 | const int type = icmp_hdr(skb)->type; |
207 | const int code = icmp_hdr(skb)->code; |
208 | struct sock *sk; |
209 | __u64 seq; |
210 | int err; |
211 | struct net *net = dev_net(skb->dev); |
212 | |
213 | if (skb->len < offset + sizeof(*dh) || |
214 | skb->len < offset + __dccp_basic_hdr_len(dh)) { |
215 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); |
216 | return; |
217 | } |
218 | |
219 | sk = inet_lookup(net, &dccp_hashinfo, |
220 | iph->daddr, dh->dccph_dport, |
221 | iph->saddr, dh->dccph_sport, inet_iif(skb)); |
222 | if (sk == NULL) { |
223 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); |
224 | return; |
225 | } |
226 | |
227 | if (sk->sk_state == DCCP_TIME_WAIT) { |
228 | inet_twsk_put(inet_twsk(sk)); |
229 | return; |
230 | } |
231 | |
232 | bh_lock_sock(sk); |
233 | /* If too many ICMPs get dropped on busy |
234 | * servers this needs to be solved differently. |
235 | */ |
236 | if (sock_owned_by_user(sk)) |
237 | NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); |
238 | |
239 | if (sk->sk_state == DCCP_CLOSED) |
240 | goto out; |
241 | |
242 | dp = dccp_sk(sk); |
243 | seq = dccp_hdr_seq(dh); |
244 | if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && |
245 | !between48(seq, dp->dccps_awl, dp->dccps_awh)) { |
246 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
247 | goto out; |
248 | } |
249 | |
250 | switch (type) { |
251 | case ICMP_SOURCE_QUENCH: |
252 | /* Just silently ignore these. */ |
253 | goto out; |
254 | case ICMP_PARAMETERPROB: |
255 | err = EPROTO; |
256 | break; |
257 | case ICMP_DEST_UNREACH: |
258 | if (code > NR_ICMP_UNREACH) |
259 | goto out; |
260 | |
261 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ |
262 | if (!sock_owned_by_user(sk)) |
263 | dccp_do_pmtu_discovery(sk, iph, info); |
264 | goto out; |
265 | } |
266 | |
267 | err = icmp_err_convert[code].errno; |
268 | break; |
269 | case ICMP_TIME_EXCEEDED: |
270 | err = EHOSTUNREACH; |
271 | break; |
272 | default: |
273 | goto out; |
274 | } |
275 | |
276 | switch (sk->sk_state) { |
277 | struct request_sock *req , **prev; |
278 | case DCCP_LISTEN: |
279 | if (sock_owned_by_user(sk)) |
280 | goto out; |
281 | req = inet_csk_search_req(sk, &prev, dh->dccph_dport, |
282 | iph->daddr, iph->saddr); |
283 | if (!req) |
284 | goto out; |
285 | |
286 | /* |
287 | * ICMPs are not backlogged, hence we cannot get an established |
288 | * socket here. |
289 | */ |
290 | WARN_ON(req->sk); |
291 | |
292 | if (seq != dccp_rsk(req)->dreq_iss) { |
293 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
294 | goto out; |
295 | } |
296 | /* |
297 | * Still in RESPOND, just remove it silently. |
298 | * There is no good way to pass the error to the newly |
299 | * created socket, and POSIX does not want network |
300 | * errors returned from accept(). |
301 | */ |
302 | inet_csk_reqsk_queue_drop(sk, req, prev); |
303 | goto out; |
304 | |
305 | case DCCP_REQUESTING: |
306 | case DCCP_RESPOND: |
307 | if (!sock_owned_by_user(sk)) { |
308 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); |
309 | sk->sk_err = err; |
310 | |
311 | sk->sk_error_report(sk); |
312 | |
313 | dccp_done(sk); |
314 | } else |
315 | sk->sk_err_soft = err; |
316 | goto out; |
317 | } |
318 | |
319 | /* If we've already connected we will keep trying |
320 | * until we time out, or the user gives up. |
321 | * |
322 | * rfc1122 4.2.3.9 allows to consider as hard errors |
323 | * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, |
324 | * but it is obsoleted by pmtu discovery). |
325 | * |
326 | * Note, that in modern internet, where routing is unreliable |
327 | * and in each dark corner broken firewalls sit, sending random |
328 | * errors ordered by their masters even this two messages finally lose |
329 | * their original sense (even Linux sends invalid PORT_UNREACHs) |
330 | * |
331 | * Now we are in compliance with RFCs. |
332 | * --ANK (980905) |
333 | */ |
334 | |
335 | inet = inet_sk(sk); |
336 | if (!sock_owned_by_user(sk) && inet->recverr) { |
337 | sk->sk_err = err; |
338 | sk->sk_error_report(sk); |
339 | } else /* Only an error on timeout */ |
340 | sk->sk_err_soft = err; |
341 | out: |
342 | bh_unlock_sock(sk); |
343 | sock_put(sk); |
344 | } |
345 | |
346 | static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, |
347 | __be32 src, __be32 dst) |
348 | { |
349 | return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); |
350 | } |
351 | |
352 | void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb) |
353 | { |
354 | const struct inet_sock *inet = inet_sk(sk); |
355 | struct dccp_hdr *dh = dccp_hdr(skb); |
356 | |
357 | dccp_csum_outgoing(skb); |
358 | dh->dccph_checksum = dccp_v4_csum_finish(skb, |
359 | inet->inet_saddr, |
360 | inet->inet_daddr); |
361 | } |
362 | |
363 | EXPORT_SYMBOL_GPL(dccp_v4_send_check); |
364 | |
365 | static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) |
366 | { |
367 | return secure_dccp_sequence_number(ip_hdr(skb)->daddr, |
368 | ip_hdr(skb)->saddr, |
369 | dccp_hdr(skb)->dccph_dport, |
370 | dccp_hdr(skb)->dccph_sport); |
371 | } |
372 | |
373 | /* |
374 | * The three way handshake has completed - we got a valid ACK or DATAACK - |
375 | * now create the new socket. |
376 | * |
377 | * This is the equivalent of TCP's tcp_v4_syn_recv_sock |
378 | */ |
379 | struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, |
380 | struct request_sock *req, |
381 | struct dst_entry *dst) |
382 | { |
383 | struct inet_request_sock *ireq; |
384 | struct inet_sock *newinet; |
385 | struct sock *newsk; |
386 | |
387 | if (sk_acceptq_is_full(sk)) |
388 | goto exit_overflow; |
389 | |
390 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) |
391 | goto exit; |
392 | |
393 | newsk = dccp_create_openreq_child(sk, req, skb); |
394 | if (newsk == NULL) |
395 | goto exit; |
396 | |
397 | sk_setup_caps(newsk, dst); |
398 | |
399 | newinet = inet_sk(newsk); |
400 | ireq = inet_rsk(req); |
401 | newinet->inet_daddr = ireq->rmt_addr; |
402 | newinet->inet_rcv_saddr = ireq->loc_addr; |
403 | newinet->inet_saddr = ireq->loc_addr; |
404 | newinet->opt = ireq->opt; |
405 | ireq->opt = NULL; |
406 | newinet->mc_index = inet_iif(skb); |
407 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
408 | newinet->inet_id = jiffies; |
409 | |
410 | dccp_sync_mss(newsk, dst_mtu(dst)); |
411 | |
412 | __inet_hash_nolisten(newsk, NULL); |
413 | __inet_inherit_port(sk, newsk); |
414 | |
415 | return newsk; |
416 | |
417 | exit_overflow: |
418 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
419 | exit: |
420 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
421 | dst_release(dst); |
422 | return NULL; |
423 | } |
424 | |
425 | EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); |
426 | |
427 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) |
428 | { |
429 | const struct dccp_hdr *dh = dccp_hdr(skb); |
430 | const struct iphdr *iph = ip_hdr(skb); |
431 | struct sock *nsk; |
432 | struct request_sock **prev; |
433 | /* Find possible connection requests. */ |
434 | struct request_sock *req = inet_csk_search_req(sk, &prev, |
435 | dh->dccph_sport, |
436 | iph->saddr, iph->daddr); |
437 | if (req != NULL) |
438 | return dccp_check_req(sk, skb, req, prev); |
439 | |
440 | nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, |
441 | iph->saddr, dh->dccph_sport, |
442 | iph->daddr, dh->dccph_dport, |
443 | inet_iif(skb)); |
444 | if (nsk != NULL) { |
445 | if (nsk->sk_state != DCCP_TIME_WAIT) { |
446 | bh_lock_sock(nsk); |
447 | return nsk; |
448 | } |
449 | inet_twsk_put(inet_twsk(nsk)); |
450 | return NULL; |
451 | } |
452 | |
453 | return sk; |
454 | } |
455 | |
456 | static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, |
457 | struct sk_buff *skb) |
458 | { |
459 | struct rtable *rt; |
460 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, |
461 | .nl_u = { .ip4_u = |
462 | { .daddr = ip_hdr(skb)->saddr, |
463 | .saddr = ip_hdr(skb)->daddr, |
464 | .tos = RT_CONN_FLAGS(sk) } }, |
465 | .proto = sk->sk_protocol, |
466 | .uli_u = { .ports = |
467 | { .sport = dccp_hdr(skb)->dccph_dport, |
468 | .dport = dccp_hdr(skb)->dccph_sport } |
469 | } |
470 | }; |
471 | |
472 | security_skb_classify_flow(skb, &fl); |
473 | if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { |
474 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
475 | return NULL; |
476 | } |
477 | |
478 | return &rt->dst; |
479 | } |
480 | |
481 | static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, |
482 | struct request_values *rv_unused) |
483 | { |
484 | int err = -1; |
485 | struct sk_buff *skb; |
486 | struct dst_entry *dst; |
487 | |
488 | dst = inet_csk_route_req(sk, req); |
489 | if (dst == NULL) |
490 | goto out; |
491 | |
492 | skb = dccp_make_response(sk, dst, req); |
493 | if (skb != NULL) { |
494 | const struct inet_request_sock *ireq = inet_rsk(req); |
495 | struct dccp_hdr *dh = dccp_hdr(skb); |
496 | |
497 | dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, |
498 | ireq->rmt_addr); |
499 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, |
500 | ireq->rmt_addr, |
501 | ireq->opt); |
502 | err = net_xmit_eval(err); |
503 | } |
504 | |
505 | out: |
506 | dst_release(dst); |
507 | return err; |
508 | } |
509 | |
510 | static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) |
511 | { |
512 | int err; |
513 | const struct iphdr *rxiph; |
514 | struct sk_buff *skb; |
515 | struct dst_entry *dst; |
516 | struct net *net = dev_net(skb_dst(rxskb)->dev); |
517 | struct sock *ctl_sk = net->dccp.v4_ctl_sk; |
518 | |
519 | /* Never send a reset in response to a reset. */ |
520 | if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) |
521 | return; |
522 | |
523 | if (skb_rtable(rxskb)->rt_type != RTN_LOCAL) |
524 | return; |
525 | |
526 | dst = dccp_v4_route_skb(net, ctl_sk, rxskb); |
527 | if (dst == NULL) |
528 | return; |
529 | |
530 | skb = dccp_ctl_make_reset(ctl_sk, rxskb); |
531 | if (skb == NULL) |
532 | goto out; |
533 | |
534 | rxiph = ip_hdr(rxskb); |
535 | dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, |
536 | rxiph->daddr); |
537 | skb_dst_set(skb, dst_clone(dst)); |
538 | |
539 | bh_lock_sock(ctl_sk); |
540 | err = ip_build_and_send_pkt(skb, ctl_sk, |
541 | rxiph->daddr, rxiph->saddr, NULL); |
542 | bh_unlock_sock(ctl_sk); |
543 | |
544 | if (net_xmit_eval(err) == 0) { |
545 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); |
546 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); |
547 | } |
548 | out: |
549 | dst_release(dst); |
550 | } |
551 | |
552 | static void dccp_v4_reqsk_destructor(struct request_sock *req) |
553 | { |
554 | dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); |
555 | kfree(inet_rsk(req)->opt); |
556 | } |
557 | |
558 | static struct request_sock_ops dccp_request_sock_ops __read_mostly = { |
559 | .family = PF_INET, |
560 | .obj_size = sizeof(struct dccp_request_sock), |
561 | .rtx_syn_ack = dccp_v4_send_response, |
562 | .send_ack = dccp_reqsk_send_ack, |
563 | .destructor = dccp_v4_reqsk_destructor, |
564 | .send_reset = dccp_v4_ctl_send_reset, |
565 | }; |
566 | |
567 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
568 | { |
569 | struct inet_request_sock *ireq; |
570 | struct request_sock *req; |
571 | struct dccp_request_sock *dreq; |
572 | const __be32 service = dccp_hdr_request(skb)->dccph_req_service; |
573 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
574 | |
575 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ |
576 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
577 | return 0; /* discard, don't send a reset here */ |
578 | |
579 | if (dccp_bad_service_code(sk, service)) { |
580 | dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; |
581 | goto drop; |
582 | } |
583 | /* |
584 | * TW buckets are converted to open requests without |
585 | * limitations, they conserve resources and peer is |
586 | * evidently real one. |
587 | */ |
588 | dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; |
589 | if (inet_csk_reqsk_queue_is_full(sk)) |
590 | goto drop; |
591 | |
592 | /* |
593 | * Accept backlog is full. If we have already queued enough |
594 | * of warm entries in syn queue, drop request. It is better than |
595 | * clogging syn queue with openreqs with exponentially increasing |
596 | * timeout. |
597 | */ |
598 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) |
599 | goto drop; |
600 | |
601 | req = inet_reqsk_alloc(&dccp_request_sock_ops); |
602 | if (req == NULL) |
603 | goto drop; |
604 | |
605 | if (dccp_reqsk_init(req, dccp_sk(sk), skb)) |
606 | goto drop_and_free; |
607 | |
608 | dreq = dccp_rsk(req); |
609 | if (dccp_parse_options(sk, dreq, skb)) |
610 | goto drop_and_free; |
611 | |
612 | if (security_inet_conn_request(sk, skb, req)) |
613 | goto drop_and_free; |
614 | |
615 | ireq = inet_rsk(req); |
616 | ireq->loc_addr = ip_hdr(skb)->daddr; |
617 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
618 | |
619 | /* |
620 | * Step 3: Process LISTEN state |
621 | * |
622 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie |
623 | * |
624 | * In fact we defer setting S.GSR, S.SWL, S.SWH to |
625 | * dccp_create_openreq_child. |
626 | */ |
627 | dreq->dreq_isr = dcb->dccpd_seq; |
628 | dreq->dreq_iss = dccp_v4_init_sequence(skb); |
629 | dreq->dreq_service = service; |
630 | |
631 | if (dccp_v4_send_response(sk, req, NULL)) |
632 | goto drop_and_free; |
633 | |
634 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); |
635 | return 0; |
636 | |
637 | drop_and_free: |
638 | reqsk_free(req); |
639 | drop: |
640 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); |
641 | return -1; |
642 | } |
643 | |
644 | EXPORT_SYMBOL_GPL(dccp_v4_conn_request); |
645 | |
646 | int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) |
647 | { |
648 | struct dccp_hdr *dh = dccp_hdr(skb); |
649 | |
650 | if (sk->sk_state == DCCP_OPEN) { /* Fast path */ |
651 | if (dccp_rcv_established(sk, skb, dh, skb->len)) |
652 | goto reset; |
653 | return 0; |
654 | } |
655 | |
656 | /* |
657 | * Step 3: Process LISTEN state |
658 | * If P.type == Request or P contains a valid Init Cookie option, |
659 | * (* Must scan the packet's options to check for Init |
660 | * Cookies. Only Init Cookies are processed here, |
661 | * however; other options are processed in Step 8. This |
662 | * scan need only be performed if the endpoint uses Init |
663 | * Cookies *) |
664 | * (* Generate a new socket and switch to that socket *) |
665 | * Set S := new socket for this port pair |
666 | * S.state = RESPOND |
667 | * Choose S.ISS (initial seqno) or set from Init Cookies |
668 | * Initialize S.GAR := S.ISS |
669 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies |
670 | * Continue with S.state == RESPOND |
671 | * (* A Response packet will be generated in Step 11 *) |
672 | * Otherwise, |
673 | * Generate Reset(No Connection) unless P.type == Reset |
674 | * Drop packet and return |
675 | * |
676 | * NOTE: the check for the packet types is done in |
677 | * dccp_rcv_state_process |
678 | */ |
679 | if (sk->sk_state == DCCP_LISTEN) { |
680 | struct sock *nsk = dccp_v4_hnd_req(sk, skb); |
681 | |
682 | if (nsk == NULL) |
683 | goto discard; |
684 | |
685 | if (nsk != sk) { |
686 | if (dccp_child_process(sk, nsk, skb)) |
687 | goto reset; |
688 | return 0; |
689 | } |
690 | } |
691 | |
692 | if (dccp_rcv_state_process(sk, skb, dh, skb->len)) |
693 | goto reset; |
694 | return 0; |
695 | |
696 | reset: |
697 | dccp_v4_ctl_send_reset(sk, skb); |
698 | discard: |
699 | kfree_skb(skb); |
700 | return 0; |
701 | } |
702 | |
703 | EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); |
704 | |
705 | /** |
706 | * dccp_invalid_packet - check for malformed packets |
707 | * Implements RFC 4340, 8.5: Step 1: Check header basics |
708 | * Packets that fail these checks are ignored and do not receive Resets. |
709 | */ |
710 | int dccp_invalid_packet(struct sk_buff *skb) |
711 | { |
712 | const struct dccp_hdr *dh; |
713 | unsigned int cscov; |
714 | |
715 | if (skb->pkt_type != PACKET_HOST) |
716 | return 1; |
717 | |
718 | /* If the packet is shorter than 12 bytes, drop packet and return */ |
719 | if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { |
720 | DCCP_WARN("pskb_may_pull failed\n"); |
721 | return 1; |
722 | } |
723 | |
724 | dh = dccp_hdr(skb); |
725 | |
726 | /* If P.type is not understood, drop packet and return */ |
727 | if (dh->dccph_type >= DCCP_PKT_INVALID) { |
728 | DCCP_WARN("invalid packet type\n"); |
729 | return 1; |
730 | } |
731 | |
732 | /* |
733 | * If P.Data Offset is too small for packet type, drop packet and return |
734 | */ |
735 | if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { |
736 | DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); |
737 | return 1; |
738 | } |
739 | /* |
740 | * If P.Data Offset is too too large for packet, drop packet and return |
741 | */ |
742 | if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { |
743 | DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); |
744 | return 1; |
745 | } |
746 | |
747 | /* |
748 | * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet |
749 | * has short sequence numbers), drop packet and return |
750 | */ |
751 | if ((dh->dccph_type < DCCP_PKT_DATA || |
752 | dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { |
753 | DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", |
754 | dccp_packet_name(dh->dccph_type)); |
755 | return 1; |
756 | } |
757 | |
758 | /* |
759 | * If P.CsCov is too large for the packet size, drop packet and return. |
760 | * This must come _before_ checksumming (not as RFC 4340 suggests). |
761 | */ |
762 | cscov = dccp_csum_coverage(skb); |
763 | if (cscov > skb->len) { |
764 | DCCP_WARN("P.CsCov %u exceeds packet length %d\n", |
765 | dh->dccph_cscov, skb->len); |
766 | return 1; |
767 | } |
768 | |
769 | /* If header checksum is incorrect, drop packet and return. |
770 | * (This step is completed in the AF-dependent functions.) */ |
771 | skb->csum = skb_checksum(skb, 0, cscov, 0); |
772 | |
773 | return 0; |
774 | } |
775 | |
776 | EXPORT_SYMBOL_GPL(dccp_invalid_packet); |
777 | |
778 | /* this is called when real data arrives */ |
779 | static int dccp_v4_rcv(struct sk_buff *skb) |
780 | { |
781 | const struct dccp_hdr *dh; |
782 | const struct iphdr *iph; |
783 | struct sock *sk; |
784 | int min_cov; |
785 | |
786 | /* Step 1: Check header basics */ |
787 | |
788 | if (dccp_invalid_packet(skb)) |
789 | goto discard_it; |
790 | |
791 | iph = ip_hdr(skb); |
792 | /* Step 1: If header checksum is incorrect, drop packet and return */ |
793 | if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) { |
794 | DCCP_WARN("dropped packet with invalid checksum\n"); |
795 | goto discard_it; |
796 | } |
797 | |
798 | dh = dccp_hdr(skb); |
799 | |
800 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); |
801 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; |
802 | |
803 | dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu", |
804 | dccp_packet_name(dh->dccph_type), |
805 | &iph->saddr, ntohs(dh->dccph_sport), |
806 | &iph->daddr, ntohs(dh->dccph_dport), |
807 | (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); |
808 | |
809 | if (dccp_packet_without_ack(skb)) { |
810 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; |
811 | dccp_pr_debug_cat("\n"); |
812 | } else { |
813 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); |
814 | dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) |
815 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
816 | } |
817 | |
818 | /* Step 2: |
819 | * Look up flow ID in table and get corresponding socket */ |
820 | sk = __inet_lookup_skb(&dccp_hashinfo, skb, |
821 | dh->dccph_sport, dh->dccph_dport); |
822 | /* |
823 | * Step 2: |
824 | * If no socket ... |
825 | */ |
826 | if (sk == NULL) { |
827 | dccp_pr_debug("failed to look up flow ID in table and " |
828 | "get corresponding socket\n"); |
829 | goto no_dccp_socket; |
830 | } |
831 | |
832 | /* |
833 | * Step 2: |
834 | * ... or S.state == TIMEWAIT, |
835 | * Generate Reset(No Connection) unless P.type == Reset |
836 | * Drop packet and return |
837 | */ |
838 | if (sk->sk_state == DCCP_TIME_WAIT) { |
839 | dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); |
840 | inet_twsk_put(inet_twsk(sk)); |
841 | goto no_dccp_socket; |
842 | } |
843 | |
844 | /* |
845 | * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage |
846 | * o if MinCsCov = 0, only packets with CsCov = 0 are accepted |
847 | * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov |
848 | */ |
849 | min_cov = dccp_sk(sk)->dccps_pcrlen; |
850 | if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { |
851 | dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", |
852 | dh->dccph_cscov, min_cov); |
853 | /* FIXME: "Such packets SHOULD be reported using Data Dropped |
854 | * options (Section 11.7) with Drop Code 0, Protocol |
855 | * Constraints." */ |
856 | goto discard_and_relse; |
857 | } |
858 | |
859 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
860 | goto discard_and_relse; |
861 | nf_reset(skb); |
862 | |
863 | return sk_receive_skb(sk, skb, 1); |
864 | |
865 | no_dccp_socket: |
866 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
867 | goto discard_it; |
868 | /* |
869 | * Step 2: |
870 | * If no socket ... |
871 | * Generate Reset(No Connection) unless P.type == Reset |
872 | * Drop packet and return |
873 | */ |
874 | if (dh->dccph_type != DCCP_PKT_RESET) { |
875 | DCCP_SKB_CB(skb)->dccpd_reset_code = |
876 | DCCP_RESET_CODE_NO_CONNECTION; |
877 | dccp_v4_ctl_send_reset(sk, skb); |
878 | } |
879 | |
880 | discard_it: |
881 | kfree_skb(skb); |
882 | return 0; |
883 | |
884 | discard_and_relse: |
885 | sock_put(sk); |
886 | goto discard_it; |
887 | } |
888 | |
889 | static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { |
890 | .queue_xmit = ip_queue_xmit, |
891 | .send_check = dccp_v4_send_check, |
892 | .rebuild_header = inet_sk_rebuild_header, |
893 | .conn_request = dccp_v4_conn_request, |
894 | .syn_recv_sock = dccp_v4_request_recv_sock, |
895 | .net_header_len = sizeof(struct iphdr), |
896 | .setsockopt = ip_setsockopt, |
897 | .getsockopt = ip_getsockopt, |
898 | .addr2sockaddr = inet_csk_addr2sockaddr, |
899 | .sockaddr_len = sizeof(struct sockaddr_in), |
900 | .bind_conflict = inet_csk_bind_conflict, |
901 | #ifdef CONFIG_COMPAT |
902 | .compat_setsockopt = compat_ip_setsockopt, |
903 | .compat_getsockopt = compat_ip_getsockopt, |
904 | #endif |
905 | }; |
906 | |
907 | static int dccp_v4_init_sock(struct sock *sk) |
908 | { |
909 | static __u8 dccp_v4_ctl_sock_initialized; |
910 | int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); |
911 | |
912 | if (err == 0) { |
913 | if (unlikely(!dccp_v4_ctl_sock_initialized)) |
914 | dccp_v4_ctl_sock_initialized = 1; |
915 | inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; |
916 | } |
917 | |
918 | return err; |
919 | } |
920 | |
921 | static struct timewait_sock_ops dccp_timewait_sock_ops = { |
922 | .twsk_obj_size = sizeof(struct inet_timewait_sock), |
923 | }; |
924 | |
925 | static struct proto dccp_v4_prot = { |
926 | .name = "DCCP", |
927 | .owner = THIS_MODULE, |
928 | .close = dccp_close, |
929 | .connect = dccp_v4_connect, |
930 | .disconnect = dccp_disconnect, |
931 | .ioctl = dccp_ioctl, |
932 | .init = dccp_v4_init_sock, |
933 | .setsockopt = dccp_setsockopt, |
934 | .getsockopt = dccp_getsockopt, |
935 | .sendmsg = dccp_sendmsg, |
936 | .recvmsg = dccp_recvmsg, |
937 | .backlog_rcv = dccp_v4_do_rcv, |
938 | .hash = inet_hash, |
939 | .unhash = inet_unhash, |
940 | .accept = inet_csk_accept, |
941 | .get_port = inet_csk_get_port, |
942 | .shutdown = dccp_shutdown, |
943 | .destroy = dccp_destroy_sock, |
944 | .orphan_count = &dccp_orphan_count, |
945 | .max_header = MAX_DCCP_HEADER, |
946 | .obj_size = sizeof(struct dccp_sock), |
947 | .slab_flags = SLAB_DESTROY_BY_RCU, |
948 | .rsk_prot = &dccp_request_sock_ops, |
949 | .twsk_prot = &dccp_timewait_sock_ops, |
950 | .h.hashinfo = &dccp_hashinfo, |
951 | #ifdef CONFIG_COMPAT |
952 | .compat_setsockopt = compat_dccp_setsockopt, |
953 | .compat_getsockopt = compat_dccp_getsockopt, |
954 | #endif |
955 | }; |
956 | |
957 | static const struct net_protocol dccp_v4_protocol = { |
958 | .handler = dccp_v4_rcv, |
959 | .err_handler = dccp_v4_err, |
960 | .no_policy = 1, |
961 | .netns_ok = 1, |
962 | }; |
963 | |
964 | static const struct proto_ops inet_dccp_ops = { |
965 | .family = PF_INET, |
966 | .owner = THIS_MODULE, |
967 | .release = inet_release, |
968 | .bind = inet_bind, |
969 | .connect = inet_stream_connect, |
970 | .socketpair = sock_no_socketpair, |
971 | .accept = inet_accept, |
972 | .getname = inet_getname, |
973 | /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ |
974 | .poll = dccp_poll, |
975 | .ioctl = inet_ioctl, |
976 | /* FIXME: work on inet_listen to rename it to sock_common_listen */ |
977 | .listen = inet_dccp_listen, |
978 | .shutdown = inet_shutdown, |
979 | .setsockopt = sock_common_setsockopt, |
980 | .getsockopt = sock_common_getsockopt, |
981 | .sendmsg = inet_sendmsg, |
982 | .recvmsg = sock_common_recvmsg, |
983 | .mmap = sock_no_mmap, |
984 | .sendpage = sock_no_sendpage, |
985 | #ifdef CONFIG_COMPAT |
986 | .compat_setsockopt = compat_sock_common_setsockopt, |
987 | .compat_getsockopt = compat_sock_common_getsockopt, |
988 | #endif |
989 | }; |
990 | |
991 | static struct inet_protosw dccp_v4_protosw = { |
992 | .type = SOCK_DCCP, |
993 | .protocol = IPPROTO_DCCP, |
994 | .prot = &dccp_v4_prot, |
995 | .ops = &inet_dccp_ops, |
996 | .no_check = 0, |
997 | .flags = INET_PROTOSW_ICSK, |
998 | }; |
999 | |
1000 | static int __net_init dccp_v4_init_net(struct net *net) |
1001 | { |
1002 | if (dccp_hashinfo.bhash == NULL) |
1003 | return -ESOCKTNOSUPPORT; |
1004 | |
1005 | return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, |
1006 | SOCK_DCCP, IPPROTO_DCCP, net); |
1007 | } |
1008 | |
1009 | static void __net_exit dccp_v4_exit_net(struct net *net) |
1010 | { |
1011 | inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); |
1012 | } |
1013 | |
1014 | static struct pernet_operations dccp_v4_ops = { |
1015 | .init = dccp_v4_init_net, |
1016 | .exit = dccp_v4_exit_net, |
1017 | }; |
1018 | |
1019 | static int __init dccp_v4_init(void) |
1020 | { |
1021 | int err = proto_register(&dccp_v4_prot, 1); |
1022 | |
1023 | if (err != 0) |
1024 | goto out; |
1025 | |
1026 | err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); |
1027 | if (err != 0) |
1028 | goto out_proto_unregister; |
1029 | |
1030 | inet_register_protosw(&dccp_v4_protosw); |
1031 | |
1032 | err = register_pernet_subsys(&dccp_v4_ops); |
1033 | if (err) |
1034 | goto out_destroy_ctl_sock; |
1035 | out: |
1036 | return err; |
1037 | out_destroy_ctl_sock: |
1038 | inet_unregister_protosw(&dccp_v4_protosw); |
1039 | inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); |
1040 | out_proto_unregister: |
1041 | proto_unregister(&dccp_v4_prot); |
1042 | goto out; |
1043 | } |
1044 | |
1045 | static void __exit dccp_v4_exit(void) |
1046 | { |
1047 | unregister_pernet_subsys(&dccp_v4_ops); |
1048 | inet_unregister_protosw(&dccp_v4_protosw); |
1049 | inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); |
1050 | proto_unregister(&dccp_v4_prot); |
1051 | } |
1052 | |
1053 | module_init(dccp_v4_init); |
1054 | module_exit(dccp_v4_exit); |
1055 | |
1056 | /* |
1057 | * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) |
1058 | * values directly, Also cover the case where the protocol is not specified, |
1059 | * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP |
1060 | */ |
1061 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); |
1062 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); |
1063 | MODULE_LICENSE("GPL"); |
1064 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); |
1065 | MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); |
1066 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9