/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
#include <linux/imq.h>
#endif
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16?  Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/
/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in the hash buckets, and the check of protocol
 *	handlers MUST start from the promiscuous ptype_all chain in
 *	net_bh. This holds today; do not change it.
 *	Explanation: if a mangling protocol handler were first on the
 *	list, it could not tell that the packet is cloned and should be
 *	copied-on-write, so it would modify it in place and subsequent
 *	readers would get a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
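
/*
 * Illustrative sketch only (not used elsewhere in this file): a protocol
 * module would typically register a handler like this. The names
 * my_proto_rcv and my_ptype, and the ETH_P_IP binding, are assumptions
 * made for the example.
 *
 *	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
 *				struct packet_type *pt,
 *				struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);		// consume the packet
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ptype __read_mostly = {
 *		.type	= htons(ETH_P_IP),
 *		.func	= my_proto_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);
 *
 * The matching teardown is dev_remove_pack(&my_ptype), which also waits
 * for a grace period so the handler may then be freed.
 */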

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

/*******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	Returns the configured base address, 1 if the device is already
 *	registered (to indicate that it should not be probed), or 0 if
 *	no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves the settings configured at boot time for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
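
/*
 * Sketch of the resulting command-line usage (the values are assumptions
 * for illustration): the integers map, in order, to irq, base_addr,
 * mem_start and mem_end, and the trailing string names the device:
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * would record IRQ 5 and I/O base 0x300 for eth0, to be picked up later
 * by netdev_boot_setup_check() during probing.
 */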

/*******************************************************************************

			Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
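
/*
 * Typical caller pattern, sketched for illustration: the reference taken
 * by dev_get_by_name() must be balanced with dev_put() once the caller
 * is done with the device.
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */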

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count
 *	increased and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
				    unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
EXPORT_SYMBOL(dev_valid_name);
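
/*
 * For illustration, names this check accepts and rejects:
 *
 *	dev_valid_name("eth0")  -> 1
 *	dev_valid_name("eth 0") -> 0	(whitespace)
 *	dev_valid_name("a/b")   -> 0	('/' would break sysfs paths)
 *	dev_valid_name("..")    -> 0	(reserved file name)
 *	dev_valid_name("")      -> 0	(empty)
 */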

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - e.g. "lt%d" - it will try to find a
 *	suitable id. It scans the list of devices to build up a free map,
 *	then chooses the first empty slot. The caller must hold the dev_base
 *	or rtnl lock while allocating the name and adding the device in
 *	order to avoid duplicates.
 *	Limited to bits_per_byte * page size devices (i.e. 32K on most
 *	platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - e.g. "lt%d" - it will try to find a
 *	suitable id. It scans the list of devices to build up a free map,
 *	then chooses the first empty slot. The caller must hold the dev_base
 *	or rtnl lock while allocating the name and adding the device in
 *	order to avoid duplicates.
 *	Limited to bits_per_byte * page size devices (i.e. 32K on most
 *	platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
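
/*
 * Sketch of the allocation behaviour (illustrative): with eth0 and eth1
 * already registered, passing the format string "eth%d" marks bits 0 and
 * 1 in the in-use map, find_first_zero_bit() returns 2, and the device
 * ends up named "eth2":
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		return err;
 */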


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change the name of a device; a format string such as "eth%d" may
 *	be passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
	} else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net == &init_net) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set the ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_NET_ADMIN))
		request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Enable NET_DMA
		 */
		net_dmaengine_get();

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}
EXPORT_SYMBOL(dev_open);
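
/*
 * dev_open() must run under the RTNL, as the ASSERT_RTNL() above
 * enforces; a sketch of the usual calling pattern:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */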

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	/*
	 *	Shutdown NET_DMA
	 */
	net_dmaengine_put();

	return 0;
}
EXPORT_SYMBOL(dev_close);


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to allow it a race-free view of the network
 *	device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
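
/*
 * Minimal notifier sketch for illustration; my_netdev_event and my_nb
 * are assumptions made for this example. In this kernel the notifier's
 * data pointer is the struct net_device itself.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */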

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

#ifdef CONFIG_NET_CLS_ACT
	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
		net_timestamp(skb);
#else
	net_timestamp(skb);
#endif

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			 * set by the sender, so that the second statement is
			 * just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = sd->output_queue;
	sd->output_queue = q;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
	return ((features & NETIF_F_GEN_CSUM) ||
		((features & NETIF_F_IP_CSUM) &&
		 protocol == htons(ETH_P_IP)) ||
		((features & NETIF_F_IPV6_CSUM) &&
		 protocol == htons(ETH_P_IPV6)) ||
		((features & NETIF_F_FCOE_CRC) &&
		 protocol == htons(ETH_P_FCOE)));
}

static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
	if (can_checksum_protocol(dev->features, skb->protocol))
		return true;

	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		if (can_checksum_protocol(dev->features & dev->vlan_features,
					  veh->h_vlan_encapsulated_proto))
			return true;
	}

	return false;
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		struct net_device *dev = skb->dev;
		struct ethtool_drvinfo info = {};

		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
			dev->ethtool_ops->get_drvinfo(dev, &info);

		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
			"ip_summed=%d",
		     info.driver, dev ? dev->features : 0L,
		     skb->sk ? skb->sk->sk_route_caps : 0L,
		     skb->len, skb->data_len, skb->ip_summed);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. IOMMU is present and can map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}
1702 | |
1703 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
1704 | struct netdev_queue *txq) |
1705 | { |
1706 | const struct net_device_ops *ops = dev->netdev_ops; |
1707 | int rc; |
1708 | |
1709 | if (likely(!skb->next)) { |
1710 | if (!list_empty(&ptype_all) |
1711 | #if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) |
1712 | && !(skb->imq_flags & IMQ_F_ENQUEUE) |
1713 | #endif |
1714 | ) |
1715 | dev_queue_xmit_nit(skb, dev); |
1716 | |
1717 | if (netif_needs_gso(dev, skb)) { |
1718 | if (unlikely(dev_gso_segment(skb))) |
1719 | goto out_kfree_skb; |
1720 | if (skb->next) |
1721 | goto gso; |
1722 | } |
1723 | |
		/*
		 * If the device doesn't need skb->dst, release it right now
		 * while it's hot in this CPU's cache.
		 */
1728 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1729 | skb_dst_drop(skb); |
1730 | |
1731 | rc = ops->ndo_start_xmit(skb, dev); |
1732 | if (rc == NETDEV_TX_OK) |
1733 | txq_trans_update(txq); |
		/*
		 * TODO: if skb_orphan() was called by
		 * dev->hard_start_xmit() (for example, the unmodified
		 * igb driver does that; bnx2 doesn't), then
		 * skb_tx_software_timestamp() will be unable to send
		 * back the time stamp.
		 *
		 * How can this be prevented? Always create another
		 * reference to the socket before calling
		 * dev->hard_start_xmit()? Prevent skb_orphan() from
		 * doing anything in dev->hard_start_xmit() by clearing
		 * the skb destructor before the call and restoring it
		 * afterwards, then doing the skb_orphan() ourselves?
		 */
1748 | return rc; |
1749 | } |
1750 | |
1751 | gso: |
1752 | do { |
1753 | struct sk_buff *nskb = skb->next; |
1754 | |
1755 | skb->next = nskb->next; |
1756 | nskb->next = NULL; |
1757 | rc = ops->ndo_start_xmit(nskb, dev); |
1758 | if (unlikely(rc != NETDEV_TX_OK)) { |
1759 | nskb->next = skb->next; |
1760 | skb->next = nskb; |
1761 | return rc; |
1762 | } |
1763 | txq_trans_update(txq); |
1764 | if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) |
1765 | return NETDEV_TX_BUSY; |
1766 | } while (skb->next); |
1767 | |
1768 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
1769 | |
1770 | out_kfree_skb: |
1771 | kfree_skb(skb); |
1772 | return NETDEV_TX_OK; |
1773 | } |
1774 | |
1775 | static u32 skb_tx_hashrnd; |
1776 | |
1777 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) |
1778 | { |
1779 | u32 hash; |
1780 | |
1781 | if (skb_rx_queue_recorded(skb)) { |
1782 | hash = skb_get_rx_queue(skb); |
1783 | while (unlikely(hash >= dev->real_num_tx_queues)) |
1784 | hash -= dev->real_num_tx_queues; |
1785 | return hash; |
1786 | } |
1787 | |
1788 | if (skb->sk && skb->sk->sk_hash) |
1789 | hash = skb->sk->sk_hash; |
1790 | else |
1791 | hash = skb->protocol; |
1792 | |
1793 | hash = jhash_1word(hash, skb_tx_hashrnd); |
1794 | |
1795 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
1796 | } |
1797 | EXPORT_SYMBOL(skb_tx_hash); |
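
/*
 * The scaling step above maps a 32-bit jhash value uniformly onto
 * [0, real_num_tx_queues) without a modulo: treating the hash as a
 * fraction of 2^32, ((u64)hash * n) >> 32 selects the matching slot.
 * A worked example with n = 4 queues:
 *
 *	hash = 0x40000000 (1/4 of 2^32)      ->  (hash * 4) >> 32 = 1
 *	hash = 0xffffffff (just below 2^32)  ->  (hash * 4) >> 32 = 3
 *
 * This costs one multiply and one shift instead of a divide.
 */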
1798 | |
1799 | struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) |
1800 | { |
1801 | const struct net_device_ops *ops = dev->netdev_ops; |
1802 | u16 queue_index = 0; |
1803 | |
1804 | if (ops->ndo_select_queue) |
1805 | queue_index = ops->ndo_select_queue(dev, skb); |
1806 | else if (dev->real_num_tx_queues > 1) |
1807 | queue_index = skb_tx_hash(dev, skb); |
1808 | |
1809 | skb_set_queue_mapping(skb, queue_index); |
1810 | return netdev_get_tx_queue(dev, queue_index); |
1811 | } |
1812 | EXPORT_SYMBOL(dev_pick_tx); |
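
/*
 * A multiqueue driver that wants its own steering policy provides
 * ndo_select_queue instead of relying on skb_tx_hash(). A minimal
 * sketch -- the foo_* driver is hypothetical, not part of the tree:
 *
 *	static u16 foo_select_queue(struct net_device *dev,
 *				    struct sk_buff *skb)
 *	{
 *		if (skb->priority == TC_PRIO_CONTROL)
 *			return 0;	(dedicated high-priority queue)
 *		return skb_tx_hash(dev, skb);
 *	}
 */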
1813 | |
1814 | static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, |
1815 | struct net_device *dev, |
1816 | struct netdev_queue *txq) |
1817 | { |
1818 | spinlock_t *root_lock = qdisc_lock(q); |
1819 | int rc; |
1820 | |
1821 | spin_lock(root_lock); |
1822 | if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { |
1823 | kfree_skb(skb); |
1824 | rc = NET_XMIT_DROP; |
1825 | } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && |
1826 | !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { |
1827 | /* |
1828 | * This is a work-conserving queue; there are no old skbs |
1829 | * waiting to be sent out; and the qdisc is not running - |
1830 | * xmit the skb directly. |
1831 | */ |
1832 | __qdisc_update_bstats(q, skb->len); |
1833 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) |
1834 | __qdisc_run(q); |
1835 | else |
1836 | clear_bit(__QDISC_STATE_RUNNING, &q->state); |
1837 | |
1838 | rc = NET_XMIT_SUCCESS; |
1839 | } else { |
1840 | rc = qdisc_enqueue_root(skb, q); |
1841 | qdisc_run(q); |
1842 | } |
1843 | spin_unlock(root_lock); |
1844 | |
1845 | return rc; |
1846 | } |
1847 | |
1848 | /** |
1849 | * dev_queue_xmit - transmit a buffer |
1850 | * @skb: buffer to transmit |
1851 | * |
1852 | * Queue a buffer for transmission to a network device. The caller must |
1853 | * have set the device and priority and built the buffer before calling |
1854 | * this function. The function can be called from an interrupt. |
1855 | * |
1856 | * A negative errno code is returned on a failure. A success does not |
1857 | * guarantee the frame will be transmitted as it may be dropped due |
1858 | * to congestion or traffic shaping. |
1859 | * |
1860 | * ----------------------------------------------------------------------------------- |
1861 | * I notice this method can also return errors from the queue disciplines, |
1862 | * including NET_XMIT_DROP, which is a positive value. So, errors can also |
1863 | * be positive. |
1864 | * |
 * Regardless of the return value, the skb is consumed, so it is currently
 * difficult to retry a failed send. (You can bump the skb's reference count
 * before sending to hold a reference for a retry, if you are careful.)
1868 | * |
1869 | * When calling this method, interrupts MUST be enabled. This is because |
1870 | * the BH enable code must have IRQs enabled so that it will not deadlock. |
1871 | * --BLG |
1872 | */ |
1873 | int dev_queue_xmit(struct sk_buff *skb) |
1874 | { |
1875 | struct net_device *dev = skb->dev; |
1876 | struct netdev_queue *txq; |
1877 | struct Qdisc *q; |
1878 | int rc = -ENOMEM; |
1879 | |
1880 | /* GSO will handle the following emulations directly. */ |
1881 | if (netif_needs_gso(dev, skb)) |
1882 | goto gso; |
1883 | |
1884 | if (skb_has_frags(skb) && |
1885 | !(dev->features & NETIF_F_FRAGLIST) && |
1886 | __skb_linearize(skb)) |
1887 | goto out_kfree_skb; |
1888 | |
	/* A fragmented skb is linearized if the device does not support SG,
	 * or if at least one of the fragments is in high memory and the
	 * device does not support DMA from it.
	 */
1893 | if (skb_shinfo(skb)->nr_frags && |
1894 | (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && |
1895 | __skb_linearize(skb)) |
1896 | goto out_kfree_skb; |
1897 | |
	/* If the packet is not checksummed and the device does not support
	 * checksumming for this protocol, complete the checksum here.
	 */
1901 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
1902 | skb_set_transport_header(skb, skb->csum_start - |
1903 | skb_headroom(skb)); |
1904 | if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) |
1905 | goto out_kfree_skb; |
1906 | } |
1907 | |
1908 | gso: |
1909 | /* Disable soft irqs for various locks below. Also |
1910 | * stops preemption for RCU. |
1911 | */ |
1912 | rcu_read_lock_bh(); |
1913 | |
1914 | txq = dev_pick_tx(dev, skb); |
1915 | q = rcu_dereference(txq->qdisc); |
1916 | |
1917 | #ifdef CONFIG_NET_CLS_ACT |
1918 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
1919 | #endif |
1920 | if (q->enqueue) { |
1921 | rc = __dev_xmit_skb(skb, q, dev, txq); |
1922 | goto out; |
1923 | } |
1924 | |
	/* The device has no queue. This is the common case for software
	   devices: loopback, all sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here. (E.g. loopback and IP tunnels are clean, ignoring the
	   statistics counters.)
	   However, it is possible that they rely on the protection
	   made by us here.

	   Check this and shoot the lock. It is not prone to deadlocks.
	   Or shoot the noqueue qdisc instead -- that is even simpler 8)
	 */
1937 | if (dev->flags & IFF_UP) { |
1938 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
1939 | |
1940 | if (txq->xmit_lock_owner != cpu) { |
1941 | |
1942 | HARD_TX_LOCK(dev, txq, cpu); |
1943 | |
1944 | if (!netif_tx_queue_stopped(txq)) { |
1945 | rc = NET_XMIT_SUCCESS; |
1946 | if (!dev_hard_start_xmit(skb, dev, txq)) { |
1947 | HARD_TX_UNLOCK(dev, txq); |
1948 | goto out; |
1949 | } |
1950 | } |
1951 | HARD_TX_UNLOCK(dev, txq); |
1952 | if (net_ratelimit()) |
1953 | printk(KERN_CRIT "Virtual device %s asks to " |
1954 | "queue packet!\n", dev->name); |
1955 | } else { |
1956 | /* Recursion is detected! It is possible, |
1957 | * unfortunately */ |
1958 | if (net_ratelimit()) |
1959 | printk(KERN_CRIT "Dead loop on virtual device " |
1960 | "%s, fix it urgently!\n", dev->name); |
1961 | } |
1962 | } |
1963 | |
1964 | rc = -ENETDOWN; |
1965 | rcu_read_unlock_bh(); |
1966 | |
1967 | out_kfree_skb: |
1968 | kfree_skb(skb); |
1969 | return rc; |
1970 | out: |
1971 | rcu_read_unlock_bh(); |
1972 | return rc; |
1973 | } |
1974 | EXPORT_SYMBOL(dev_queue_xmit); |
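
/*
 * Typical use from a protocol implementation, as a minimal sketch
 * (header construction and error handling elided):
 *
 *	skb->dev = dev;
 *	skb->protocol = htons(ETH_P_IP);
 *	... build link-layer and protocol headers ...
 *	rc = dev_queue_xmit(skb);
 *
 * The skb is consumed even on failure, so the caller must not touch
 * it once this has been called (see the note on retries above).
 */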
1975 | |
1976 | |
1977 | /*======================================================================= |
1978 | Receiver routines |
1979 | =======================================================================*/ |
1980 | |
1981 | int netdev_max_backlog __read_mostly = 1000; |
1982 | int netdev_budget __read_mostly = 300; |
1983 | int weight_p __read_mostly = 64; /* old backlog weight */ |
1984 | |
1985 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; |
1986 | |
1987 | |
1988 | /** |
1989 | * netif_rx - post buffer to the network code |
1990 | * @skb: buffer to post |
1991 | * |
1992 | * This function receives a packet from a device driver and queues it for |
1993 | * the upper (protocol) levels to process. It always succeeds. The buffer |
1994 | * may be dropped during processing for congestion control or by the |
1995 | * protocol layers. |
1996 | * |
1997 | * return values: |
1998 | * NET_RX_SUCCESS (no congestion) |
1999 | * NET_RX_DROP (packet was dropped) |
2000 | * |
2001 | */ |
2002 | |
2003 | int netif_rx(struct sk_buff *skb) |
2004 | { |
2005 | struct softnet_data *queue; |
2006 | unsigned long flags; |
2007 | |
2008 | /* if netpoll wants it, pretend we never saw it */ |
2009 | if (netpoll_rx(skb)) |
2010 | return NET_RX_DROP; |
2011 | |
2012 | if (!skb->tstamp.tv64) |
2013 | net_timestamp(skb); |
2014 | |
	/*
	 * The code is rearranged so that the path is shortest
	 * when the CPU is congested but still operating.
	 */
2019 | local_irq_save(flags); |
2020 | queue = &__get_cpu_var(softnet_data); |
2021 | |
2022 | __get_cpu_var(netdev_rx_stat).total++; |
2023 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { |
2024 | if (queue->input_pkt_queue.qlen) { |
2025 | enqueue: |
2026 | __skb_queue_tail(&queue->input_pkt_queue, skb); |
2027 | local_irq_restore(flags); |
2028 | return NET_RX_SUCCESS; |
2029 | } |
2030 | |
2031 | napi_schedule(&queue->backlog); |
2032 | goto enqueue; |
2033 | } |
2034 | |
2035 | __get_cpu_var(netdev_rx_stat).dropped++; |
2036 | local_irq_restore(flags); |
2037 | |
2038 | kfree_skb(skb); |
2039 | return NET_RX_DROP; |
2040 | } |
2041 | EXPORT_SYMBOL(netif_rx); |
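
/*
 * Classic (non-NAPI) receive in a driver's interrupt handler, as a
 * minimal sketch -- rx_buf and pkt_len come from a hypothetical
 * device ring:
 *
 *	skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
 *	if (skb) {
 *		skb_reserve(skb, NET_IP_ALIGN);
 *		memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);
 *	}
 *
 * The NET_IP_ALIGN reservation keeps the IP header aligned behind
 * the 14-byte Ethernet header.
 */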
2042 | |
2043 | int netif_rx_ni(struct sk_buff *skb) |
2044 | { |
2045 | int err; |
2046 | |
2047 | preempt_disable(); |
2048 | err = netif_rx(skb); |
2049 | if (local_softirq_pending()) |
2050 | do_softirq(); |
2051 | preempt_enable(); |
2052 | |
2053 | return err; |
2054 | } |
2055 | EXPORT_SYMBOL(netif_rx_ni); |
2056 | |
2057 | static void net_tx_action(struct softirq_action *h) |
2058 | { |
2059 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
2060 | |
2061 | if (sd->completion_queue) { |
2062 | struct sk_buff *clist; |
2063 | |
2064 | local_irq_disable(); |
2065 | clist = sd->completion_queue; |
2066 | sd->completion_queue = NULL; |
2067 | local_irq_enable(); |
2068 | |
2069 | while (clist) { |
2070 | struct sk_buff *skb = clist; |
2071 | clist = clist->next; |
2072 | |
2073 | WARN_ON(atomic_read(&skb->users)); |
2074 | __kfree_skb(skb); |
2075 | } |
2076 | } |
2077 | |
2078 | if (sd->output_queue) { |
2079 | struct Qdisc *head; |
2080 | |
2081 | local_irq_disable(); |
2082 | head = sd->output_queue; |
2083 | sd->output_queue = NULL; |
2084 | local_irq_enable(); |
2085 | |
2086 | while (head) { |
2087 | struct Qdisc *q = head; |
2088 | spinlock_t *root_lock; |
2089 | |
2090 | head = head->next_sched; |
2091 | |
2092 | root_lock = qdisc_lock(q); |
2093 | if (spin_trylock(root_lock)) { |
2094 | smp_mb__before_clear_bit(); |
2095 | clear_bit(__QDISC_STATE_SCHED, |
2096 | &q->state); |
2097 | qdisc_run(q); |
2098 | spin_unlock(root_lock); |
2099 | } else { |
2100 | if (!test_bit(__QDISC_STATE_DEACTIVATED, |
2101 | &q->state)) { |
2102 | __netif_reschedule(q); |
2103 | } else { |
2104 | smp_mb__before_clear_bit(); |
2105 | clear_bit(__QDISC_STATE_SCHED, |
2106 | &q->state); |
2107 | } |
2108 | } |
2109 | } |
2110 | } |
2111 | } |
2112 | |
2113 | static inline int deliver_skb(struct sk_buff *skb, |
2114 | struct packet_type *pt_prev, |
2115 | struct net_device *orig_dev) |
2116 | { |
2117 | atomic_inc(&skb->users); |
2118 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2119 | } |
2120 | |
2121 | #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) |
2122 | |
2123 | #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) |
2124 | /* This hook is defined here for ATM LANE */ |
2125 | int (*br_fdb_test_addr_hook)(struct net_device *dev, |
2126 | unsigned char *addr) __read_mostly; |
2127 | EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); |
2128 | #endif |
2129 | |
/*
 * If the bridge module is loaded, call the bridging hook.
 * Returns NULL if the packet was consumed.
 */
2134 | struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, |
2135 | struct sk_buff *skb) __read_mostly; |
2136 | EXPORT_SYMBOL_GPL(br_handle_frame_hook); |
2137 | |
2138 | static inline struct sk_buff *handle_bridge(struct sk_buff *skb, |
2139 | struct packet_type **pt_prev, int *ret, |
2140 | struct net_device *orig_dev) |
2141 | { |
2142 | struct net_bridge_port *port; |
2143 | |
2144 | if (skb->pkt_type == PACKET_LOOPBACK || |
2145 | (port = rcu_dereference(skb->dev->br_port)) == NULL) |
2146 | return skb; |
2147 | |
2148 | if (*pt_prev) { |
2149 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
2150 | *pt_prev = NULL; |
2151 | } |
2152 | |
2153 | return br_handle_frame_hook(port, skb); |
2154 | } |
2155 | #else |
2156 | #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) |
2157 | #endif |
2158 | |
2159 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) |
2160 | struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; |
2161 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); |
2162 | |
2163 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, |
2164 | struct packet_type **pt_prev, |
2165 | int *ret, |
2166 | struct net_device *orig_dev) |
2167 | { |
2168 | if (skb->dev->macvlan_port == NULL) |
2169 | return skb; |
2170 | |
2171 | if (*pt_prev) { |
2172 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
2173 | *pt_prev = NULL; |
2174 | } |
2175 | return macvlan_handle_frame_hook(skb); |
2176 | } |
2177 | #else |
2178 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) |
2179 | #endif |
2180 | |
2181 | #ifdef CONFIG_NET_CLS_ACT |
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
 * instructions (a compare and two extra stores) whenever it is off
 * but CONFIG_NET_CLS_ACT is on.
 * NOTE: This doesn't remove any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
2190 | static int ing_filter(struct sk_buff *skb) |
2191 | { |
2192 | struct net_device *dev = skb->dev; |
2193 | u32 ttl = G_TC_RTTL(skb->tc_verd); |
2194 | struct netdev_queue *rxq; |
2195 | int result = TC_ACT_OK; |
2196 | struct Qdisc *q; |
2197 | |
2198 | if (MAX_RED_LOOP < ttl++) { |
2199 | printk(KERN_WARNING |
2200 | "Redir loop detected Dropping packet (%d->%d)\n", |
2201 | skb->iif, dev->ifindex); |
2202 | return TC_ACT_SHOT; |
2203 | } |
2204 | |
2205 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); |
2206 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); |
2207 | |
2208 | rxq = &dev->rx_queue; |
2209 | |
2210 | q = rxq->qdisc; |
2211 | if (q != &noop_qdisc) { |
2212 | spin_lock(qdisc_lock(q)); |
2213 | if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) |
2214 | result = qdisc_enqueue_root(skb, q); |
2215 | spin_unlock(qdisc_lock(q)); |
2216 | } |
2217 | |
2218 | return result; |
2219 | } |
2220 | |
2221 | static inline struct sk_buff *handle_ing(struct sk_buff *skb, |
2222 | struct packet_type **pt_prev, |
2223 | int *ret, struct net_device *orig_dev) |
2224 | { |
2225 | if (skb->dev->rx_queue.qdisc == &noop_qdisc) |
2226 | goto out; |
2227 | |
2228 | if (*pt_prev) { |
2229 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
2230 | *pt_prev = NULL; |
2231 | } else { |
2232 | /* Huh? Why does turning on AF_PACKET affect this? */ |
2233 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); |
2234 | } |
2235 | |
2236 | switch (ing_filter(skb)) { |
2237 | case TC_ACT_SHOT: |
2238 | case TC_ACT_STOLEN: |
2239 | kfree_skb(skb); |
2240 | return NULL; |
2241 | } |
2242 | |
2243 | out: |
2244 | skb->tc_verd = 0; |
2245 | return skb; |
2246 | } |
2247 | #endif |
2248 | |
2249 | /* |
2250 | * netif_nit_deliver - deliver received packets to network taps |
2251 | * @skb: buffer |
2252 | * |
2253 | * This function is used to deliver incoming packets to network |
2254 | * taps. It should be used when the normal netif_receive_skb path |
2255 | * is bypassed, for example because of VLAN acceleration. |
2256 | */ |
2257 | void netif_nit_deliver(struct sk_buff *skb) |
2258 | { |
2259 | struct packet_type *ptype; |
2260 | |
2261 | if (list_empty(&ptype_all)) |
2262 | return; |
2263 | |
2264 | skb_reset_network_header(skb); |
2265 | skb_reset_transport_header(skb); |
2266 | skb->mac_len = skb->network_header - skb->mac_header; |
2267 | |
2268 | rcu_read_lock(); |
2269 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
2270 | if (!ptype->dev || ptype->dev == skb->dev) |
2271 | deliver_skb(skb, ptype, skb->dev); |
2272 | } |
2273 | rcu_read_unlock(); |
2274 | } |
2275 | |
2276 | /** |
2277 | * netif_receive_skb - process receive buffer from network |
2278 | * @skb: buffer to process |
2279 | * |
2280 | * netif_receive_skb() is the main receive data processing function. |
2281 | * It always succeeds. The buffer may be dropped during processing |
2282 | * for congestion control or by the protocol layers. |
2283 | * |
2284 | * This function may only be called from softirq context and interrupts |
2285 | * should be enabled. |
2286 | * |
2287 | * Return values (usually ignored): |
2288 | * NET_RX_SUCCESS: no congestion |
2289 | * NET_RX_DROP: packet was dropped |
2290 | */ |
2291 | int netif_receive_skb(struct sk_buff *skb) |
2292 | { |
2293 | struct packet_type *ptype, *pt_prev; |
2294 | struct net_device *orig_dev; |
2295 | struct net_device *null_or_orig; |
2296 | int ret = NET_RX_DROP; |
2297 | __be16 type; |
2298 | |
2299 | if (!skb->tstamp.tv64) |
2300 | net_timestamp(skb); |
2301 | |
2302 | if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) |
2303 | return NET_RX_SUCCESS; |
2304 | |
2305 | /* if we've gotten here through NAPI, check netpoll */ |
2306 | if (netpoll_receive_skb(skb)) |
2307 | return NET_RX_DROP; |
2308 | |
2309 | if (!skb->iif) |
2310 | skb->iif = skb->dev->ifindex; |
2311 | |
2312 | null_or_orig = NULL; |
2313 | orig_dev = skb->dev; |
2314 | if (orig_dev->master) { |
2315 | if (skb_bond_should_drop(skb)) |
2316 | null_or_orig = orig_dev; /* deliver only exact match */ |
2317 | else |
2318 | skb->dev = orig_dev->master; |
2319 | } |
2320 | |
2321 | __get_cpu_var(netdev_rx_stat).total++; |
2322 | |
2323 | skb_reset_network_header(skb); |
2324 | skb_reset_transport_header(skb); |
2325 | skb->mac_len = skb->network_header - skb->mac_header; |
2326 | |
2327 | pt_prev = NULL; |
2328 | |
2329 | rcu_read_lock(); |
2330 | |
2331 | #ifdef CONFIG_NET_CLS_ACT |
2332 | if (skb->tc_verd & TC_NCLS) { |
2333 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
2334 | goto ncls; |
2335 | } |
2336 | #endif |
2337 | |
2338 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
2339 | if (ptype->dev == null_or_orig || ptype->dev == skb->dev || |
2340 | ptype->dev == orig_dev) { |
2341 | if (pt_prev) |
2342 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2343 | pt_prev = ptype; |
2344 | } |
2345 | } |
2346 | |
2347 | #ifdef CONFIG_NET_CLS_ACT |
2348 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); |
2349 | if (!skb) |
2350 | goto out; |
2351 | ncls: |
2352 | #endif |
2353 | |
2354 | skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); |
2355 | if (!skb) |
2356 | goto out; |
2357 | skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); |
2358 | if (!skb) |
2359 | goto out; |
2360 | |
2361 | type = skb->protocol; |
2362 | list_for_each_entry_rcu(ptype, |
2363 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
2364 | if (ptype->type == type && |
2365 | (ptype->dev == null_or_orig || ptype->dev == skb->dev || |
2366 | ptype->dev == orig_dev)) { |
2367 | if (pt_prev) |
2368 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2369 | pt_prev = ptype; |
2370 | } |
2371 | } |
2372 | |
2373 | if (pt_prev) { |
2374 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2375 | } else { |
2376 | kfree_skb(skb); |
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
2380 | ret = NET_RX_DROP; |
2381 | } |
2382 | |
2383 | out: |
2384 | rcu_read_unlock(); |
2385 | return ret; |
2386 | } |
2387 | EXPORT_SYMBOL(netif_receive_skb); |
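
/*
 * A NAPI driver calls this from its poll routine; minimal sketch
 * with a hypothetical foo_next_rx_skb() helper:
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *
 *		while (work < budget) {
 *			struct sk_buff *skb = foo_next_rx_skb(napi->dev);
 *
 *			if (!skb)
 *				break;
 *			skb->protocol = eth_type_trans(skb, napi->dev);
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		if (work < budget) {
 *			napi_complete(napi);
 *			... re-enable the device's RX interrupt here ...
 *		}
 *		return work;
 *	}
 */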
2388 | |
2389 | /* Network device is going away, flush any packets still pending */ |
2390 | static void flush_backlog(void *arg) |
2391 | { |
2392 | struct net_device *dev = arg; |
2393 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
2394 | struct sk_buff *skb, *tmp; |
2395 | |
2396 | skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) |
2397 | if (skb->dev == dev) { |
2398 | __skb_unlink(skb, &queue->input_pkt_queue); |
2399 | kfree_skb(skb); |
2400 | } |
2401 | } |
2402 | |
2403 | static int napi_gro_complete(struct sk_buff *skb) |
2404 | { |
2405 | struct packet_type *ptype; |
2406 | __be16 type = skb->protocol; |
2407 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; |
2408 | int err = -ENOENT; |
2409 | |
2410 | if (NAPI_GRO_CB(skb)->count == 1) { |
2411 | skb_shinfo(skb)->gso_size = 0; |
2412 | goto out; |
2413 | } |
2414 | |
2415 | rcu_read_lock(); |
2416 | list_for_each_entry_rcu(ptype, head, list) { |
2417 | if (ptype->type != type || ptype->dev || !ptype->gro_complete) |
2418 | continue; |
2419 | |
2420 | err = ptype->gro_complete(skb); |
2421 | break; |
2422 | } |
2423 | rcu_read_unlock(); |
2424 | |
2425 | if (err) { |
2426 | WARN_ON(&ptype->list == head); |
2427 | kfree_skb(skb); |
2428 | return NET_RX_SUCCESS; |
2429 | } |
2430 | |
2431 | out: |
2432 | return netif_receive_skb(skb); |
2433 | } |
2434 | |
2435 | void napi_gro_flush(struct napi_struct *napi) |
2436 | { |
2437 | struct sk_buff *skb, *next; |
2438 | |
2439 | for (skb = napi->gro_list; skb; skb = next) { |
2440 | next = skb->next; |
2441 | skb->next = NULL; |
2442 | napi_gro_complete(skb); |
2443 | } |
2444 | |
2445 | napi->gro_count = 0; |
2446 | napi->gro_list = NULL; |
2447 | } |
2448 | EXPORT_SYMBOL(napi_gro_flush); |
2449 | |
2450 | int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2451 | { |
2452 | struct sk_buff **pp = NULL; |
2453 | struct packet_type *ptype; |
2454 | __be16 type = skb->protocol; |
2455 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; |
2456 | int same_flow; |
2457 | int mac_len; |
2458 | int ret; |
2459 | |
2460 | if (!(skb->dev->features & NETIF_F_GRO)) |
2461 | goto normal; |
2462 | |
2463 | if (skb_is_gso(skb) || skb_has_frags(skb)) |
2464 | goto normal; |
2465 | |
2466 | rcu_read_lock(); |
2467 | list_for_each_entry_rcu(ptype, head, list) { |
2468 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) |
2469 | continue; |
2470 | |
2471 | skb_set_network_header(skb, skb_gro_offset(skb)); |
2472 | mac_len = skb->network_header - skb->mac_header; |
2473 | skb->mac_len = mac_len; |
2474 | NAPI_GRO_CB(skb)->same_flow = 0; |
2475 | NAPI_GRO_CB(skb)->flush = 0; |
2476 | NAPI_GRO_CB(skb)->free = 0; |
2477 | |
2478 | pp = ptype->gro_receive(&napi->gro_list, skb); |
2479 | break; |
2480 | } |
2481 | rcu_read_unlock(); |
2482 | |
2483 | if (&ptype->list == head) |
2484 | goto normal; |
2485 | |
2486 | same_flow = NAPI_GRO_CB(skb)->same_flow; |
2487 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; |
2488 | |
2489 | if (pp) { |
2490 | struct sk_buff *nskb = *pp; |
2491 | |
2492 | *pp = nskb->next; |
2493 | nskb->next = NULL; |
2494 | napi_gro_complete(nskb); |
2495 | napi->gro_count--; |
2496 | } |
2497 | |
2498 | if (same_flow) |
2499 | goto ok; |
2500 | |
2501 | if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) |
2502 | goto normal; |
2503 | |
2504 | napi->gro_count++; |
2505 | NAPI_GRO_CB(skb)->count = 1; |
2506 | skb_shinfo(skb)->gso_size = skb_gro_len(skb); |
2507 | skb->next = napi->gro_list; |
2508 | napi->gro_list = skb; |
2509 | ret = GRO_HELD; |
2510 | |
2511 | pull: |
2512 | if (skb_headlen(skb) < skb_gro_offset(skb)) { |
2513 | int grow = skb_gro_offset(skb) - skb_headlen(skb); |
2514 | |
2515 | BUG_ON(skb->end - skb->tail < grow); |
2516 | |
2517 | memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); |
2518 | |
2519 | skb->tail += grow; |
2520 | skb->data_len -= grow; |
2521 | |
2522 | skb_shinfo(skb)->frags[0].page_offset += grow; |
2523 | skb_shinfo(skb)->frags[0].size -= grow; |
2524 | |
2525 | if (unlikely(!skb_shinfo(skb)->frags[0].size)) { |
2526 | put_page(skb_shinfo(skb)->frags[0].page); |
			/* memmove() takes a byte count, so scale the
			 * remaining frag count by the element size.
			 */
			memmove(skb_shinfo(skb)->frags,
				skb_shinfo(skb)->frags + 1,
				--skb_shinfo(skb)->nr_frags *
					sizeof(skb_frag_t));
2530 | } |
2531 | } |
2532 | |
2533 | ok: |
2534 | return ret; |
2535 | |
2536 | normal: |
2537 | ret = GRO_NORMAL; |
2538 | goto pull; |
2539 | } |
2540 | EXPORT_SYMBOL(dev_gro_receive); |
2541 | |
2542 | static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2543 | { |
2544 | struct sk_buff *p; |
2545 | |
2546 | if (netpoll_rx_on(skb)) |
2547 | return GRO_NORMAL; |
2548 | |
2549 | for (p = napi->gro_list; p; p = p->next) { |
2550 | NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) |
2551 | && !compare_ether_header(skb_mac_header(p), |
2552 | skb_gro_mac_header(skb)); |
2553 | NAPI_GRO_CB(p)->flush = 0; |
2554 | } |
2555 | |
2556 | return dev_gro_receive(napi, skb); |
2557 | } |
2558 | |
2559 | int napi_skb_finish(int ret, struct sk_buff *skb) |
2560 | { |
2561 | int err = NET_RX_SUCCESS; |
2562 | |
2563 | switch (ret) { |
2564 | case GRO_NORMAL: |
2565 | return netif_receive_skb(skb); |
2566 | |
2567 | case GRO_DROP: |
2568 | err = NET_RX_DROP; |
2569 | /* fall through */ |
2570 | |
2571 | case GRO_MERGED_FREE: |
2572 | kfree_skb(skb); |
2573 | break; |
2574 | } |
2575 | |
2576 | return err; |
2577 | } |
2578 | EXPORT_SYMBOL(napi_skb_finish); |
2579 | |
2580 | void skb_gro_reset_offset(struct sk_buff *skb) |
2581 | { |
2582 | NAPI_GRO_CB(skb)->data_offset = 0; |
2583 | NAPI_GRO_CB(skb)->frag0 = NULL; |
2584 | NAPI_GRO_CB(skb)->frag0_len = 0; |
2585 | |
2586 | if (skb->mac_header == skb->tail && |
2587 | !PageHighMem(skb_shinfo(skb)->frags[0].page)) { |
2588 | NAPI_GRO_CB(skb)->frag0 = |
2589 | page_address(skb_shinfo(skb)->frags[0].page) + |
2590 | skb_shinfo(skb)->frags[0].page_offset; |
2591 | NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; |
2592 | } |
2593 | } |
2594 | EXPORT_SYMBOL(skb_gro_reset_offset); |
2595 | |
2596 | int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2597 | { |
2598 | skb_gro_reset_offset(skb); |
2599 | |
2600 | return napi_skb_finish(__napi_gro_receive(napi, skb), skb); |
2601 | } |
2602 | EXPORT_SYMBOL(napi_gro_receive); |
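
/*
 * GRO-capable drivers simply substitute napi_gro_receive() for
 * netif_receive_skb() in a poll routine like the sketch after
 * netif_receive_skb() above:
 *
 *	skb->protocol = eth_type_trans(skb, napi->dev);
 *	napi_gro_receive(napi, skb);
 *
 * The skb may be merged into a held packet and freed, parked on
 * napi->gro_list, or delivered immediately; in every case the
 * caller must not reference it afterwards.
 */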
2603 | |
2604 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) |
2605 | { |
2606 | __skb_pull(skb, skb_headlen(skb)); |
2607 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); |
2608 | |
2609 | napi->skb = skb; |
2610 | } |
2611 | EXPORT_SYMBOL(napi_reuse_skb); |
2612 | |
2613 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
2614 | { |
2615 | struct net_device *dev = napi->dev; |
2616 | struct sk_buff *skb = napi->skb; |
2617 | |
2618 | if (!skb) { |
2619 | skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); |
2620 | if (!skb) |
2621 | goto out; |
2622 | |
2623 | skb_reserve(skb, NET_IP_ALIGN); |
2624 | |
2625 | napi->skb = skb; |
2626 | } |
2627 | |
2628 | out: |
2629 | return skb; |
2630 | } |
2631 | EXPORT_SYMBOL(napi_get_frags); |
2632 | |
2633 | int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) |
2634 | { |
2635 | int err = NET_RX_SUCCESS; |
2636 | |
2637 | switch (ret) { |
2638 | case GRO_NORMAL: |
2639 | case GRO_HELD: |
2640 | skb->protocol = eth_type_trans(skb, skb->dev); |
2641 | |
2642 | if (ret == GRO_NORMAL) |
2643 | return netif_receive_skb(skb); |
2644 | |
2645 | skb_gro_pull(skb, -ETH_HLEN); |
2646 | break; |
2647 | |
2648 | case GRO_DROP: |
2649 | err = NET_RX_DROP; |
2650 | /* fall through */ |
2651 | |
2652 | case GRO_MERGED_FREE: |
2653 | napi_reuse_skb(napi, skb); |
2654 | break; |
2655 | } |
2656 | |
2657 | return err; |
2658 | } |
2659 | EXPORT_SYMBOL(napi_frags_finish); |
2660 | |
2661 | struct sk_buff *napi_frags_skb(struct napi_struct *napi) |
2662 | { |
2663 | struct sk_buff *skb = napi->skb; |
2664 | struct ethhdr *eth; |
2665 | unsigned int hlen; |
2666 | unsigned int off; |
2667 | |
2668 | napi->skb = NULL; |
2669 | |
2670 | skb_reset_mac_header(skb); |
2671 | skb_gro_reset_offset(skb); |
2672 | |
2673 | off = skb_gro_offset(skb); |
2674 | hlen = off + sizeof(*eth); |
2675 | eth = skb_gro_header_fast(skb, off); |
2676 | if (skb_gro_header_hard(skb, hlen)) { |
2677 | eth = skb_gro_header_slow(skb, hlen, off); |
2678 | if (unlikely(!eth)) { |
2679 | napi_reuse_skb(napi, skb); |
2680 | skb = NULL; |
2681 | goto out; |
2682 | } |
2683 | } |
2684 | |
2685 | skb_gro_pull(skb, sizeof(*eth)); |
2686 | |
2687 | /* |
2688 | * This works because the only protocols we care about don't require |
2689 | * special handling. We'll fix it up properly at the end. |
2690 | */ |
2691 | skb->protocol = eth->h_proto; |
2692 | |
2693 | out: |
2694 | return skb; |
2695 | } |
2696 | EXPORT_SYMBOL(napi_frags_skb); |
2697 | |
2698 | int napi_gro_frags(struct napi_struct *napi) |
2699 | { |
2700 | struct sk_buff *skb = napi_frags_skb(napi); |
2701 | |
2702 | if (!skb) |
2703 | return NET_RX_DROP; |
2704 | |
2705 | return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); |
2706 | } |
2707 | EXPORT_SYMBOL(napi_gro_frags); |
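
/*
 * Drivers that receive straight into pages can recycle napi->skb
 * instead of allocating one skb per packet. A minimal sketch, where
 * page, offset and len come from a hypothetical RX ring (an assumed
 * driver contract, not dictated by this file):
 *
 *	skb = napi_get_frags(napi);
 *	if (!skb)
 *		return;		(drop: out of memory)
 *	skb_fill_page_desc(skb, 0, page, offset, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += len;
 *	napi_gro_frags(napi);
 *
 * napi_frags_skb() then pulls the Ethernet header out of the first
 * fragment and the normal GRO machinery takes over.
 */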
2708 | |
2709 | static int process_backlog(struct napi_struct *napi, int quota) |
2710 | { |
2711 | int work = 0; |
2712 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
2713 | unsigned long start_time = jiffies; |
2714 | |
2715 | napi->weight = weight_p; |
2716 | do { |
2717 | struct sk_buff *skb; |
2718 | |
2719 | local_irq_disable(); |
2720 | skb = __skb_dequeue(&queue->input_pkt_queue); |
2721 | if (!skb) { |
2722 | __napi_complete(napi); |
2723 | local_irq_enable(); |
2724 | break; |
2725 | } |
2726 | local_irq_enable(); |
2727 | |
2728 | netif_receive_skb(skb); |
2729 | } while (++work < quota && jiffies == start_time); |
2730 | |
2731 | return work; |
2732 | } |
2733 | |
2734 | /** |
2735 | * __napi_schedule - schedule for receive |
2736 | * @n: entry to schedule |
2737 | * |
2738 | * The entry's receive function will be scheduled to run |
2739 | */ |
2740 | void __napi_schedule(struct napi_struct *n) |
2741 | { |
2742 | unsigned long flags; |
2743 | |
2744 | local_irq_save(flags); |
2745 | list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); |
2746 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2747 | local_irq_restore(flags); |
2748 | } |
2749 | EXPORT_SYMBOL(__napi_schedule); |
2750 | |
2751 | void __napi_complete(struct napi_struct *n) |
2752 | { |
2753 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
2754 | BUG_ON(n->gro_list); |
2755 | |
2756 | list_del(&n->poll_list); |
2757 | smp_mb__before_clear_bit(); |
2758 | clear_bit(NAPI_STATE_SCHED, &n->state); |
2759 | } |
2760 | EXPORT_SYMBOL(__napi_complete); |
2761 | |
2762 | void napi_complete(struct napi_struct *n) |
2763 | { |
2764 | unsigned long flags; |
2765 | |
	/*
	 * Don't let NAPI dequeue from the CPU poll list
	 * just in case it's running on a different CPU.
	 */
2770 | if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) |
2771 | return; |
2772 | |
2773 | napi_gro_flush(n); |
2774 | local_irq_save(flags); |
2775 | __napi_complete(n); |
2776 | local_irq_restore(flags); |
2777 | } |
2778 | EXPORT_SYMBOL(napi_complete); |
2779 | |
2780 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
2781 | int (*poll)(struct napi_struct *, int), int weight) |
2782 | { |
2783 | INIT_LIST_HEAD(&napi->poll_list); |
2784 | napi->gro_count = 0; |
2785 | napi->gro_list = NULL; |
2786 | napi->skb = NULL; |
2787 | napi->poll = poll; |
2788 | napi->weight = weight; |
2789 | list_add(&napi->dev_list, &dev->napi_list); |
2790 | napi->dev = dev; |
2791 | #ifdef CONFIG_NETPOLL |
2792 | spin_lock_init(&napi->poll_lock); |
2793 | napi->poll_owner = -1; |
2794 | #endif |
2795 | set_bit(NAPI_STATE_SCHED, &napi->state); |
2796 | } |
2797 | EXPORT_SYMBOL(netif_napi_add); |
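
/*
 * The usual driver lifecycle around a napi_struct, as a minimal
 * sketch (the foo_* names are hypothetical):
 *
 *	in probe:
 *		netif_napi_add(netdev, &priv->napi, foo_poll, 64);
 *
 *	in ndo_open:
 *		napi_enable(&priv->napi);
 *
 *	in the interrupt handler:
 *		if (napi_schedule_prep(&priv->napi)) {
 *			foo_disable_rx_irq(priv);
 *			__napi_schedule(&priv->napi);
 *		}
 *
 * Note that netif_napi_add() leaves NAPI_STATE_SCHED set, so
 * napi_enable() must run before the instance can be scheduled.
 */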
2798 | |
2799 | void netif_napi_del(struct napi_struct *napi) |
2800 | { |
2801 | struct sk_buff *skb, *next; |
2802 | |
2803 | list_del_init(&napi->dev_list); |
2804 | napi_free_frags(napi); |
2805 | |
2806 | for (skb = napi->gro_list; skb; skb = next) { |
2807 | next = skb->next; |
2808 | skb->next = NULL; |
2809 | kfree_skb(skb); |
2810 | } |
2811 | |
2812 | napi->gro_list = NULL; |
2813 | napi->gro_count = 0; |
2814 | } |
2815 | EXPORT_SYMBOL(netif_napi_del); |
2816 | |
2817 | |
2818 | static void net_rx_action(struct softirq_action *h) |
2819 | { |
2820 | struct list_head *list = &__get_cpu_var(softnet_data).poll_list; |
2821 | unsigned long time_limit = jiffies + 2; |
2822 | int budget = netdev_budget; |
2823 | void *have; |
2824 | |
2825 | local_irq_disable(); |
2826 | |
2827 | while (!list_empty(list)) { |
2828 | struct napi_struct *n; |
2829 | int work, weight; |
2830 | |
		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which will allow
		 * an average latency of 1.5/HZ.
		 */
2835 | if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) |
2836 | goto softnet_break; |
2837 | |
2838 | local_irq_enable(); |
2839 | |
2840 | /* Even though interrupts have been re-enabled, this |
2841 | * access is safe because interrupts can only add new |
2842 | * entries to the tail of this list, and only ->poll() |
2843 | * calls can remove this head entry from the list. |
2844 | */ |
2845 | n = list_entry(list->next, struct napi_struct, poll_list); |
2846 | |
2847 | have = netpoll_poll_lock(n); |
2848 | |
2849 | weight = n->weight; |
2850 | |
		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi(). Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call. Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
2857 | work = 0; |
2858 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
2859 | work = n->poll(n, weight); |
2860 | trace_napi_poll(n); |
2861 | } |
2862 | |
2863 | WARN_ON_ONCE(work > weight); |
2864 | |
2865 | budget -= work; |
2866 | |
2867 | local_irq_disable(); |
2868 | |
2869 | /* Drivers must not modify the NAPI state if they |
2870 | * consume the entire weight. In such cases this code |
2871 | * still "owns" the NAPI instance and therefore can |
2872 | * move the instance around on the list at-will. |
2873 | */ |
2874 | if (unlikely(work == weight)) { |
2875 | if (unlikely(napi_disable_pending(n))) { |
2876 | local_irq_enable(); |
2877 | napi_complete(n); |
2878 | local_irq_disable(); |
2879 | } else |
2880 | list_move_tail(&n->poll_list, list); |
2881 | } |
2882 | |
2883 | netpoll_poll_unlock(have); |
2884 | } |
2885 | out: |
2886 | local_irq_enable(); |
2887 | |
2888 | #ifdef CONFIG_NET_DMA |
2889 | /* |
2890 | * There may not be any more sk_buffs coming right now, so push |
2891 | * any pending DMA copies to hardware |
2892 | */ |
2893 | dma_issue_pending_all(); |
2894 | #endif |
2895 | |
2896 | return; |
2897 | |
2898 | softnet_break: |
2899 | __get_cpu_var(netdev_rx_stat).time_squeeze++; |
2900 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2901 | goto out; |
2902 | } |
2903 | |
2904 | static gifconf_func_t *gifconf_list[NPROTO]; |
2905 | |
2906 | /** |
2907 | * register_gifconf - register a SIOCGIF handler |
2908 | * @family: Address family |
2909 | * @gifconf: Function handler |
2910 | * |
2911 | * Register protocol dependent address dumping routines. The handler |
2912 | * that is passed must not be freed or reused until it has been replaced |
2913 | * by another handler. |
2914 | */ |
2915 | int register_gifconf(unsigned int family, gifconf_func_t *gifconf) |
2916 | { |
2917 | if (family >= NPROTO) |
2918 | return -EINVAL; |
2919 | gifconf_list[family] = gifconf; |
2920 | return 0; |
2921 | } |
2922 | EXPORT_SYMBOL(register_gifconf); |
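
/*
 * Address families install their handler at init time; for example
 * IPv4 registers its SIOCGIFCONF helper with:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * The handler is invoked once per device from dev_ifconf() below and
 * returns the number of bytes written -- or, when called with a NULL
 * buffer, the number of bytes it would need.
 */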
2923 | |
2924 | |
2925 | /* |
2926 | * Map an interface index to its name (SIOCGIFNAME) |
2927 | */ |
2928 | |
2929 | /* |
2930 | * We need this ioctl for efficient implementation of the |
2931 | * if_indextoname() function required by the IPv6 API. Without |
2932 | * it, we would have to search all the interfaces to find a |
2933 | * match. --pb |
2934 | */ |
2935 | |
2936 | static int dev_ifname(struct net *net, struct ifreq __user *arg) |
2937 | { |
2938 | struct net_device *dev; |
2939 | struct ifreq ifr; |
2940 | |
2941 | /* |
2942 | * Fetch the caller's info block. |
2943 | */ |
2944 | |
2945 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
2946 | return -EFAULT; |
2947 | |
2948 | read_lock(&dev_base_lock); |
2949 | dev = __dev_get_by_index(net, ifr.ifr_ifindex); |
2950 | if (!dev) { |
2951 | read_unlock(&dev_base_lock); |
2952 | return -ENODEV; |
2953 | } |
2954 | |
2955 | strcpy(ifr.ifr_name, dev->name); |
2956 | read_unlock(&dev_base_lock); |
2957 | |
2958 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
2959 | return -EFAULT; |
2960 | return 0; |
2961 | } |
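
/*
 * From user space this is reached through ioctl() on any socket;
 * if_indextoname() boils down to the following minimal sketch
 * (error handling elided):
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	ifr.ifr_ifindex = index;
 *	ioctl(fd, SIOCGIFNAME, &ifr);
 *	... ifr.ifr_name now holds the device name ...
 */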
2962 | |
2963 | /* |
2964 | * Perform a SIOCGIFCONF call. This structure will change |
2965 | * size eventually, and there is nothing I can do about it. |
2966 | * Thus we will need a 'compatibility mode'. |
2967 | */ |
2968 | |
2969 | static int dev_ifconf(struct net *net, char __user *arg) |
2970 | { |
2971 | struct ifconf ifc; |
2972 | struct net_device *dev; |
2973 | char __user *pos; |
2974 | int len; |
2975 | int total; |
2976 | int i; |
2977 | |
2978 | /* |
2979 | * Fetch the caller's info block. |
2980 | */ |
2981 | |
2982 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) |
2983 | return -EFAULT; |
2984 | |
2985 | pos = ifc.ifc_buf; |
2986 | len = ifc.ifc_len; |
2987 | |
2988 | /* |
2989 | * Loop over the interfaces, and write an info block for each. |
2990 | */ |
2991 | |
2992 | total = 0; |
2993 | for_each_netdev(net, dev) { |
2994 | for (i = 0; i < NPROTO; i++) { |
2995 | if (gifconf_list[i]) { |
2996 | int done; |
2997 | if (!pos) |
2998 | done = gifconf_list[i](dev, NULL, 0); |
2999 | else |
3000 | done = gifconf_list[i](dev, pos + total, |
3001 | len - total); |
3002 | if (done < 0) |
3003 | return -EFAULT; |
3004 | total += done; |
3005 | } |
3006 | } |
3007 | } |
3008 | |
3009 | /* |
3010 | * All done. Write the updated control block back to the caller. |
3011 | */ |
3012 | ifc.ifc_len = total; |
3013 | |
3014 | /* |
3015 | * Both BSD and Solaris return 0 here, so we do too. |
3016 | */ |
3017 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; |
3018 | } |
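
/*
 * The matching user-space pattern, as a minimal sketch: hand in a
 * buffer, let the kernel fill it, then walk the ifreq array.
 *
 *	struct ifreq reqs[16];
 *	struct ifconf ifc;
 *	int i, n;
 *
 *	ifc.ifc_len = sizeof(reqs);
 *	ifc.ifc_req = reqs;
 *	ioctl(fd, SIOCGIFCONF, &ifc);
 *	n = ifc.ifc_len / sizeof(struct ifreq);
 *	for (i = 0; i < n; i++)
 *		printf("%s\n", reqs[i].ifr_name);
 *
 * A too-small buffer yields a silently truncated list rather than an
 * error, matching the BSD and Solaris behaviour noted above.
 */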
3019 | |
3020 | #ifdef CONFIG_PROC_FS |
3021 | /* |
3022 | * This is invoked by the /proc filesystem handler to display a device |
3023 | * in detail. |
3024 | */ |
3025 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) |
3026 | __acquires(dev_base_lock) |
3027 | { |
3028 | struct net *net = seq_file_net(seq); |
3029 | loff_t off; |
3030 | struct net_device *dev; |
3031 | |
3032 | read_lock(&dev_base_lock); |
3033 | if (!*pos) |
3034 | return SEQ_START_TOKEN; |
3035 | |
3036 | off = 1; |
3037 | for_each_netdev(net, dev) |
3038 | if (off++ == *pos) |
3039 | return dev; |
3040 | |
3041 | return NULL; |
3042 | } |
3043 | |
3044 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3045 | { |
3046 | struct net *net = seq_file_net(seq); |
3047 | ++*pos; |
3048 | return v == SEQ_START_TOKEN ? |
3049 | first_net_device(net) : next_net_device((struct net_device *)v); |
3050 | } |
3051 | |
3052 | void dev_seq_stop(struct seq_file *seq, void *v) |
3053 | __releases(dev_base_lock) |
3054 | { |
3055 | read_unlock(&dev_base_lock); |
3056 | } |
3057 | |
3058 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) |
3059 | { |
3060 | const struct net_device_stats *stats = dev_get_stats(dev); |
3061 | |
3062 | seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " |
3063 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", |
3064 | dev->name, stats->rx_bytes, stats->rx_packets, |
3065 | stats->rx_errors, |
3066 | stats->rx_dropped + stats->rx_missed_errors, |
3067 | stats->rx_fifo_errors, |
3068 | stats->rx_length_errors + stats->rx_over_errors + |
3069 | stats->rx_crc_errors + stats->rx_frame_errors, |
3070 | stats->rx_compressed, stats->multicast, |
3071 | stats->tx_bytes, stats->tx_packets, |
3072 | stats->tx_errors, stats->tx_dropped, |
3073 | stats->tx_fifo_errors, stats->collisions, |
3074 | stats->tx_carrier_errors + |
3075 | stats->tx_aborted_errors + |
3076 | stats->tx_window_errors + |
3077 | stats->tx_heartbeat_errors, |
3078 | stats->tx_compressed); |
3079 | } |
3080 | |
/*
 * Called from the PROCfs module. This now uses the new arbitrary-sized
 * /proc/net interface to create /proc/net/dev.
 */
3085 | static int dev_seq_show(struct seq_file *seq, void *v) |
3086 | { |
3087 | if (v == SEQ_START_TOKEN) |
3088 | seq_puts(seq, "Inter-| Receive " |
3089 | " | Transmit\n" |
3090 | " face |bytes packets errs drop fifo frame " |
3091 | "compressed multicast|bytes packets errs " |
3092 | "drop fifo colls carrier compressed\n"); |
3093 | else |
3094 | dev_seq_printf_stats(seq, v); |
3095 | return 0; |
3096 | } |
3097 | |
3098 | static struct netif_rx_stats *softnet_get_online(loff_t *pos) |
3099 | { |
3100 | struct netif_rx_stats *rc = NULL; |
3101 | |
3102 | while (*pos < nr_cpu_ids) |
3103 | if (cpu_online(*pos)) { |
3104 | rc = &per_cpu(netdev_rx_stat, *pos); |
3105 | break; |
3106 | } else |
3107 | ++*pos; |
3108 | return rc; |
3109 | } |
3110 | |
3111 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) |
3112 | { |
3113 | return softnet_get_online(pos); |
3114 | } |
3115 | |
3116 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3117 | { |
3118 | ++*pos; |
3119 | return softnet_get_online(pos); |
3120 | } |
3121 | |
3122 | static void softnet_seq_stop(struct seq_file *seq, void *v) |
3123 | { |
3124 | } |
3125 | |
3126 | static int softnet_seq_show(struct seq_file *seq, void *v) |
3127 | { |
3128 | struct netif_rx_stats *s = v; |
3129 | |
3130 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
3131 | s->total, s->dropped, s->time_squeeze, 0, |
3132 | 0, 0, 0, 0, /* was fastroute */ |
3133 | s->cpu_collision); |
3134 | return 0; |
3135 | } |
3136 | |
3137 | static const struct seq_operations dev_seq_ops = { |
3138 | .start = dev_seq_start, |
3139 | .next = dev_seq_next, |
3140 | .stop = dev_seq_stop, |
3141 | .show = dev_seq_show, |
3142 | }; |
3143 | |
3144 | static int dev_seq_open(struct inode *inode, struct file *file) |
3145 | { |
3146 | return seq_open_net(inode, file, &dev_seq_ops, |
3147 | sizeof(struct seq_net_private)); |
3148 | } |
3149 | |
3150 | static const struct file_operations dev_seq_fops = { |
3151 | .owner = THIS_MODULE, |
3152 | .open = dev_seq_open, |
3153 | .read = seq_read, |
3154 | .llseek = seq_lseek, |
3155 | .release = seq_release_net, |
3156 | }; |
3157 | |
3158 | static const struct seq_operations softnet_seq_ops = { |
3159 | .start = softnet_seq_start, |
3160 | .next = softnet_seq_next, |
3161 | .stop = softnet_seq_stop, |
3162 | .show = softnet_seq_show, |
3163 | }; |
3164 | |
3165 | static int softnet_seq_open(struct inode *inode, struct file *file) |
3166 | { |
3167 | return seq_open(file, &softnet_seq_ops); |
3168 | } |
3169 | |
3170 | static const struct file_operations softnet_seq_fops = { |
3171 | .owner = THIS_MODULE, |
3172 | .open = softnet_seq_open, |
3173 | .read = seq_read, |
3174 | .llseek = seq_lseek, |
3175 | .release = seq_release, |
3176 | }; |
3177 | |
3178 | static void *ptype_get_idx(loff_t pos) |
3179 | { |
3180 | struct packet_type *pt = NULL; |
3181 | loff_t i = 0; |
3182 | int t; |
3183 | |
3184 | list_for_each_entry_rcu(pt, &ptype_all, list) { |
3185 | if (i == pos) |
3186 | return pt; |
3187 | ++i; |
3188 | } |
3189 | |
3190 | for (t = 0; t < PTYPE_HASH_SIZE; t++) { |
3191 | list_for_each_entry_rcu(pt, &ptype_base[t], list) { |
3192 | if (i == pos) |
3193 | return pt; |
3194 | ++i; |
3195 | } |
3196 | } |
3197 | return NULL; |
3198 | } |
3199 | |
3200 | static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) |
3201 | __acquires(RCU) |
3202 | { |
3203 | rcu_read_lock(); |
3204 | return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; |
3205 | } |
3206 | |
3207 | static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3208 | { |
3209 | struct packet_type *pt; |
3210 | struct list_head *nxt; |
3211 | int hash; |
3212 | |
3213 | ++*pos; |
3214 | if (v == SEQ_START_TOKEN) |
3215 | return ptype_get_idx(0); |
3216 | |
3217 | pt = v; |
3218 | nxt = pt->list.next; |
3219 | if (pt->type == htons(ETH_P_ALL)) { |
3220 | if (nxt != &ptype_all) |
3221 | goto found; |
3222 | hash = 0; |
3223 | nxt = ptype_base[0].next; |
3224 | } else |
3225 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; |
3226 | |
3227 | while (nxt == &ptype_base[hash]) { |
3228 | if (++hash >= PTYPE_HASH_SIZE) |
3229 | return NULL; |
3230 | nxt = ptype_base[hash].next; |
3231 | } |
3232 | found: |
3233 | return list_entry(nxt, struct packet_type, list); |
3234 | } |
3235 | |
3236 | static void ptype_seq_stop(struct seq_file *seq, void *v) |
3237 | __releases(RCU) |
3238 | { |
3239 | rcu_read_unlock(); |
3240 | } |
3241 | |
3242 | static int ptype_seq_show(struct seq_file *seq, void *v) |
3243 | { |
3244 | struct packet_type *pt = v; |
3245 | |
3246 | if (v == SEQ_START_TOKEN) |
3247 | seq_puts(seq, "Type Device Function\n"); |
3248 | else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { |
3249 | if (pt->type == htons(ETH_P_ALL)) |
3250 | seq_puts(seq, "ALL "); |
3251 | else |
3252 | seq_printf(seq, "%04x", ntohs(pt->type)); |
3253 | |
3254 | seq_printf(seq, " %-8s %pF\n", |
3255 | pt->dev ? pt->dev->name : "", pt->func); |
3256 | } |
3257 | |
3258 | return 0; |
3259 | } |
3260 | |
3261 | static const struct seq_operations ptype_seq_ops = { |
3262 | .start = ptype_seq_start, |
3263 | .next = ptype_seq_next, |
3264 | .stop = ptype_seq_stop, |
3265 | .show = ptype_seq_show, |
3266 | }; |
3267 | |
3268 | static int ptype_seq_open(struct inode *inode, struct file *file) |
3269 | { |
3270 | return seq_open_net(inode, file, &ptype_seq_ops, |
3271 | sizeof(struct seq_net_private)); |
3272 | } |
3273 | |
3274 | static const struct file_operations ptype_seq_fops = { |
3275 | .owner = THIS_MODULE, |
3276 | .open = ptype_seq_open, |
3277 | .read = seq_read, |
3278 | .llseek = seq_lseek, |
3279 | .release = seq_release_net, |
3280 | }; |
3281 | |
3282 | |
3283 | static int __net_init dev_proc_net_init(struct net *net) |
3284 | { |
3285 | int rc = -ENOMEM; |
3286 | |
3287 | if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) |
3288 | goto out; |
3289 | if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) |
3290 | goto out_dev; |
3291 | if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) |
3292 | goto out_softnet; |
3293 | |
3294 | if (wext_proc_init(net)) |
3295 | goto out_ptype; |
3296 | rc = 0; |
3297 | out: |
3298 | return rc; |
3299 | out_ptype: |
3300 | proc_net_remove(net, "ptype"); |
3301 | out_softnet: |
3302 | proc_net_remove(net, "softnet_stat"); |
3303 | out_dev: |
3304 | proc_net_remove(net, "dev"); |
3305 | goto out; |
3306 | } |
3307 | |
3308 | static void __net_exit dev_proc_net_exit(struct net *net) |
3309 | { |
3310 | wext_proc_exit(net); |
3311 | |
3312 | proc_net_remove(net, "ptype"); |
3313 | proc_net_remove(net, "softnet_stat"); |
3314 | proc_net_remove(net, "dev"); |
3315 | } |
3316 | |
3317 | static struct pernet_operations __net_initdata dev_proc_ops = { |
3318 | .init = dev_proc_net_init, |
3319 | .exit = dev_proc_net_exit, |
3320 | }; |
3321 | |
3322 | static int __init dev_proc_init(void) |
3323 | { |
3324 | return register_pernet_subsys(&dev_proc_ops); |
3325 | } |
3326 | #else |
3327 | #define dev_proc_init() 0 |
3328 | #endif /* CONFIG_PROC_FS */ |
3329 | |
3330 | |
3331 | /** |
3332 | * netdev_set_master - set up master/slave pair |
3333 | * @slave: slave device |
3334 | * @master: new master device |
3335 | * |
3336 | * Changes the master device of the slave. Pass %NULL to break the |
3337 | * bonding. The caller must hold the RTNL semaphore. On a failure |
3338 | * a negative errno code is returned. On success the reference counts |
3339 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the |
3340 | * function returns zero. |
3341 | */ |
3342 | int netdev_set_master(struct net_device *slave, struct net_device *master) |
3343 | { |
3344 | struct net_device *old = slave->master; |
3345 | |
3346 | ASSERT_RTNL(); |
3347 | |
3348 | if (master) { |
3349 | if (old) |
3350 | return -EBUSY; |
3351 | dev_hold(master); |
3352 | } |
3353 | |
3354 | slave->master = master; |
3355 | |
3356 | synchronize_net(); |
3357 | |
3358 | if (old) |
3359 | dev_put(old); |
3360 | |
3361 | if (master) |
3362 | slave->flags |= IFF_SLAVE; |
3363 | else |
3364 | slave->flags &= ~IFF_SLAVE; |
3365 | |
3366 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); |
3367 | return 0; |
3368 | } |
3369 | EXPORT_SYMBOL(netdev_set_master); |
3370 | |
3371 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
3372 | { |
3373 | const struct net_device_ops *ops = dev->netdev_ops; |
3374 | |
3375 | if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) |
3376 | ops->ndo_change_rx_flags(dev, flags); |
3377 | } |
3378 | |
3379 | static int __dev_set_promiscuity(struct net_device *dev, int inc) |
3380 | { |
3381 | unsigned short old_flags = dev->flags; |
3382 | uid_t uid; |
3383 | gid_t gid; |
3384 | |
3385 | ASSERT_RTNL(); |
3386 | |
3387 | dev->flags |= IFF_PROMISC; |
3388 | dev->promiscuity += inc; |
3389 | if (dev->promiscuity == 0) { |
		/*
		 * Avoid overflow.
		 * If inc causes an overflow, leave promisc untouched and
		 * return an error.
		 */
3394 | if (inc < 0) |
3395 | dev->flags &= ~IFF_PROMISC; |
3396 | else { |
3397 | dev->promiscuity -= inc; |
			printk(KERN_WARNING "%s: promiscuity counter overflowed, "
			       "set promiscuity failed; the promiscuity feature "
			       "of the device might be broken.\n", dev->name);
3401 | return -EOVERFLOW; |
3402 | } |
3403 | } |
3404 | if (dev->flags != old_flags) { |
3405 | printk(KERN_INFO "device %s %s promiscuous mode\n", |
3406 | dev->name, (dev->flags & IFF_PROMISC) ? "entered" : |
3407 | "left"); |
3408 | if (audit_enabled) { |
3409 | current_uid_gid(&uid, &gid); |
3410 | audit_log(current->audit_context, GFP_ATOMIC, |
3411 | AUDIT_ANOM_PROMISCUOUS, |
3412 | "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", |
3413 | dev->name, (dev->flags & IFF_PROMISC), |
3414 | (old_flags & IFF_PROMISC), |
3415 | audit_get_loginuid(current), |
3416 | uid, gid, |
3417 | audit_get_sessionid(current)); |
3418 | } |
3419 | |
3420 | dev_change_rx_flags(dev, IFF_PROMISC); |
3421 | } |
3422 | return 0; |
3423 | } |
3424 | |
3425 | /** |
3426 | * dev_set_promiscuity - update promiscuity count on a device |
3427 | * @dev: device |
3428 | * @inc: modifier |
3429 | * |
3430 | * Add or remove promiscuity from a device. While the count in the device |
3431 | * remains above zero the interface remains promiscuous. Once it hits zero |
3432 | * the device reverts back to normal filtering operation. A negative inc |
3433 | * value is used to drop promiscuity on the device. |
3434 | * Return 0 if successful or a negative errno code on error. |
3435 | */ |
3436 | int dev_set_promiscuity(struct net_device *dev, int inc) |
3437 | { |
3438 | unsigned short old_flags = dev->flags; |
3439 | int err; |
3440 | |
3441 | err = __dev_set_promiscuity(dev, inc); |
3442 | if (err < 0) |
3443 | return err; |
3444 | if (dev->flags != old_flags) |
3445 | dev_set_rx_mode(dev); |
3446 | return err; |
3447 | } |
3448 | EXPORT_SYMBOL(dev_set_promiscuity); |
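
/*
 * Callers must hold the RTNL. A packet-capture style user, as a
 * minimal sketch:
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	(start capturing)
 *	rtnl_unlock();
 *
 * and later, symmetrically:
 *
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		(stop capturing)
 *	rtnl_unlock();
 *
 * The counter lets independent users compose: the device leaves
 * promiscuous mode only once every +1 has been paired with a -1.
 */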
3449 | |
3450 | /** |
3451 | * dev_set_allmulti - update allmulti count on a device |
3452 | * @dev: device |
3453 | * @inc: modifier |
3454 | * |
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface continues to
 * receive all multicast frames. Once it hits zero the device reverts
 * back to normal filtering operation. A negative @inc value is used to
 * drop the counter when releasing a resource needing all multicasts.
3460 | * Return 0 if successful or a negative errno code on error. |
3461 | */ |
3462 | |
3463 | int dev_set_allmulti(struct net_device *dev, int inc) |
3464 | { |
3465 | unsigned short old_flags = dev->flags; |
3466 | |
3467 | ASSERT_RTNL(); |
3468 | |
3469 | dev->flags |= IFF_ALLMULTI; |
3470 | dev->allmulti += inc; |
3471 | if (dev->allmulti == 0) { |
		/*
		 * Avoid overflow.
		 * If inc causes an overflow, leave allmulti untouched and
		 * return an error.
		 */
3476 | if (inc < 0) |
3477 | dev->flags &= ~IFF_ALLMULTI; |
3478 | else { |
3479 | dev->allmulti -= inc; |
			printk(KERN_WARNING "%s: allmulti counter overflowed, "
			       "set allmulti failed; the allmulti feature "
			       "of the device might be broken.\n", dev->name);
3483 | return -EOVERFLOW; |
3484 | } |
3485 | } |
3486 | if (dev->flags ^ old_flags) { |
3487 | dev_change_rx_flags(dev, IFF_ALLMULTI); |
3488 | dev_set_rx_mode(dev); |
3489 | } |
3490 | return 0; |
3491 | } |
3492 | EXPORT_SYMBOL(dev_set_allmulti); |
3493 | |
3494 | /* |
3495 | * Upload unicast and multicast address lists to device and |
3496 | * configure RX filtering. When the device doesn't support unicast |
3497 | * filtering it is put in promiscuous mode while unicast addresses |
3498 | * are present. |
3499 | */ |
3500 | void __dev_set_rx_mode(struct net_device *dev) |
3501 | { |
3502 | const struct net_device_ops *ops = dev->netdev_ops; |
3503 | |
3504 | /* dev_open will call this function so the list will stay sane. */ |
3505 | if (!(dev->flags&IFF_UP)) |
3506 | return; |
3507 | |
3508 | if (!netif_device_present(dev)) |
3509 | return; |
3510 | |
3511 | if (ops->ndo_set_rx_mode) |
3512 | ops->ndo_set_rx_mode(dev); |
3513 | else { |
		/* Unicast address changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
3517 | if (dev->uc.count > 0 && !dev->uc_promisc) { |
3518 | __dev_set_promiscuity(dev, 1); |
3519 | dev->uc_promisc = 1; |
3520 | } else if (dev->uc.count == 0 && dev->uc_promisc) { |
3521 | __dev_set_promiscuity(dev, -1); |
3522 | dev->uc_promisc = 0; |
3523 | } |
3524 | |
3525 | if (ops->ndo_set_multicast_list) |
3526 | ops->ndo_set_multicast_list(dev); |
3527 | } |
3528 | } |
3529 | |
3530 | void dev_set_rx_mode(struct net_device *dev) |
3531 | { |
3532 | netif_addr_lock_bh(dev); |
3533 | __dev_set_rx_mode(dev); |
3534 | netif_addr_unlock_bh(dev); |
3535 | } |
3536 | |
3537 | /* hw addresses list handling functions */ |
3538 | |
3539 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, |
3540 | int addr_len, unsigned char addr_type) |
3541 | { |
3542 | struct netdev_hw_addr *ha; |
3543 | int alloc_size; |
3544 | |
3545 | if (addr_len > MAX_ADDR_LEN) |
3546 | return -EINVAL; |
3547 | |
3548 | list_for_each_entry(ha, &list->list, list) { |
3549 | if (!memcmp(ha->addr, addr, addr_len) && |
3550 | ha->type == addr_type) { |
3551 | ha->refcount++; |
3552 | return 0; |
3553 | } |
3554 | } |
3557 | alloc_size = sizeof(*ha); |
3558 | if (alloc_size < L1_CACHE_BYTES) |
3559 | alloc_size = L1_CACHE_BYTES; |
3560 | ha = kmalloc(alloc_size, GFP_ATOMIC); |
3561 | if (!ha) |
3562 | return -ENOMEM; |
3563 | memcpy(ha->addr, addr, addr_len); |
3564 | ha->type = addr_type; |
3565 | ha->refcount = 1; |
3566 | ha->synced = false; |
3567 | list_add_tail_rcu(&ha->list, &list->list); |
3568 | list->count++; |
3569 | return 0; |
3570 | } |
3571 | |
3572 | static void ha_rcu_free(struct rcu_head *head) |
3573 | { |
3574 | struct netdev_hw_addr *ha; |
3575 | |
3576 | ha = container_of(head, struct netdev_hw_addr, rcu_head); |
3577 | kfree(ha); |
3578 | } |
3579 | |
3580 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, |
3581 | int addr_len, unsigned char addr_type) |
3582 | { |
3583 | struct netdev_hw_addr *ha; |
3584 | |
3585 | list_for_each_entry(ha, &list->list, list) { |
3586 | if (!memcmp(ha->addr, addr, addr_len) && |
3587 | (ha->type == addr_type || !addr_type)) { |
3588 | if (--ha->refcount) |
3589 | return 0; |
3590 | list_del_rcu(&ha->list); |
3591 | call_rcu(&ha->rcu_head, ha_rcu_free); |
3592 | list->count--; |
3593 | return 0; |
3594 | } |
3595 | } |
3596 | return -ENOENT; |
3597 | } |
3598 | |
3599 | static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, |
3600 | struct netdev_hw_addr_list *from_list, |
3601 | int addr_len, |
3602 | unsigned char addr_type) |
3603 | { |
3604 | int err; |
3605 | struct netdev_hw_addr *ha, *ha2; |
3606 | unsigned char type; |
3607 | |
3608 | list_for_each_entry(ha, &from_list->list, list) { |
3609 | type = addr_type ? addr_type : ha->type; |
3610 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); |
3611 | if (err) |
3612 | goto unroll; |
3613 | } |
3614 | return 0; |
3615 | |
3616 | unroll: |
3617 | list_for_each_entry(ha2, &from_list->list, list) { |
3618 | if (ha2 == ha) |
3619 | break; |
3620 | type = addr_type ? addr_type : ha2->type; |
3621 | __hw_addr_del(to_list, ha2->addr, addr_len, type); |
3622 | } |
3623 | return err; |
3624 | } |
3625 | |
3626 | static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, |
3627 | struct netdev_hw_addr_list *from_list, |
3628 | int addr_len, |
3629 | unsigned char addr_type) |
3630 | { |
3631 | struct netdev_hw_addr *ha; |
3632 | unsigned char type; |
3633 | |
3634 | list_for_each_entry(ha, &from_list->list, list) { |
3635 | type = addr_type ? addr_type : ha->type; |
		__hw_addr_del(to_list, ha->addr, addr_len, type);
3637 | } |
3638 | } |
3639 | |
3640 | static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, |
3641 | struct netdev_hw_addr_list *from_list, |
3642 | int addr_len) |
3643 | { |
3644 | int err = 0; |
3645 | struct netdev_hw_addr *ha, *tmp; |
3646 | |
3647 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { |
3648 | if (!ha->synced) { |
3649 | err = __hw_addr_add(to_list, ha->addr, |
3650 | addr_len, ha->type); |
3651 | if (err) |
3652 | break; |
3653 | ha->synced = true; |
3654 | ha->refcount++; |
3655 | } else if (ha->refcount == 1) { |
3656 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); |
3657 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); |
3658 | } |
3659 | } |
3660 | return err; |
3661 | } |
3662 | |
3663 | static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, |
3664 | struct netdev_hw_addr_list *from_list, |
3665 | int addr_len) |
3666 | { |
3667 | struct netdev_hw_addr *ha, *tmp; |
3668 | |
3669 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { |
3670 | if (ha->synced) { |
3671 | __hw_addr_del(to_list, ha->addr, |
3672 | addr_len, ha->type); |
3673 | ha->synced = false; |
3674 | __hw_addr_del(from_list, ha->addr, |
3675 | addr_len, ha->type); |
3676 | } |
3677 | } |
3678 | } |
3679 | |
3680 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) |
3681 | { |
3682 | struct netdev_hw_addr *ha, *tmp; |
3683 | |
3684 | list_for_each_entry_safe(ha, tmp, &list->list, list) { |
3685 | list_del_rcu(&ha->list); |
3686 | call_rcu(&ha->rcu_head, ha_rcu_free); |
3687 | } |
3688 | list->count = 0; |
3689 | } |
3690 | |
3691 | static void __hw_addr_init(struct netdev_hw_addr_list *list) |
3692 | { |
3693 | INIT_LIST_HEAD(&list->list); |
3694 | list->count = 0; |
3695 | } |
3696 | |
3697 | /* Device addresses handling functions */ |
3698 | |
3699 | static void dev_addr_flush(struct net_device *dev) |
3700 | { |
3701 | /* rtnl_mutex must be held here */ |
3702 | |
3703 | __hw_addr_flush(&dev->dev_addrs); |
3704 | dev->dev_addr = NULL; |
3705 | } |
3706 | |
3707 | static int dev_addr_init(struct net_device *dev) |
3708 | { |
3709 | unsigned char addr[MAX_ADDR_LEN]; |
3710 | struct netdev_hw_addr *ha; |
3711 | int err; |
3712 | |
3713 | /* rtnl_mutex must be held here */ |
3714 | |
3715 | __hw_addr_init(&dev->dev_addrs); |
3716 | memset(addr, 0, sizeof(addr)); |
3717 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), |
3718 | NETDEV_HW_ADDR_T_LAN); |
3719 | if (!err) { |
3720 | /* |
3721 | * Get the first (previously created) address from the list |
3722 | * and set dev_addr pointer to this location. |
3723 | */ |
3724 | ha = list_first_entry(&dev->dev_addrs.list, |
3725 | struct netdev_hw_addr, list); |
3726 | dev->dev_addr = ha->addr; |
3727 | } |
3728 | return err; |
3729 | } |
3730 | |
3731 | /** |
3732 | * dev_addr_add - Add a device address |
3733 | * @dev: device |
3734 | * @addr: address to add |
3735 | * @addr_type: address type |
3736 | * |
3737 | * Add a device address to the device or increase the reference count if |
3738 | * it already exists. |
3739 | * |
3740 | * The caller must hold the rtnl_mutex. |
3741 | */ |
3742 | int dev_addr_add(struct net_device *dev, unsigned char *addr, |
3743 | unsigned char addr_type) |
3744 | { |
3745 | int err; |
3746 | |
3747 | ASSERT_RTNL(); |
3748 | |
3749 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); |
3750 | if (!err) |
3751 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
3752 | return err; |
3753 | } |
3754 | EXPORT_SYMBOL(dev_addr_add); |
3755 | |
3756 | /** |
3757 | * dev_addr_del - Release a device address. |
3758 | * @dev: device |
3759 | * @addr: address to delete |
3760 | * @addr_type: address type |
3761 | * |
3762 | * Release reference to a device address and remove it from the device |
3763 | * if the reference count drops to zero. |
3764 | * |
3765 | * The caller must hold the rtnl_mutex. |
3766 | */ |
3767 | int dev_addr_del(struct net_device *dev, unsigned char *addr, |
3768 | unsigned char addr_type) |
3769 | { |
3770 | int err; |
3771 | struct netdev_hw_addr *ha; |
3772 | |
3773 | ASSERT_RTNL(); |
3774 | |
3775 | /* |
	 * We cannot remove the first address from the list because
3777 | * dev->dev_addr points to that. |
3778 | */ |
3779 | ha = list_first_entry(&dev->dev_addrs.list, |
3780 | struct netdev_hw_addr, list); |
3781 | if (ha->addr == dev->dev_addr && ha->refcount == 1) |
3782 | return -ENOENT; |
3783 | |
3784 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, |
3785 | addr_type); |
3786 | if (!err) |
3787 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
3788 | return err; |
3789 | } |
3790 | EXPORT_SYMBOL(dev_addr_del); |
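/* Example (hypothetical): adding and releasing a secondary hardware
 * address under the rtnl semaphore. The address length is taken from
 * dev->addr_len, so for an Ethernet device the buffer must hold
 * ETH_ALEN bytes:
 *
 *	unsigned char mac[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
 *
 *	rtnl_lock();
 *	err = dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_LAN);
 *	...
 *	if (!err)
 *		err = dev_addr_del(dev, mac, NETDEV_HW_ADDR_T_LAN);
 *	rtnl_unlock();
 */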
3791 | |
3792 | /** |
3793 | * dev_addr_add_multiple - Add device addresses from another device |
3794 | * @to_dev: device to which addresses will be added |
3795 | * @from_dev: device from which addresses will be added |
 * @addr_type: address type - 0 means the type will be taken from @from_dev
3797 | * |
 * Add all device addresses of one device to another.
 *
3800 | * The caller must hold the rtnl_mutex. |
3801 | */ |
3802 | int dev_addr_add_multiple(struct net_device *to_dev, |
3803 | struct net_device *from_dev, |
3804 | unsigned char addr_type) |
3805 | { |
3806 | int err; |
3807 | |
3808 | ASSERT_RTNL(); |
3809 | |
3810 | if (from_dev->addr_len != to_dev->addr_len) |
3811 | return -EINVAL; |
3812 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, |
3813 | to_dev->addr_len, addr_type); |
3814 | if (!err) |
3815 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); |
3816 | return err; |
3817 | } |
3818 | EXPORT_SYMBOL(dev_addr_add_multiple); |
3819 | |
3820 | /** |
3821 | * dev_addr_del_multiple - Delete device addresses by another device |
3822 | * @to_dev: device where the addresses will be deleted |
3823 | * @from_dev: device by which addresses the addresses will be deleted |
 * @from_dev: device whose addresses will be deleted from @to_dev
 * @addr_type: address type - 0 means the type will be taken from @from_dev
 *
 * Deletes from @to_dev the addresses that are listed in @from_dev.
3828 | * The caller must hold the rtnl_mutex. |
3829 | */ |
3830 | int dev_addr_del_multiple(struct net_device *to_dev, |
3831 | struct net_device *from_dev, |
3832 | unsigned char addr_type) |
3833 | { |
3834 | ASSERT_RTNL(); |
3835 | |
3836 | if (from_dev->addr_len != to_dev->addr_len) |
3837 | return -EINVAL; |
3838 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, |
3839 | to_dev->addr_len, addr_type); |
3840 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); |
3841 | return 0; |
3842 | } |
3843 | EXPORT_SYMBOL(dev_addr_del_multiple); |
3844 | |
3845 | /* multicast addresses handling functions */ |
3846 | |
3847 | int __dev_addr_delete(struct dev_addr_list **list, int *count, |
3848 | void *addr, int alen, int glbl) |
3849 | { |
3850 | struct dev_addr_list *da; |
3851 | |
3852 | for (; (da = *list) != NULL; list = &da->next) { |
3853 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && |
3854 | alen == da->da_addrlen) { |
3855 | if (glbl) { |
3856 | int old_glbl = da->da_gusers; |
3857 | da->da_gusers = 0; |
3858 | if (old_glbl == 0) |
3859 | break; |
3860 | } |
3861 | if (--da->da_users) |
3862 | return 0; |
3863 | |
3864 | *list = da->next; |
3865 | kfree(da); |
3866 | (*count)--; |
3867 | return 0; |
3868 | } |
3869 | } |
3870 | return -ENOENT; |
3871 | } |
3872 | |
3873 | int __dev_addr_add(struct dev_addr_list **list, int *count, |
3874 | void *addr, int alen, int glbl) |
3875 | { |
3876 | struct dev_addr_list *da; |
3877 | |
3878 | for (da = *list; da != NULL; da = da->next) { |
3879 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && |
3880 | da->da_addrlen == alen) { |
3881 | if (glbl) { |
3882 | int old_glbl = da->da_gusers; |
3883 | da->da_gusers = 1; |
3884 | if (old_glbl) |
3885 | return 0; |
3886 | } |
3887 | da->da_users++; |
3888 | return 0; |
3889 | } |
3890 | } |
3891 | |
3892 | da = kzalloc(sizeof(*da), GFP_ATOMIC); |
3893 | if (da == NULL) |
3894 | return -ENOMEM; |
3895 | memcpy(da->da_addr, addr, alen); |
3896 | da->da_addrlen = alen; |
3897 | da->da_users = 1; |
3898 | da->da_gusers = glbl ? 1 : 0; |
3899 | da->next = *list; |
3900 | *list = da; |
3901 | (*count)++; |
3902 | return 0; |
3903 | } |
3904 | |
3905 | /** |
3906 | * dev_unicast_delete - Release secondary unicast address. |
3907 | * @dev: device |
3908 | * @addr: address to delete |
3909 | * |
3910 | * Release reference to a secondary unicast address and remove it |
3911 | * from the device if the reference count drops to zero. |
3912 | * |
3913 | * The caller must hold the rtnl_mutex. |
3914 | */ |
3915 | int dev_unicast_delete(struct net_device *dev, void *addr) |
3916 | { |
3917 | int err; |
3918 | |
3919 | ASSERT_RTNL(); |
3920 | |
3921 | netif_addr_lock_bh(dev); |
3922 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, |
3923 | NETDEV_HW_ADDR_T_UNICAST); |
3924 | if (!err) |
3925 | __dev_set_rx_mode(dev); |
3926 | netif_addr_unlock_bh(dev); |
3927 | return err; |
3928 | } |
3929 | EXPORT_SYMBOL(dev_unicast_delete); |
3930 | |
3931 | /** |
3932 | * dev_unicast_add - add a secondary unicast address |
3933 | * @dev: device |
3934 | * @addr: address to add |
3935 | * |
3936 | * Add a secondary unicast address to the device or increase |
3937 | * the reference count if it already exists. |
3938 | * |
3939 | * The caller must hold the rtnl_mutex. |
3940 | */ |
3941 | int dev_unicast_add(struct net_device *dev, void *addr) |
3942 | { |
3943 | int err; |
3944 | |
3945 | ASSERT_RTNL(); |
3946 | |
3947 | netif_addr_lock_bh(dev); |
3948 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, |
3949 | NETDEV_HW_ADDR_T_UNICAST); |
3950 | if (!err) |
3951 | __dev_set_rx_mode(dev); |
3952 | netif_addr_unlock_bh(dev); |
3953 | return err; |
3954 | } |
3955 | EXPORT_SYMBOL(dev_unicast_add); |
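/* Example (hypothetical sketch): a feature that needs frames for an
 * extra unicast address registers it while in use; extra_mac is an
 * assumed buffer of dev->addr_len bytes. If the hardware cannot filter
 * unicast addresses, __dev_set_rx_mode() above falls back to
 * promiscuous mode while the address is present:
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(dev, extra_mac);
 *	...
 *	if (!err)
 *		dev_unicast_delete(dev, extra_mac);
 *	rtnl_unlock();
 */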
3956 | |
3957 | int __dev_addr_sync(struct dev_addr_list **to, int *to_count, |
3958 | struct dev_addr_list **from, int *from_count) |
3959 | { |
3960 | struct dev_addr_list *da, *next; |
3961 | int err = 0; |
3962 | |
3963 | da = *from; |
3964 | while (da != NULL) { |
3965 | next = da->next; |
3966 | if (!da->da_synced) { |
3967 | err = __dev_addr_add(to, to_count, |
3968 | da->da_addr, da->da_addrlen, 0); |
3969 | if (err < 0) |
3970 | break; |
3971 | da->da_synced = 1; |
3972 | da->da_users++; |
3973 | } else if (da->da_users == 1) { |
3974 | __dev_addr_delete(to, to_count, |
3975 | da->da_addr, da->da_addrlen, 0); |
3976 | __dev_addr_delete(from, from_count, |
3977 | da->da_addr, da->da_addrlen, 0); |
3978 | } |
3979 | da = next; |
3980 | } |
3981 | return err; |
3982 | } |
3983 | EXPORT_SYMBOL_GPL(__dev_addr_sync); |
3984 | |
3985 | void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, |
3986 | struct dev_addr_list **from, int *from_count) |
3987 | { |
3988 | struct dev_addr_list *da, *next; |
3989 | |
3990 | da = *from; |
3991 | while (da != NULL) { |
3992 | next = da->next; |
3993 | if (da->da_synced) { |
3994 | __dev_addr_delete(to, to_count, |
3995 | da->da_addr, da->da_addrlen, 0); |
3996 | da->da_synced = 0; |
3997 | __dev_addr_delete(from, from_count, |
3998 | da->da_addr, da->da_addrlen, 0); |
3999 | } |
4000 | da = next; |
4001 | } |
4002 | } |
4003 | EXPORT_SYMBOL_GPL(__dev_addr_unsync); |
4004 | |
4005 | /** |
4006 | * dev_unicast_sync - Synchronize device's unicast list to another device |
4007 | * @to: destination device |
4008 | * @from: source device |
4009 | * |
4010 | * Add newly added addresses to the destination device and release |
4011 | * addresses that have no users left. The source device must be |
 * locked by netif_addr_lock_bh.
4013 | * |
4014 | * This function is intended to be called from the dev->set_rx_mode |
4015 | * function of layered software devices. |
4016 | */ |
4017 | int dev_unicast_sync(struct net_device *to, struct net_device *from) |
4018 | { |
4019 | int err = 0; |
4020 | |
4021 | if (to->addr_len != from->addr_len) |
4022 | return -EINVAL; |
4023 | |
4024 | netif_addr_lock_bh(to); |
4025 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); |
4026 | if (!err) |
4027 | __dev_set_rx_mode(to); |
4028 | netif_addr_unlock_bh(to); |
4029 | return err; |
4030 | } |
4031 | EXPORT_SYMBOL(dev_unicast_sync); |
4032 | |
4033 | /** |
4034 | * dev_unicast_unsync - Remove synchronized addresses from the destination device |
4035 | * @to: destination device |
4036 | * @from: source device |
4037 | * |
4038 | * Remove all addresses that were added to the destination device by |
4039 | * dev_unicast_sync(). This function is intended to be called from the |
4040 | * dev->stop function of layered software devices. |
4041 | */ |
4042 | void dev_unicast_unsync(struct net_device *to, struct net_device *from) |
4043 | { |
4044 | if (to->addr_len != from->addr_len) |
4045 | return; |
4046 | |
4047 | netif_addr_lock_bh(from); |
4048 | netif_addr_lock(to); |
4049 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); |
4050 | __dev_set_rx_mode(to); |
4051 | netif_addr_unlock(to); |
4052 | netif_addr_unlock_bh(from); |
4053 | } |
4054 | EXPORT_SYMBOL(dev_unicast_unsync); |
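/* Example: a layered software device (a vlan-like upper device over a
 * real lower device) keeps the lower device's unicast filter in sync.
 * Hypothetical sketch; upper_get_lower() is an assumed helper returning
 * the underlying device:
 *
 *	static void upper_set_rx_mode(struct net_device *dev)
 *	{
 *		dev_unicast_sync(upper_get_lower(dev), dev);
 *	}
 *
 *	static int upper_stop(struct net_device *dev)
 *	{
 *		dev_unicast_unsync(upper_get_lower(dev), dev);
 *		return 0;
 *	}
 *
 * upper_set_rx_mode would be wired to ndo_set_rx_mode and upper_stop
 * to ndo_stop in the upper device's net_device_ops.
 */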
4055 | |
4056 | static void dev_unicast_flush(struct net_device *dev) |
4057 | { |
4058 | netif_addr_lock_bh(dev); |
4059 | __hw_addr_flush(&dev->uc); |
4060 | netif_addr_unlock_bh(dev); |
4061 | } |
4062 | |
4063 | static void dev_unicast_init(struct net_device *dev) |
4064 | { |
4065 | __hw_addr_init(&dev->uc); |
4066 | } |
4067 | |
4068 | |
4069 | static void __dev_addr_discard(struct dev_addr_list **list) |
4070 | { |
4071 | struct dev_addr_list *tmp; |
4072 | |
4073 | while (*list != NULL) { |
4074 | tmp = *list; |
4075 | *list = tmp->next; |
4076 | if (tmp->da_users > tmp->da_gusers) |
			printk(KERN_ERR "__dev_addr_discard: address leakage! "
				"da_users=%d\n", tmp->da_users);
4079 | kfree(tmp); |
4080 | } |
4081 | } |
4082 | |
4083 | static void dev_addr_discard(struct net_device *dev) |
4084 | { |
4085 | netif_addr_lock_bh(dev); |
4086 | |
4087 | __dev_addr_discard(&dev->mc_list); |
4088 | dev->mc_count = 0; |
4089 | |
4090 | netif_addr_unlock_bh(dev); |
4091 | } |
4092 | |
4093 | /** |
4094 | * dev_get_flags - get flags reported to userspace |
4095 | * @dev: device |
4096 | * |
4097 | * Get the combination of flag bits exported through APIs to userspace. |
4098 | */ |
4099 | unsigned dev_get_flags(const struct net_device *dev) |
4100 | { |
4101 | unsigned flags; |
4102 | |
4103 | flags = (dev->flags & ~(IFF_PROMISC | |
4104 | IFF_ALLMULTI | |
4105 | IFF_RUNNING | |
4106 | IFF_LOWER_UP | |
4107 | IFF_DORMANT)) | |
4108 | (dev->gflags & (IFF_PROMISC | |
4109 | IFF_ALLMULTI)); |
4110 | |
4111 | if (netif_running(dev)) { |
4112 | if (netif_oper_up(dev)) |
4113 | flags |= IFF_RUNNING; |
4114 | if (netif_carrier_ok(dev)) |
4115 | flags |= IFF_LOWER_UP; |
4116 | if (netif_dormant(dev)) |
4117 | flags |= IFF_DORMANT; |
4118 | } |
4119 | |
4120 | return flags; |
4121 | } |
4122 | EXPORT_SYMBOL(dev_get_flags); |
4123 | |
4124 | /** |
4125 | * dev_change_flags - change device settings |
4126 | * @dev: device |
4127 | * @flags: device state flags |
4128 | * |
4129 | * Change settings on device based state flags. The flags are |
4130 | * in the userspace exported format. |
4131 | */ |
4132 | int dev_change_flags(struct net_device *dev, unsigned flags) |
4133 | { |
4134 | int ret, changes; |
4135 | int old_flags = dev->flags; |
4136 | |
4137 | ASSERT_RTNL(); |
4138 | |
4139 | /* |
4140 | * Set the flags on our device. |
4141 | */ |
4142 | |
4143 | dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | |
4144 | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | |
4145 | IFF_AUTOMEDIA)) | |
4146 | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | |
4147 | IFF_ALLMULTI)); |
4148 | |
4149 | /* |
4150 | * Load in the correct multicast list now the flags have changed. |
4151 | */ |
4152 | |
4153 | if ((old_flags ^ flags) & IFF_MULTICAST) |
4154 | dev_change_rx_flags(dev, IFF_MULTICAST); |
4155 | |
4156 | dev_set_rx_mode(dev); |
4157 | |
4158 | /* |
	 * Have we downed the interface? We handle IFF_UP ourselves
4160 | * according to user attempts to set it, rather than blindly |
4161 | * setting it. |
4162 | */ |
4163 | |
4164 | ret = 0; |
4165 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ |
4166 | ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); |
4167 | |
4168 | if (!ret) |
4169 | dev_set_rx_mode(dev); |
4170 | } |
4171 | |
4172 | if (dev->flags & IFF_UP && |
4173 | ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | |
4174 | IFF_VOLATILE))) |
4175 | call_netdevice_notifiers(NETDEV_CHANGE, dev); |
4176 | |
4177 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
4178 | int inc = (flags & IFF_PROMISC) ? 1 : -1; |
4179 | |
4180 | dev->gflags ^= IFF_PROMISC; |
4181 | dev_set_promiscuity(dev, inc); |
4182 | } |
4183 | |
	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC when
	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4187 | */ |
4188 | if ((flags ^ dev->gflags) & IFF_ALLMULTI) { |
4189 | int inc = (flags & IFF_ALLMULTI) ? 1 : -1; |
4190 | |
4191 | dev->gflags ^= IFF_ALLMULTI; |
4192 | dev_set_allmulti(dev, inc); |
4193 | } |
4194 | |
4195 | /* Exclude state transition flags, already notified */ |
4196 | changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); |
4197 | if (changes) |
4198 | rtmsg_ifinfo(RTM_NEWLINK, dev, changes); |
4199 | |
4200 | return ret; |
4201 | } |
4202 | EXPORT_SYMBOL(dev_change_flags); |
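/* Example (hypothetical, rtnl held): bringing an interface up while
 * preserving its other userspace-visible flags, the same way the
 * SIOCSIFFLAGS handler below does it:
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
 *	rtnl_unlock();
 */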
4203 | |
4204 | /** |
4205 | * dev_set_mtu - Change maximum transfer unit |
4206 | * @dev: device |
4207 | * @new_mtu: new transfer unit |
4208 | * |
4209 | * Change the maximum transfer size of the network device. |
4210 | */ |
4211 | int dev_set_mtu(struct net_device *dev, int new_mtu) |
4212 | { |
4213 | const struct net_device_ops *ops = dev->netdev_ops; |
4214 | int err; |
4215 | |
4216 | if (new_mtu == dev->mtu) |
4217 | return 0; |
4218 | |
	/* MTU must not be negative. */
4220 | if (new_mtu < 0) |
4221 | return -EINVAL; |
4222 | |
4223 | if (!netif_device_present(dev)) |
4224 | return -ENODEV; |
4225 | |
4226 | err = 0; |
4227 | if (ops->ndo_change_mtu) |
4228 | err = ops->ndo_change_mtu(dev, new_mtu); |
4229 | else |
4230 | dev->mtu = new_mtu; |
4231 | |
4232 | if (!err && dev->flags & IFF_UP) |
4233 | call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); |
4234 | return err; |
4235 | } |
4236 | EXPORT_SYMBOL(dev_set_mtu); |
4237 | |
4238 | /** |
4239 | * dev_set_mac_address - Change Media Access Control Address |
4240 | * @dev: device |
4241 | * @sa: new address |
4242 | * |
4243 | * Change the hardware (MAC) address of the device |
4244 | */ |
4245 | int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) |
4246 | { |
4247 | const struct net_device_ops *ops = dev->netdev_ops; |
4248 | int err; |
4249 | |
4250 | if (!ops->ndo_set_mac_address) |
4251 | return -EOPNOTSUPP; |
4252 | if (sa->sa_family != dev->type) |
4253 | return -EINVAL; |
4254 | if (!netif_device_present(dev)) |
4255 | return -ENODEV; |
4256 | err = ops->ndo_set_mac_address(dev, sa); |
4257 | if (!err) |
4258 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
4259 | return err; |
4260 | } |
4261 | EXPORT_SYMBOL(dev_set_mac_address); |
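/* Example (hypothetical, rtnl held): the new address is passed as a
 * struct sockaddr whose sa_family must match dev->type (for Ethernet,
 * ARPHRD_ETHER) and whose sa_data carries dev->addr_len address bytes;
 * new_mac is an assumed buffer:
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	err = dev_set_mac_address(dev, &sa);
 */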
4262 | |
4263 | /* |
4264 | * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) |
4265 | */ |
4266 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) |
4267 | { |
4268 | int err; |
4269 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); |
4270 | |
4271 | if (!dev) |
4272 | return -ENODEV; |
4273 | |
4274 | switch (cmd) { |
4275 | case SIOCGIFFLAGS: /* Get interface flags */ |
4276 | ifr->ifr_flags = (short) dev_get_flags(dev); |
4277 | return 0; |
4278 | |
4279 | case SIOCGIFMETRIC: /* Get the metric on the interface |
4280 | (currently unused) */ |
4281 | ifr->ifr_metric = 0; |
4282 | return 0; |
4283 | |
4284 | case SIOCGIFMTU: /* Get the MTU of a device */ |
4285 | ifr->ifr_mtu = dev->mtu; |
4286 | return 0; |
4287 | |
4288 | case SIOCGIFHWADDR: |
4289 | if (!dev->addr_len) |
4290 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); |
4291 | else |
4292 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, |
4293 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
4294 | ifr->ifr_hwaddr.sa_family = dev->type; |
4295 | return 0; |
4296 | |
4297 | case SIOCGIFSLAVE: |
4298 | err = -EINVAL; |
4299 | break; |
4300 | |
4301 | case SIOCGIFMAP: |
4302 | ifr->ifr_map.mem_start = dev->mem_start; |
4303 | ifr->ifr_map.mem_end = dev->mem_end; |
4304 | ifr->ifr_map.base_addr = dev->base_addr; |
4305 | ifr->ifr_map.irq = dev->irq; |
4306 | ifr->ifr_map.dma = dev->dma; |
4307 | ifr->ifr_map.port = dev->if_port; |
4308 | return 0; |
4309 | |
4310 | case SIOCGIFINDEX: |
4311 | ifr->ifr_ifindex = dev->ifindex; |
4312 | return 0; |
4313 | |
4314 | case SIOCGIFTXQLEN: |
4315 | ifr->ifr_qlen = dev->tx_queue_len; |
4316 | return 0; |
4317 | |
4318 | default: |
4319 | /* dev_ioctl() should ensure this case |
4320 | * is never reached |
4321 | */ |
4322 | WARN_ON(1); |
4323 | err = -EINVAL; |
4324 | break; |
4325 | |
4326 | } |
4327 | return err; |
4328 | } |
4329 | |
4330 | /* |
4331 | * Perform the SIOCxIFxxx calls, inside rtnl_lock() |
4332 | */ |
4333 | static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) |
4334 | { |
4335 | int err; |
4336 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); |
4337 | const struct net_device_ops *ops; |
4338 | |
4339 | if (!dev) |
4340 | return -ENODEV; |
4341 | |
4342 | ops = dev->netdev_ops; |
4343 | |
4344 | switch (cmd) { |
4345 | case SIOCSIFFLAGS: /* Set interface flags */ |
4346 | return dev_change_flags(dev, ifr->ifr_flags); |
4347 | |
4348 | case SIOCSIFMETRIC: /* Set the metric on the interface |
4349 | (currently unused) */ |
4350 | return -EOPNOTSUPP; |
4351 | |
4352 | case SIOCSIFMTU: /* Set the MTU of a device */ |
4353 | return dev_set_mtu(dev, ifr->ifr_mtu); |
4354 | |
4355 | case SIOCSIFHWADDR: |
4356 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); |
4357 | |
4358 | case SIOCSIFHWBROADCAST: |
4359 | if (ifr->ifr_hwaddr.sa_family != dev->type) |
4360 | return -EINVAL; |
4361 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, |
4362 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
4363 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
4364 | return 0; |
4365 | |
4366 | case SIOCSIFMAP: |
4367 | if (ops->ndo_set_config) { |
4368 | if (!netif_device_present(dev)) |
4369 | return -ENODEV; |
4370 | return ops->ndo_set_config(dev, &ifr->ifr_map); |
4371 | } |
4372 | return -EOPNOTSUPP; |
4373 | |
4374 | case SIOCADDMULTI: |
4375 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
4376 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
4377 | return -EINVAL; |
4378 | if (!netif_device_present(dev)) |
4379 | return -ENODEV; |
4380 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, |
4381 | dev->addr_len, 1); |
4382 | |
4383 | case SIOCDELMULTI: |
4384 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
4385 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
4386 | return -EINVAL; |
4387 | if (!netif_device_present(dev)) |
4388 | return -ENODEV; |
4389 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, |
4390 | dev->addr_len, 1); |
4391 | |
4392 | case SIOCSIFTXQLEN: |
4393 | if (ifr->ifr_qlen < 0) |
4394 | return -EINVAL; |
4395 | dev->tx_queue_len = ifr->ifr_qlen; |
4396 | return 0; |
4397 | |
4398 | case SIOCSIFNAME: |
4399 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; |
4400 | return dev_change_name(dev, ifr->ifr_newname); |
4401 | |
4402 | /* |
4403 | * Unknown or private ioctl |
4404 | */ |
4405 | default: |
4406 | if ((cmd >= SIOCDEVPRIVATE && |
4407 | cmd <= SIOCDEVPRIVATE + 15) || |
4408 | cmd == SIOCBONDENSLAVE || |
4409 | cmd == SIOCBONDRELEASE || |
4410 | cmd == SIOCBONDSETHWADDR || |
4411 | cmd == SIOCBONDSLAVEINFOQUERY || |
4412 | cmd == SIOCBONDINFOQUERY || |
4413 | cmd == SIOCBONDCHANGEACTIVE || |
4414 | cmd == SIOCGMIIPHY || |
4415 | cmd == SIOCGMIIREG || |
4416 | cmd == SIOCSMIIREG || |
4417 | cmd == SIOCBRADDIF || |
4418 | cmd == SIOCBRDELIF || |
4419 | cmd == SIOCSHWTSTAMP || |
4420 | cmd == SIOCWANDEV) { |
4421 | err = -EOPNOTSUPP; |
4422 | if (ops->ndo_do_ioctl) { |
4423 | if (netif_device_present(dev)) |
4424 | err = ops->ndo_do_ioctl(dev, ifr, cmd); |
4425 | else |
4426 | err = -ENODEV; |
4427 | } |
4428 | } else |
4429 | err = -EINVAL; |
4430 | |
4431 | } |
4432 | return err; |
4433 | } |
4434 | |
4435 | /* |
4436 | * This function handles all "interface"-type I/O control requests. The actual |
4437 | * 'doing' part of this is dev_ifsioc above. |
4438 | */ |
4439 | |
4440 | /** |
4441 | * dev_ioctl - network device ioctl |
4442 | * @net: the applicable net namespace |
4443 | * @cmd: command to issue |
4444 | * @arg: pointer to a struct ifreq in user space |
4445 | * |
4446 | * Issue ioctl functions to devices. This is normally called by the |
4447 | * user space syscall interfaces but can sometimes be useful for |
4448 | * other purposes. The return value is the return from the syscall if |
4449 | * positive or a negative errno code on error. |
4450 | */ |
4451 | |
4452 | int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) |
4453 | { |
4454 | struct ifreq ifr; |
4455 | int ret; |
4456 | char *colon; |
4457 | |
4458 | /* One special case: SIOCGIFCONF takes ifconf argument |
4459 | and requires shared lock, because it sleeps writing |
4460 | to user space. |
4461 | */ |
4462 | |
4463 | if (cmd == SIOCGIFCONF) { |
4464 | rtnl_lock(); |
4465 | ret = dev_ifconf(net, (char __user *) arg); |
4466 | rtnl_unlock(); |
4467 | return ret; |
4468 | } |
4469 | if (cmd == SIOCGIFNAME) |
4470 | return dev_ifname(net, (struct ifreq __user *)arg); |
4471 | |
4472 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
4473 | return -EFAULT; |
4474 | |
4475 | ifr.ifr_name[IFNAMSIZ-1] = 0; |
4476 | |
4477 | colon = strchr(ifr.ifr_name, ':'); |
4478 | if (colon) |
4479 | *colon = 0; |
4480 | |
4481 | /* |
4482 | * See which interface the caller is talking about. |
4483 | */ |
4484 | |
4485 | switch (cmd) { |
4486 | /* |
4487 | * These ioctl calls: |
4488 | * - can be done by all. |
4489 | * - atomic and do not require locking. |
4490 | * - return a value |
4491 | */ |
4492 | case SIOCGIFFLAGS: |
4493 | case SIOCGIFMETRIC: |
4494 | case SIOCGIFMTU: |
4495 | case SIOCGIFHWADDR: |
4496 | case SIOCGIFSLAVE: |
4497 | case SIOCGIFMAP: |
4498 | case SIOCGIFINDEX: |
4499 | case SIOCGIFTXQLEN: |
4500 | dev_load(net, ifr.ifr_name); |
4501 | read_lock(&dev_base_lock); |
4502 | ret = dev_ifsioc_locked(net, &ifr, cmd); |
4503 | read_unlock(&dev_base_lock); |
4504 | if (!ret) { |
4505 | if (colon) |
4506 | *colon = ':'; |
4507 | if (copy_to_user(arg, &ifr, |
4508 | sizeof(struct ifreq))) |
4509 | ret = -EFAULT; |
4510 | } |
4511 | return ret; |
4512 | |
4513 | case SIOCETHTOOL: |
4514 | dev_load(net, ifr.ifr_name); |
4515 | rtnl_lock(); |
4516 | ret = dev_ethtool(net, &ifr); |
4517 | rtnl_unlock(); |
4518 | if (!ret) { |
4519 | if (colon) |
4520 | *colon = ':'; |
4521 | if (copy_to_user(arg, &ifr, |
4522 | sizeof(struct ifreq))) |
4523 | ret = -EFAULT; |
4524 | } |
4525 | return ret; |
4526 | |
4527 | /* |
4528 | * These ioctl calls: |
4529 | * - require superuser power. |
4530 | * - require strict serialization. |
4531 | * - return a value |
4532 | */ |
4533 | case SIOCGMIIPHY: |
4534 | case SIOCGMIIREG: |
4535 | case SIOCSIFNAME: |
4536 | if (!capable(CAP_NET_ADMIN)) |
4537 | return -EPERM; |
4538 | dev_load(net, ifr.ifr_name); |
4539 | rtnl_lock(); |
4540 | ret = dev_ifsioc(net, &ifr, cmd); |
4541 | rtnl_unlock(); |
4542 | if (!ret) { |
4543 | if (colon) |
4544 | *colon = ':'; |
4545 | if (copy_to_user(arg, &ifr, |
4546 | sizeof(struct ifreq))) |
4547 | ret = -EFAULT; |
4548 | } |
4549 | return ret; |
4550 | |
4551 | /* |
4552 | * These ioctl calls: |
4553 | * - require superuser power. |
4554 | * - require strict serialization. |
4555 | * - do not return a value |
4556 | */ |
4557 | case SIOCSIFFLAGS: |
4558 | case SIOCSIFMETRIC: |
4559 | case SIOCSIFMTU: |
4560 | case SIOCSIFMAP: |
4561 | case SIOCSIFHWADDR: |
4562 | case SIOCSIFSLAVE: |
4563 | case SIOCADDMULTI: |
4564 | case SIOCDELMULTI: |
4565 | case SIOCSIFHWBROADCAST: |
4566 | case SIOCSIFTXQLEN: |
4567 | case SIOCSMIIREG: |
4568 | case SIOCBONDENSLAVE: |
4569 | case SIOCBONDRELEASE: |
4570 | case SIOCBONDSETHWADDR: |
4571 | case SIOCBONDCHANGEACTIVE: |
4572 | case SIOCBRADDIF: |
4573 | case SIOCBRDELIF: |
4574 | case SIOCSHWTSTAMP: |
4575 | if (!capable(CAP_NET_ADMIN)) |
4576 | return -EPERM; |
4577 | /* fall through */ |
4578 | case SIOCBONDSLAVEINFOQUERY: |
4579 | case SIOCBONDINFOQUERY: |
4580 | dev_load(net, ifr.ifr_name); |
4581 | rtnl_lock(); |
4582 | ret = dev_ifsioc(net, &ifr, cmd); |
4583 | rtnl_unlock(); |
4584 | return ret; |
4585 | |
4586 | case SIOCGIFMEM: |
4587 | /* Get the per device memory space. We can add this but |
4588 | * currently do not support it */ |
4589 | case SIOCSIFMEM: |
4590 | /* Set the per device memory buffer space. |
4591 | * Not applicable in our case */ |
4592 | case SIOCSIFLINK: |
4593 | return -EINVAL; |
4594 | |
4595 | /* |
4596 | * Unknown or private ioctl. |
4597 | */ |
4598 | default: |
4599 | if (cmd == SIOCWANDEV || |
4600 | (cmd >= SIOCDEVPRIVATE && |
4601 | cmd <= SIOCDEVPRIVATE + 15)) { |
4602 | dev_load(net, ifr.ifr_name); |
4603 | rtnl_lock(); |
4604 | ret = dev_ifsioc(net, &ifr, cmd); |
4605 | rtnl_unlock(); |
4606 | if (!ret && copy_to_user(arg, &ifr, |
4607 | sizeof(struct ifreq))) |
4608 | ret = -EFAULT; |
4609 | return ret; |
4610 | } |
4611 | /* Take care of Wireless Extensions */ |
4612 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) |
4613 | return wext_handle_ioctl(net, &ifr, cmd, arg); |
4614 | return -EINVAL; |
4615 | } |
4616 | } |
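/* From userspace this entry point is reached through ioctl() on any
 * socket. Hypothetical example querying an interface's MTU (the
 * SIOCGIFMTU branch handled above):
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 *	close(fd);
 */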
4617 | |
4618 | |
4619 | /** |
4620 | * dev_new_index - allocate an ifindex |
4621 | * @net: the applicable net namespace |
4622 | * |
4623 | * Returns a suitable unique value for a new device interface |
4624 | * number. The caller must hold the rtnl semaphore or the |
4625 | * dev_base_lock to be sure it remains unique. |
4626 | */ |
4627 | static int dev_new_index(struct net *net) |
4628 | { |
4629 | static int ifindex; |
4630 | for (;;) { |
4631 | if (++ifindex <= 0) |
4632 | ifindex = 1; |
4633 | if (!__dev_get_by_index(net, ifindex)) |
4634 | return ifindex; |
4635 | } |
4636 | } |
4637 | |
4638 | /* Delayed registration/unregisteration */ |
4639 | static LIST_HEAD(net_todo_list); |
4640 | |
4641 | static void net_set_todo(struct net_device *dev) |
4642 | { |
4643 | list_add_tail(&dev->todo_list, &net_todo_list); |
4644 | } |
4645 | |
4646 | static void rollback_registered(struct net_device *dev) |
4647 | { |
4648 | BUG_ON(dev_boot_phase); |
4649 | ASSERT_RTNL(); |
4650 | |
	/* Some devices call this without ever having registered,
	 * to unwind a failed initialization.
	 */
4652 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
4653 | printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " |
4654 | "was registered\n", dev->name, dev); |
4655 | |
4656 | WARN_ON(1); |
4657 | return; |
4658 | } |
4659 | |
4660 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
4661 | |
4662 | /* If device is running, close it first. */ |
4663 | dev_close(dev); |
4664 | |
4665 | /* And unlink it from device chain. */ |
4666 | unlist_netdevice(dev); |
4667 | |
4668 | dev->reg_state = NETREG_UNREGISTERING; |
4669 | |
4670 | synchronize_net(); |
4671 | |
4672 | /* Shutdown queueing discipline. */ |
4673 | dev_shutdown(dev); |
4674 | |
4675 | |
4676 | /* Notify protocols, that we are about to destroy |
4677 | this device. They should clean all the things. |
4678 | */ |
4679 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
4680 | |
4681 | /* |
4682 | * Flush the unicast and multicast chains |
4683 | */ |
4684 | dev_unicast_flush(dev); |
4685 | dev_addr_discard(dev); |
4686 | |
4687 | if (dev->netdev_ops->ndo_uninit) |
4688 | dev->netdev_ops->ndo_uninit(dev); |
4689 | |
4690 | /* Notifier chain MUST detach us from master device. */ |
4691 | WARN_ON(dev->master); |
4692 | |
4693 | /* Remove entries from kobject tree */ |
4694 | netdev_unregister_kobject(dev); |
4695 | |
4696 | synchronize_net(); |
4697 | |
4698 | dev_put(dev); |
4699 | } |
4700 | |
4701 | static void __netdev_init_queue_locks_one(struct net_device *dev, |
4702 | struct netdev_queue *dev_queue, |
4703 | void *_unused) |
4704 | { |
4705 | spin_lock_init(&dev_queue->_xmit_lock); |
4706 | netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); |
4707 | dev_queue->xmit_lock_owner = -1; |
4708 | } |
4709 | |
4710 | static void netdev_init_queue_locks(struct net_device *dev) |
4711 | { |
4712 | netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); |
4713 | __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); |
4714 | } |
4715 | |
4716 | unsigned long netdev_fix_features(unsigned long features, const char *name) |
4717 | { |
4718 | /* Fix illegal SG+CSUM combinations. */ |
4719 | if ((features & NETIF_F_SG) && |
4720 | !(features & NETIF_F_ALL_CSUM)) { |
4721 | if (name) |
4722 | printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " |
4723 | "checksum feature.\n", name); |
4724 | features &= ~NETIF_F_SG; |
4725 | } |
4726 | |
4727 | /* TSO requires that SG is present as well. */ |
4728 | if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { |
4729 | if (name) |
4730 | printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " |
4731 | "SG feature.\n", name); |
4732 | features &= ~NETIF_F_TSO; |
4733 | } |
4734 | |
4735 | if (features & NETIF_F_UFO) { |
4736 | if (!(features & NETIF_F_GEN_CSUM)) { |
4737 | if (name) |
4738 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " |
4739 | "since no NETIF_F_HW_CSUM feature.\n", |
4740 | name); |
4741 | features &= ~NETIF_F_UFO; |
4742 | } |
4743 | |
4744 | if (!(features & NETIF_F_SG)) { |
4745 | if (name) |
4746 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " |
4747 | "since no NETIF_F_SG feature.\n", name); |
4748 | features &= ~NETIF_F_UFO; |
4749 | } |
4750 | } |
4751 | |
4752 | return features; |
4753 | } |
4754 | EXPORT_SYMBOL(netdev_fix_features); |
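/* Example (sketch): a driver may run its advertised feature set through
 * netdev_fix_features() before registering, exactly as
 * register_netdevice() does below, so illegal combinations are dropped
 * early. Requesting TSO without any checksum feature, for instance,
 * loses both TSO and SG:
 *
 *	dev->features |= NETIF_F_SG | NETIF_F_TSO;
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 */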
4755 | |
4756 | /** |
4757 | * register_netdevice - register a network device |
4758 | * @dev: device to register |
4759 | * |
4760 | * Take a completed network device structure and add it to the kernel |
4761 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
4762 | * chain. 0 is returned on success. A negative errno code is returned |
4763 | * on a failure to set up the device, or if the name is a duplicate. |
4764 | * |
4765 | * Callers must hold the rtnl semaphore. You may want |
4766 | * register_netdev() instead of this. |
4767 | * |
4768 | * BUGS: |
4769 | * The locking appears insufficient to guarantee two parallel registers |
4770 | * will not get the same name. |
4771 | */ |
4772 | |
4773 | int register_netdevice(struct net_device *dev) |
4774 | { |
4775 | struct hlist_head *head; |
4776 | struct hlist_node *p; |
4777 | int ret; |
4778 | struct net *net = dev_net(dev); |
4779 | |
4780 | BUG_ON(dev_boot_phase); |
4781 | ASSERT_RTNL(); |
4782 | |
4783 | might_sleep(); |
4784 | |
4785 | /* When net_device's are persistent, this will be fatal. */ |
4786 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
4787 | BUG_ON(!net); |
4788 | |
4789 | spin_lock_init(&dev->addr_list_lock); |
4790 | netdev_set_addr_lockdep_class(dev); |
4791 | netdev_init_queue_locks(dev); |
4792 | |
4793 | dev->iflink = -1; |
4794 | |
4795 | /* Init, if this function is available */ |
4796 | if (dev->netdev_ops->ndo_init) { |
4797 | ret = dev->netdev_ops->ndo_init(dev); |
4798 | if (ret) { |
4799 | if (ret > 0) |
4800 | ret = -EIO; |
4801 | goto out; |
4802 | } |
4803 | } |
4804 | |
4805 | if (!dev_valid_name(dev->name)) { |
4806 | ret = -EINVAL; |
4807 | goto err_uninit; |
4808 | } |
4809 | |
4810 | dev->ifindex = dev_new_index(net); |
4811 | if (dev->iflink == -1) |
4812 | dev->iflink = dev->ifindex; |
4813 | |
4814 | /* Check for existence of name */ |
4815 | head = dev_name_hash(net, dev->name); |
4816 | hlist_for_each(p, head) { |
4817 | struct net_device *d |
4818 | = hlist_entry(p, struct net_device, name_hlist); |
4819 | if (!strncmp(d->name, dev->name, IFNAMSIZ)) { |
4820 | ret = -EEXIST; |
4821 | goto err_uninit; |
4822 | } |
4823 | } |
4824 | |
4825 | /* Fix illegal checksum combinations */ |
4826 | if ((dev->features & NETIF_F_HW_CSUM) && |
4827 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4828 | printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", |
4829 | dev->name); |
4830 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); |
4831 | } |
4832 | |
4833 | if ((dev->features & NETIF_F_NO_CSUM) && |
4834 | (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4835 | printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", |
4836 | dev->name); |
4837 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); |
4838 | } |
4839 | |
4840 | dev->features = netdev_fix_features(dev->features, dev->name); |
4841 | |
4842 | /* Enable software GSO if SG is supported. */ |
4843 | if (dev->features & NETIF_F_SG) |
4844 | dev->features |= NETIF_F_GSO; |
4845 | |
4846 | netdev_initialize_kobject(dev); |
4847 | ret = netdev_register_kobject(dev); |
4848 | if (ret) |
4849 | goto err_uninit; |
4850 | dev->reg_state = NETREG_REGISTERED; |
4851 | |
4852 | /* |
4853 | * Default initial state at registry is that the |
4854 | * device is present. |
4855 | */ |
4856 | |
4857 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
4858 | |
4859 | dev_init_scheduler(dev); |
4860 | dev_hold(dev); |
4861 | list_netdevice(dev); |
4862 | |
4863 | /* Notify protocols, that a new device appeared. */ |
4864 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
4865 | ret = notifier_to_errno(ret); |
4866 | if (ret) { |
4867 | rollback_registered(dev); |
4868 | dev->reg_state = NETREG_UNREGISTERED; |
4869 | } |
4870 | /* |
4871 | * Prevent userspace races by waiting until the network |
4872 | * device is fully setup before sending notifications. |
4873 | */ |
4874 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); |
4875 | |
4876 | out: |
4877 | return ret; |
4878 | |
4879 | err_uninit: |
4880 | if (dev->netdev_ops->ndo_uninit) |
4881 | dev->netdev_ops->ndo_uninit(dev); |
4882 | goto out; |
4883 | } |
4884 | EXPORT_SYMBOL(register_netdevice); |
4885 | |
4886 | /** |
4887 | * init_dummy_netdev - init a dummy network device for NAPI |
4888 | * @dev: device to init |
4889 | * |
 * This takes a network device structure and initializes the minimum
 * number of fields so it can be used to schedule NAPI polls without
4892 | * registering a full blown interface. This is to be used by drivers |
4893 | * that need to tie several hardware interfaces to a single NAPI |
4894 | * poll scheduler due to HW limitations. |
4895 | */ |
4896 | int init_dummy_netdev(struct net_device *dev) |
4897 | { |
4898 | /* Clear everything. Note we don't initialize spinlocks |
	 * as they aren't supposed to be taken by any of the
4900 | * NAPI code and this dummy netdev is supposed to be |
4901 | * only ever used for NAPI polls |
4902 | */ |
4903 | memset(dev, 0, sizeof(struct net_device)); |
4904 | |
4905 | /* make sure we BUG if trying to hit standard |
4906 | * register/unregister code path |
4907 | */ |
4908 | dev->reg_state = NETREG_DUMMY; |
4909 | |
4910 | /* initialize the ref count */ |
4911 | atomic_set(&dev->refcnt, 1); |
4912 | |
4913 | /* NAPI wants this */ |
4914 | INIT_LIST_HEAD(&dev->napi_list); |
4915 | |
4916 | /* a dummy interface is started by default */ |
4917 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
4918 | set_bit(__LINK_STATE_START, &dev->state); |
4919 | |
4920 | return 0; |
4921 | } |
4922 | EXPORT_SYMBOL_GPL(init_dummy_netdev); |
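/* Example (hypothetical driver with several MACs behind one interrupt
 * source): embed a dummy netdev purely as a NAPI anchor. It is never
 * registered, so none of the register/unregister paths apply to it.
 * my_hw and my_poll are assumed driver-private names:
 *
 *	struct my_hw {
 *		struct net_device napi_dev;
 *		struct napi_struct napi;
 *	};
 *
 *	init_dummy_netdev(&hw->napi_dev);
 *	netif_napi_add(&hw->napi_dev, &hw->napi, my_poll, 64);
 */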
4923 | |
4924 | |
4925 | /** |
4926 | * register_netdev - register a network device |
4927 | * @dev: device to register |
4928 | * |
4929 | * Take a completed network device structure and add it to the kernel |
4930 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
4931 | * chain. 0 is returned on success. A negative errno code is returned |
4932 | * on a failure to set up the device, or if the name is a duplicate. |
4933 | * |
4934 | * This is a wrapper around register_netdevice that takes the rtnl semaphore |
4935 | * and expands the device name if you passed a format string to |
4936 | * alloc_netdev. |
4937 | */ |
4938 | int register_netdev(struct net_device *dev) |
4939 | { |
4940 | int err; |
4941 | |
4942 | rtnl_lock(); |
4943 | |
4944 | /* |
4945 | * If the name is a format string the caller wants us to do a |
4946 | * name allocation. |
4947 | */ |
4948 | if (strchr(dev->name, '%')) { |
4949 | err = dev_alloc_name(dev, dev->name); |
4950 | if (err < 0) |
4951 | goto out; |
4952 | } |
4953 | |
4954 | err = register_netdevice(dev); |
4955 | out: |
4956 | rtnl_unlock(); |
4957 | return err; |
4958 | } |
4959 | EXPORT_SYMBOL(register_netdev); |
4960 | |
4961 | /* |
4962 | * netdev_wait_allrefs - wait until all references are gone. |
4963 | * |
4964 | * This is called when unregistering network devices. |
4965 | * |
4966 | * Any protocol or device that holds a reference should register |
4967 | * for netdevice notification, and cleanup and put back the |
4968 | * reference if they receive an UNREGISTER event. |
4969 | * We can get stuck here if buggy protocols don't correctly |
4970 | * call dev_put. |
4971 | */ |
4972 | static void netdev_wait_allrefs(struct net_device *dev) |
4973 | { |
4974 | unsigned long rebroadcast_time, warning_time; |
4975 | |
4976 | rebroadcast_time = warning_time = jiffies; |
4977 | while (atomic_read(&dev->refcnt) != 0) { |
4978 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
4979 | rtnl_lock(); |
4980 | |
4981 | /* Rebroadcast unregister notification */ |
4982 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
4983 | |
4984 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, |
4985 | &dev->state)) { |
4986 | /* We must not have linkwatch events |
4987 | * pending on unregister. If this |
4988 | * happens, we simply run the queue |
4989 | * unscheduled, resulting in a noop |
4990 | * for this device. |
4991 | */ |
4992 | linkwatch_run_queue(); |
4993 | } |
4994 | |
4995 | __rtnl_unlock(); |
4996 | |
4997 | rebroadcast_time = jiffies; |
4998 | } |
4999 | |
5000 | msleep(250); |
5001 | |
5002 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
5003 | printk(KERN_EMERG "unregister_netdevice: " |
5004 | "waiting for %s to become free. Usage " |
5005 | "count = %d\n", |
5006 | dev->name, atomic_read(&dev->refcnt)); |
5007 | warning_time = jiffies; |
5008 | } |
5009 | } |
5010 | } |
5011 | |
5012 | /* The sequence is: |
5013 | * |
5014 | * rtnl_lock(); |
5015 | * ... |
5016 | * register_netdevice(x1); |
5017 | * register_netdevice(x2); |
5018 | * ... |
5019 | * unregister_netdevice(y1); |
5020 | * unregister_netdevice(y2); |
5021 | * ... |
5022 | * rtnl_unlock(); |
5023 | * free_netdev(y1); |
5024 | * free_netdev(y2); |
5025 | * |
5026 | * We are invoked by rtnl_unlock(). |
5027 | * This allows us to deal with problems: |
5028 | * 1) We can delete sysfs objects which invoke hotplug |
5029 | * without deadlocking with linkwatch via keventd. |
5030 | * 2) Since we run with the RTNL semaphore not held, we can sleep |
5031 | * safely in order to wait for the netdev refcnt to drop to zero. |
5032 | * |
5033 | * We must not return until all unregister events added during |
5034 | * the interval the lock was held have been completed. |
5035 | */ |
5036 | void netdev_run_todo(void) |
5037 | { |
5038 | struct list_head list; |
5039 | |
5040 | /* Snapshot list, allow later requests */ |
5041 | list_replace_init(&net_todo_list, &list); |
5042 | |
5043 | __rtnl_unlock(); |
5044 | |
5045 | while (!list_empty(&list)) { |
5046 | struct net_device *dev |
5047 | = list_entry(list.next, struct net_device, todo_list); |
5048 | list_del(&dev->todo_list); |
5049 | |
5050 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { |
5051 | printk(KERN_ERR "network todo '%s' but state %d\n", |
5052 | dev->name, dev->reg_state); |
5053 | dump_stack(); |
5054 | continue; |
5055 | } |
5056 | |
5057 | dev->reg_state = NETREG_UNREGISTERED; |
5058 | |
5059 | on_each_cpu(flush_backlog, dev, 1); |
5060 | |
5061 | netdev_wait_allrefs(dev); |
5062 | |
5063 | /* paranoia */ |
5064 | BUG_ON(atomic_read(&dev->refcnt)); |
5065 | WARN_ON(dev->ip_ptr); |
5066 | WARN_ON(dev->ip6_ptr); |
5067 | WARN_ON(dev->dn_ptr); |
5068 | |
5069 | if (dev->destructor) |
5070 | dev->destructor(dev); |
5071 | |
5072 | /* Free network device */ |
5073 | kobject_put(&dev->dev.kobj); |
5074 | } |
5075 | } |
5076 | |
5077 | /** |
5078 | * dev_get_stats - get network device statistics |
5079 | * @dev: device to get statistics from |
5080 | * |
5081 | * Get network statistics from device. The device driver may provide |
 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5083 | * the internal statistics structure is used. |
5084 | */ |
5085 | const struct net_device_stats *dev_get_stats(struct net_device *dev) |
5086 | { |
5087 | const struct net_device_ops *ops = dev->netdev_ops; |
5088 | |
5089 | if (ops->ndo_get_stats) |
5090 | return ops->ndo_get_stats(dev); |
5091 | else { |
5092 | unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; |
5093 | struct net_device_stats *stats = &dev->stats; |
5094 | unsigned int i; |
5095 | struct netdev_queue *txq; |
5096 | |
5097 | for (i = 0; i < dev->num_tx_queues; i++) { |
5098 | txq = netdev_get_tx_queue(dev, i); |
5099 | tx_bytes += txq->tx_bytes; |
5100 | tx_packets += txq->tx_packets; |
5101 | tx_dropped += txq->tx_dropped; |
5102 | } |
5103 | if (tx_bytes || tx_packets || tx_dropped) { |
5104 | stats->tx_bytes = tx_bytes; |
5105 | stats->tx_packets = tx_packets; |
5106 | stats->tx_dropped = tx_dropped; |
5107 | } |
5108 | return stats; |
5109 | } |
5110 | } |
5111 | EXPORT_SYMBOL(dev_get_stats); |
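/* Example (hypothetical): a driver with hardware counters can override
 * the default by supplying ndo_get_stats and refreshing the embedded
 * dev->stats before returning it; my_read_hw_rx_errors() is assumed:
 *
 *	static struct net_device_stats *my_get_stats(struct net_device *dev)
 *	{
 *		dev->stats.rx_errors = my_read_hw_rx_errors(dev);
 *		return &dev->stats;
 *	}
 *
 *	static const struct net_device_ops my_netdev_ops = {
 *		.ndo_get_stats	= my_get_stats,
 *	};
 */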
5112 | |
5113 | static void netdev_init_one_queue(struct net_device *dev, |
5114 | struct netdev_queue *queue, |
5115 | void *_unused) |
5116 | { |
5117 | queue->dev = dev; |
5118 | } |
5119 | |
5120 | static void netdev_init_queues(struct net_device *dev) |
5121 | { |
5122 | netdev_init_one_queue(dev, &dev->rx_queue, NULL); |
5123 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); |
5124 | spin_lock_init(&dev->tx_global_lock); |
5125 | } |
5126 | |
5127 | /** |
5128 | * alloc_netdev_mq - allocate network device |
5129 | * @sizeof_priv: size of private data to allocate space for |
5130 | * @name: device name format string |
5131 | * @setup: callback to initialize device |
5132 | * @queue_count: the number of subqueues to allocate |
5133 | * |
5134 | * Allocates a struct net_device with private data area for driver use |
 * and performs basic initialization. Also allocates subqueue structs
5136 | * for each queue on the device at the end of the netdevice. |
5137 | */ |
5138 | struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, |
5139 | void (*setup)(struct net_device *), unsigned int queue_count) |
5140 | { |
5141 | struct netdev_queue *tx; |
5142 | struct net_device *dev; |
5143 | size_t alloc_size; |
5144 | struct net_device *p; |
5145 | |
5146 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5147 | |
5148 | alloc_size = sizeof(struct net_device); |
5149 | if (sizeof_priv) { |
5150 | /* ensure 32-byte alignment of private area */ |
5151 | alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); |
5152 | alloc_size += sizeof_priv; |
5153 | } |
5154 | /* ensure 32-byte alignment of whole construct */ |
5155 | alloc_size += NETDEV_ALIGN - 1; |
5156 | |
5157 | p = kzalloc(alloc_size, GFP_KERNEL); |
5158 | if (!p) { |
5159 | printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); |
5160 | return NULL; |
5161 | } |
5162 | |
5163 | tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); |
5164 | if (!tx) { |
5165 | printk(KERN_ERR "alloc_netdev: Unable to allocate " |
5166 | "tx qdiscs.\n"); |
5167 | goto free_p; |
5168 | } |
5169 | |
5170 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5171 | dev->padded = (char *)dev - (char *)p; |
5172 | |
5173 | if (dev_addr_init(dev)) |
5174 | goto free_tx; |
5175 | |
5176 | dev_unicast_init(dev); |
5177 | |
5178 | dev_net_set(dev, &init_net); |
5179 | |
5180 | dev->_tx = tx; |
5181 | dev->num_tx_queues = queue_count; |
5182 | dev->real_num_tx_queues = queue_count; |
5183 | |
5184 | dev->gso_max_size = GSO_MAX_SIZE; |
5185 | |
5186 | netdev_init_queues(dev); |
5187 | |
5188 | INIT_LIST_HEAD(&dev->napi_list); |
5189 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5190 | setup(dev); |
5191 | strcpy(dev->name, name); |
5192 | return dev; |
5193 | |
5194 | free_tx: |
5195 | kfree(tx); |
5196 | |
5197 | free_p: |
5198 | kfree(p); |
5199 | return NULL; |
5200 | } |
5201 | EXPORT_SYMBOL(alloc_netdev_mq); |
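/* Typical lifecycle (hypothetical driver; struct my_priv and my_setup
 * are assumed, ether_setup would be a common setup callback): allocate
 * with a format-string name, register (register_netdev() above expands
 * "eth%d" to a free name), and on teardown unregister before freeing:
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "eth%d", my_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *	...
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */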
5202 | |
5203 | /** |
5204 | * free_netdev - free network device |
5205 | * @dev: device |
5206 | * |
5207 | * This function does the last stage of destroying an allocated device |
5208 | * interface. The reference to the device object is released. |
5209 | * If this is the last reference then it will be freed. |
5210 | */ |
5211 | void free_netdev(struct net_device *dev) |
5212 | { |
5213 | struct napi_struct *p, *n; |
5214 | |
5215 | release_net(dev_net(dev)); |
5216 | |
5217 | kfree(dev->_tx); |
5218 | |
5219 | /* Flush device addresses */ |
5220 | dev_addr_flush(dev); |
5221 | |
5222 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
5223 | netif_napi_del(p); |
5224 | |
5225 | /* Compatibility with error handling in drivers */ |
5226 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
5227 | kfree((char *)dev - dev->padded); |
5228 | return; |
5229 | } |
5230 | |
5231 | BUG_ON(dev->reg_state != NETREG_UNREGISTERED); |
5232 | dev->reg_state = NETREG_RELEASED; |
5233 | |
5234 | /* will free via device release */ |
5235 | put_device(&dev->dev); |
5236 | } |
5237 | EXPORT_SYMBOL(free_netdev); |
5238 | |
5239 | /** |
5240 | * synchronize_net - Synchronize with packet receive processing |
5241 | * |
5242 | * Wait for packets currently being received to be done. |
5243 | * Does not block later packets from starting. |
5244 | */ |
5245 | void synchronize_net(void) |
5246 | { |
5247 | might_sleep(); |
5248 | synchronize_rcu(); |
5249 | } |
5250 | EXPORT_SYMBOL(synchronize_net); |
5251 | |
5252 | /** |
5253 | * unregister_netdevice - remove device from the kernel |
5254 | * @dev: device |
5255 | * |
5256 | * This function shuts down a device interface and removes it |
5257 | * from the kernel tables. |
5258 | * |
5259 | * Callers must hold the rtnl semaphore. You may want |
5260 | * unregister_netdev() instead of this. |
5261 | */ |
5262 | |
5263 | void unregister_netdevice(struct net_device *dev) |
5264 | { |
5265 | ASSERT_RTNL(); |
5266 | |
5267 | rollback_registered(dev); |
5268 | /* Finish processing unregister after unlock */ |
5269 | net_set_todo(dev); |
5270 | } |
5271 | EXPORT_SYMBOL(unregister_netdevice); |
5272 | |
5273 | /** |
5274 | * unregister_netdev - remove device from the kernel |
5275 | * @dev: device |
5276 | * |
5277 | * This function shuts down a device interface and removes it |
5278 | * from the kernel tables. |
5279 | * |
5280 | * This is just a wrapper for unregister_netdevice that takes |
5281 | * the rtnl semaphore. In general you want to use this and not |
5282 | * unregister_netdevice. |
5283 | */ |
5284 | void unregister_netdev(struct net_device *dev) |
5285 | { |
5286 | rtnl_lock(); |
5287 | unregister_netdevice(dev); |
5288 | rtnl_unlock(); |
5289 | } |
5290 | EXPORT_SYMBOL(unregister_netdev); |
5291 | |
5292 | /** |
 * dev_change_net_namespace - move device to different network namespace
5294 | * @dev: device |
5295 | * @net: network namespace |
5296 | * @pat: If not NULL name pattern to try if the current device name |
5297 | * is already taken in the destination network namespace. |
5298 | * |
5299 | * This function shuts down a device interface and moves it |
5300 | * to a new network namespace. On success 0 is returned, on |
 * a failure a negative errno code is returned.
5302 | * |
5303 | * Callers must hold the rtnl semaphore. |
5304 | */ |
5305 | |
5306 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) |
5307 | { |
5308 | char buf[IFNAMSIZ]; |
5309 | const char *destname; |
5310 | int err; |
5311 | |
5312 | ASSERT_RTNL(); |
5313 | |
5314 | /* Don't allow namespace local devices to be moved. */ |
5315 | err = -EINVAL; |
5316 | if (dev->features & NETIF_F_NETNS_LOCAL) |
5317 | goto out; |
5318 | |
5319 | #ifdef CONFIG_SYSFS |
5320 | /* Don't allow real devices to be moved when sysfs |
5321 | * is enabled. |
5322 | */ |
5323 | err = -EINVAL; |
5324 | if (dev->dev.parent) |
5325 | goto out; |
5326 | #endif |
5327 | |
	/* Ensure the device has been registered */
5329 | err = -EINVAL; |
5330 | if (dev->reg_state != NETREG_REGISTERED) |
5331 | goto out; |
5332 | |
	/* Get out if there is nothing to do */
5334 | err = 0; |
5335 | if (net_eq(dev_net(dev), net)) |
5336 | goto out; |
5337 | |
5338 | /* Pick the destination device name, and ensure |
5339 | * we can use it in the destination network namespace. |
5340 | */ |
5341 | err = -EEXIST; |
5342 | destname = dev->name; |
5343 | if (__dev_get_by_name(net, destname)) { |
5344 | /* We get here if we can't use the current device name */ |
5345 | if (!pat) |
5346 | goto out; |
5347 | if (!dev_valid_name(pat)) |
5348 | goto out; |
5349 | if (strchr(pat, '%')) { |
5350 | if (__dev_alloc_name(net, pat, buf) < 0) |
5351 | goto out; |
5352 | destname = buf; |
5353 | } else |
5354 | destname = pat; |
5355 | if (__dev_get_by_name(net, destname)) |
5356 | goto out; |
5357 | } |
5358 | |
5359 | /* |
5360 | * And now a mini version of register_netdevice unregister_netdevice. |
5361 | */ |
5362 | |
5363 | /* If device is running close it first. */ |
5364 | dev_close(dev); |
5365 | |
5366 | /* And unlink it from device chain */ |
5367 | err = -ENODEV; |
5368 | unlist_netdevice(dev); |
5369 | |
5370 | synchronize_net(); |
5371 | |
5372 | /* Shutdown queueing discipline. */ |
5373 | dev_shutdown(dev); |
5374 | |
5375 | /* Notify protocols, that we are about to destroy |
5376 | this device. They should clean all the things. |
5377 | */ |
5378 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5379 | |
5380 | /* |
5381 | * Flush the unicast and multicast chains |
5382 | */ |
5383 | dev_unicast_flush(dev); |
5384 | dev_addr_discard(dev); |
5385 | |
5386 | netdev_unregister_kobject(dev); |
5387 | |
5388 | /* Actually switch the network namespace */ |
5389 | dev_net_set(dev, net); |
5390 | |
5391 | /* Assign the new device name */ |
5392 | if (destname != dev->name) |
5393 | strcpy(dev->name, destname); |
5394 | |
5395 | /* If there is an ifindex conflict assign a new one */ |
5396 | if (__dev_get_by_index(net, dev->ifindex)) { |
5397 | int iflink = (dev->iflink == dev->ifindex); |
5398 | dev->ifindex = dev_new_index(net); |
5399 | if (iflink) |
5400 | dev->iflink = dev->ifindex; |
5401 | } |
5402 | |
5403 | /* Fixup kobjects */ |
5404 | err = netdev_register_kobject(dev); |
5405 | WARN_ON(err); |
5406 | |
5407 | /* Add the device back in the hashes */ |
5408 | list_netdevice(dev); |
5409 | |
5410 | /* Notify protocols, that a new device appeared. */ |
5411 | call_netdevice_notifiers(NETDEV_REGISTER, dev); |
5412 | |
5413 | /* |
5414 | * Prevent userspace races by waiting until the network |
5415 | * device is fully setup before sending notifications. |
5416 | */ |
5417 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); |
5418 | |
5419 | synchronize_net(); |
5420 | err = 0; |
5421 | out: |
5422 | return err; |
5423 | } |
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
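
/*
 * Illustrative sketch: a caller holding the rtnl semaphore can move a
 * device into another namespace, supplying a '%d' pattern as a fallback
 * name in case of a clash.  "target_net" is hypothetical:
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, target_net, "eth%d");
 *	rtnl_unlock();
 */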
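
/*
 * CPU hotplug callback: when a CPU goes offline, splice its per-cpu
 * softnet completion and output queues onto the current CPU's queues
 * and re-inject any packets left on its input queue, so nothing is
 * stranded on the dead CPU.  Registered via hotcpu_notifier() in
 * net_dev_init() below.
 */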
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

/**
 * netdev_increment_features - increment feature set by one
 * @all: current feature set
 * @one: new feature set
 * @mask: mask feature set
 *
 * Computes a new feature set after adding a device with feature set
 * @one to the master device with current feature set @all. Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
					unsigned long mask)
{
	/* If device needs checksumming, downgrade to it. */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
	else if (mask & NETIF_F_ALL_CSUM) {
		/* If one device supports v4/v6 checksumming, set for all. */
		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
		    !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
		}

		/* If one device supports hw checksumming, set for all. */
		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= NETIF_F_HW_CSUM;
		}
	}

	one |= NETIF_F_ALL_CSUM;

	one |= all & NETIF_F_ONE_FOR_ALL;
	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
	all |= one & mask & NETIF_F_ONE_FOR_ALL;

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
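
/*
 * Illustrative sketch: an aggregating driver (bonding/bridge style)
 * recomputes its feature set by folding in each slave with this helper.
 * "master", "slave" and "feature_mask" are hypothetical names:
 *
 *	features = feature_mask;
 *	list_for_each_entry(slave, &master->slave_list, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     feature_mask);
 */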

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}
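
/*
 * The buckets allocated above back the per-namespace name and ifindex
 * lookup tables.  Elsewhere in this file a lookup hashes its key and
 * masks it down to one of the NETDEV_HASHENTRIES buckets, roughly:
 *
 *	hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 *	head = &net->dev_name_head[hash & (NETDEV_HASHENTRIES - 1)];
 */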

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 * netdev_drivername - network driver for the device
 * @dev: network device
 * @buffer: buffer for resulting name
 * @len: size of buffer
 *
 * Determine network driver for device.
 */
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
	const struct device_driver *driver;
	const struct device *parent;

	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;

	parent = dev->dev.parent;

	if (!parent)
		return buffer;

	driver = parent->driver;
	if (driver && driver->name)
		strlcpy(buffer, driver->name, len);
	return buffer;
}
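
/*
 * Illustrative sketch: a diagnostic path (e.g. the transmit watchdog)
 * can use this to name the driver in a log message, with a small
 * on-stack buffer:
 *
 *	char drivername[64];
 *	printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */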

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
restart:
	for_each_netdev(net, dev) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore namespace-local devices (e.g. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			goto restart;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
		goto restart;
	}
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};

/*
 * Initialize the DEV module. At boot time this walks the device list and
 * unhooks any devices that fail to initialise (normally hardware not
 * present) and leaves us with a valid list of present and active devices.
 */

/*
 * This is called single threaded during boot, so no need
 * to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 * Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
		queue->backlog.gro_list = NULL;
		queue->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, maintain this invariant by keeping the
	 * loopback device first on the list of network devices, so that
	 * it is the first device to appear and the last to disappear.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);