1 | /* |
---|
2 | * NET3 Protocol independent device support routines. |
---|
3 | * |
---|
4 | * This program is free software; you can redistribute it and/or |
---|
5 | * modify it under the terms of the GNU General Public License |
---|
6 | * as published by the Free Software Foundation; either version |
---|
7 | * 2 of the License, or (at your option) any later version. |
---|
8 | * |
---|
9 | * Derived from the non IP parts of dev.c 1.0.19 |
---|
10 | * Authors: Ross Biro |
---|
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
---|
12 | * Mark Evans, <evansmp@uhura.aston.ac.uk> |
---|
13 | * |
---|
14 | * Additional Authors: |
---|
15 | * Florian la Roche <rzsfl@rz.uni-sb.de> |
---|
16 | * Alan Cox <gw4pts@gw4pts.ampr.org> |
---|
17 | * David Hinds <dahinds@users.sourceforge.net> |
---|
18 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
---|
19 | * Adam Sulmicki <adam@cfar.umd.edu> |
---|
20 | * Pekka Riikonen <priikone@poesidon.pspt.fi> |
---|
21 | * |
---|
22 | * Changes: |
---|
23 | * D.J. Barrow : Fixed bug where dev->refcnt gets set |
---|
24 | * to 2 if register_netdev gets called |
---|
25 | * before net_dev_init & also removed a |
---|
26 | * few lines of code in the process. |
---|
27 | * Alan Cox : device private ioctl copies fields back. |
---|
28 | * Alan Cox : Transmit queue code does relevant |
---|
29 | * stunts to keep the queue safe. |
---|
30 | * Alan Cox : Fixed double lock. |
---|
31 | * Alan Cox : Fixed promisc NULL pointer trap |
---|
32 | * ???????? : Support the full private ioctl range |
---|
33 | * Alan Cox : Moved ioctl permission check into |
---|
34 | * drivers |
---|
35 | * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI |
---|
36 | * Alan Cox : 100 backlog just doesn't cut it when |
---|
37 | * you start doing multicast video 8) |
---|
38 | * Alan Cox : Rewrote net_bh and list manager. |
---|
39 | * Alan Cox : Fix ETH_P_ALL echoback lengths. |
---|
40 | * Alan Cox : Took out transmit every packet pass |
---|
41 | * Saved a few bytes in the ioctl handler |
---|
42 | * Alan Cox : Network driver sets packet type before |
---|
43 | * calling netif_rx. Saves a function |
---|
44 | * call a packet. |
---|
45 | * Alan Cox : Hashed net_bh() |
---|
46 | * Richard Kooijman: Timestamp fixes. |
---|
47 | * Alan Cox : Wrong field in SIOCGIFDSTADDR |
---|
48 | * Alan Cox : Device lock protection. |
---|
49 | * Alan Cox : Fixed nasty side effect of device close |
---|
50 | * changes. |
---|
51 | * Rudi Cilibrasi : Pass the right thing to |
---|
52 | * set_mac_address() |
---|
53 | * Dave Miller : 32bit quantity for the device lock to |
---|
54 | * make it work out on a Sparc. |
---|
55 | * Bjorn Ekwall : Added KERNELD hack. |
---|
56 | * Alan Cox : Cleaned up the backlog initialise. |
---|
57 | * Craig Metz : SIOCGIFCONF fix if space for under |
---|
58 | * 1 device. |
---|
59 | * Thomas Bogendoerfer : Return ENODEV for dev_open, if there |
---|
60 | * is no device open function. |
---|
61 | * Andi Kleen : Fix error reporting for SIOCGIFCONF |
---|
62 | * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF |
---|
63 | * Cyrus Durgin : Cleaned for KMOD |
---|
64 | * Adam Sulmicki : Bug Fix : Network Device Unload |
---|
65 | * A network device unload needs to purge |
---|
66 | * the backlog queue. |
---|
67 | * Paul Rusty Russell : SIOCSIFNAME |
---|
68 | * Pekka Riikonen : Netdev boot-time settings code |
---|
69 | * Andrew Morton : Make unregister_netdevice wait |
---|
70 | * indefinitely on dev->refcnt |
---|
71 | * J Hadi Salim : - Backlog queue sampling |
---|
72 | * - netif_rx() feedback |
---|
73 | */ |
---|
74 | |
---|
75 | #include <asm/uaccess.h> |
---|
76 | #include <asm/system.h> |
---|
77 | #include <linux/bitops.h> |
---|
78 | #include <linux/capability.h> |
---|
79 | #include <linux/cpu.h> |
---|
80 | #include <linux/types.h> |
---|
81 | #include <linux/kernel.h> |
---|
82 | #include <linux/sched.h> |
---|
83 | #include <linux/mutex.h> |
---|
84 | #include <linux/string.h> |
---|
85 | #include <linux/mm.h> |
---|
86 | #include <linux/socket.h> |
---|
87 | #include <linux/sockios.h> |
---|
88 | #include <linux/errno.h> |
---|
89 | #include <linux/interrupt.h> |
---|
90 | #include <linux/if_ether.h> |
---|
91 | #include <linux/netdevice.h> |
---|
92 | #include <linux/etherdevice.h> |
---|
93 | #include <linux/notifier.h> |
---|
94 | #include <linux/skbuff.h> |
---|
95 | #include <net/sock.h> |
---|
96 | #include <linux/rtnetlink.h> |
---|
97 | #include <linux/proc_fs.h> |
---|
98 | #include <linux/seq_file.h> |
---|
99 | #include <linux/stat.h> |
---|
100 | #include <linux/if_bridge.h> |
---|
101 | #include <linux/divert.h> |
---|
102 | #include <net/dst.h> |
---|
103 | #include <net/pkt_sched.h> |
---|
104 | #include <net/checksum.h> |
---|
105 | #include <linux/highmem.h> |
---|
106 | #include <linux/init.h> |
---|
107 | #include <linux/kmod.h> |
---|
108 | #include <linux/module.h> |
---|
109 | #include <linux/kallsyms.h> |
---|
110 | #include <linux/netpoll.h> |
---|
111 | #include <linux/rcupdate.h> |
---|
112 | #include <linux/delay.h> |
---|
113 | #include <linux/wireless.h> |
---|
114 | #include <net/iw_handler.h> |
---|
115 | #include <asm/current.h> |
---|
116 | #include <linux/err.h> |
---|
117 | #include <linux/audit.h> |
---|
118 | #include <linux/dmaengine.h> |
---|
119 | #include <linux/err.h> |
---|
120 | #include <linux/ctype.h> |
---|
121 | |
---|
122 | #ifdef CONFIG_XEN |
---|
123 | #include <net/ip.h> |
---|
124 | #include <linux/tcp.h> |
---|
125 | #include <linux/udp.h> |
---|
126 | #endif |
---|
127 | |
---|
128 | /* |
---|
129 | * The list of packet types we will receive (as opposed to discard) |
---|
130 | * and the routines to invoke. |
---|
131 | * |
---|
132 | * Why 16. Because with 16 the only overlap we get on a hash of the |
---|
133 | * low nibble of the protocol value is RARP/SNAP/X.25. |
---|
134 | * |
---|
135 | * NOTE: That is no longer true with the addition of VLAN tags. Not |
---|
136 | * sure which should go first, but I bet it won't make much |
---|
137 | * difference if we are running VLANs. The good news is that |
---|
138 | * this protocol won't be in the list unless compiled in, so |
---|
139 | * the average user (w/out VLANs) will not be adversely affected. |
---|
140 | * --BLG |
---|
141 | * |
---|
142 | * 0800 IP |
---|
143 | * 8100 802.1Q VLAN |
---|
144 | * 0001 802.3 |
---|
145 | * 0002 AX.25 |
---|
146 | * 0004 802.2 |
---|
147 | * 8035 RARP |
---|
148 | * 0005 SNAP |
---|
149 | * 0805 X.25 |
---|
150 | * 0806 ARP |
---|
151 | * 8137 IPX |
---|
152 | * 0009 Localtalk |
---|
153 | * 86DD IPv6 |
---|
154 | */ |
---|
155 | |
---|
156 | static DEFINE_SPINLOCK(ptype_lock); |
---|
157 | static struct list_head ptype_base[16]; /* 16 way hashed list */ |
---|
158 | static struct list_head ptype_all; /* Taps */ |
---|
159 | |
---|
160 | #ifdef CONFIG_NET_DMA |
---|
161 | static struct dma_client *net_dma_client; |
---|
162 | static unsigned int net_dma_count; |
---|
163 | static spinlock_t net_dma_event_lock; |
---|
164 | #endif |
---|
165 | |
---|
166 | /* |
---|
167 | * The @dev_base list is protected by @dev_base_lock and the rtnl |
---|
168 | * semaphore. |
---|
169 | * |
---|
170 | * Pure readers hold dev_base_lock for reading. |
---|
171 | * |
---|
172 | * Writers must hold the rtnl semaphore while they loop through the |
---|
173 | * dev_base list, and hold dev_base_lock for writing when they do the |
---|
174 | * actual updates. This allows pure readers to access the list even |
---|
175 | * while a writer is preparing to update it. |
---|
176 | * |
---|
177 | * To put it another way, dev_base_lock is held for writing only to |
---|
178 | * protect against pure readers; the rtnl semaphore provides the |
---|
179 | * protection against other writers. |
---|
180 | * |
---|
181 | * See, for example usages, register_netdevice() and |
---|
182 | * unregister_netdevice(), which must be called with the rtnl |
---|
183 | * semaphore held. |
---|
184 | */ |
---|
185 | struct net_device *dev_base; |
---|
186 | static struct net_device **dev_tail = &dev_base; |
---|
187 | DEFINE_RWLOCK(dev_base_lock); |
---|
188 | |
---|
189 | EXPORT_SYMBOL(dev_base); |
---|
190 | EXPORT_SYMBOL(dev_base_lock); |
---|
191 | |
---|
192 | #define NETDEV_HASHBITS 8 |
---|
193 | static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; |
---|
194 | static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; |
---|
195 | |
---|
196 | static inline struct hlist_head *dev_name_hash(const char *name) |
---|
197 | { |
---|
198 | unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); |
---|
199 | return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; |
---|
200 | } |
---|
201 | |
---|
202 | static inline struct hlist_head *dev_index_hash(int ifindex) |
---|
203 | { |
---|
204 | return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; |
---|
205 | } |
---|
206 | |
---|
207 | /* |
---|
208 | * Our notifier list |
---|
209 | */ |
---|
210 | |
---|
211 | static RAW_NOTIFIER_HEAD(netdev_chain); |
---|
212 | |
---|
213 | /* |
---|
214 | * Device drivers call our routines to queue packets here. We empty the |
---|
215 | * queue in the local softnet handler. |
---|
216 | */ |
---|
217 | DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; |
---|
218 | |
---|
219 | #ifdef CONFIG_SYSFS |
---|
220 | extern int netdev_sysfs_init(void); |
---|
221 | extern int netdev_register_sysfs(struct net_device *); |
---|
222 | extern void netdev_unregister_sysfs(struct net_device *); |
---|
223 | #else |
---|
224 | #define netdev_sysfs_init() (0) |
---|
225 | #define netdev_register_sysfs(dev) (0) |
---|
226 | #define netdev_unregister_sysfs(dev) do { } while(0) |
---|
227 | #endif |
---|
228 | |
---|
229 | |
---|
230 | /******************************************************************************* |
---|
231 | |
---|
232 | Protocol management and registration routines |
---|
233 | |
---|
234 | *******************************************************************************/ |
---|
235 | |
---|
/*
 *	For efficiency: counter bumped by dev_add_pack() and dropped by
 *	__dev_remove_pack() for ETH_P_ALL handlers.
 */

static int netdev_nit;
---|
241 | |
---|
242 | /* |
---|
243 | * Add a protocol ID to the list. Now that the input handler is |
---|
244 | * smarter we can dispense with all the messy stuff that used to be |
---|
245 | * here. |
---|
246 | * |
---|
247 | * BEWARE!!! Protocol handlers, mangling input packets, |
---|
248 | * MUST BE last in hash buckets and checking protocol handlers |
---|
249 | * MUST start from promiscuous ptype_all chain in net_bh. |
---|
250 | * It is true now, do not change it. |
---|
251 | * Explanation follows: if protocol handler, mangling packet, will |
---|
252 | * be the first on list, it is not able to sense, that packet |
---|
253 | * is cloned and should be copied-on-write, so that it will |
---|
254 | * change it and subsequent readers will get broken packet. |
---|
255 | * --ANK (980803) |
---|
256 | */ |
---|
257 | |
---|
258 | /** |
---|
259 | * dev_add_pack - add packet handler |
---|
260 | * @pt: packet type declaration |
---|
261 | * |
---|
262 | * Add a protocol handler to the networking stack. The passed &packet_type |
---|
263 | * is linked into kernel lists and may not be freed until it has been |
---|
264 | * removed from the kernel lists. |
---|
265 | * |
---|
266 | * This call does not sleep therefore it can not |
---|
267 | * guarantee all CPU's that are in middle of receiving packets |
---|
268 | * will see the new packet type (until the next received packet). |
---|
269 | */ |
---|
270 | |
---|
271 | void dev_add_pack(struct packet_type *pt) |
---|
272 | { |
---|
273 | int hash; |
---|
274 | |
---|
275 | spin_lock_bh(&ptype_lock); |
---|
276 | if (pt->type == htons(ETH_P_ALL)) { |
---|
277 | netdev_nit++; |
---|
278 | list_add_rcu(&pt->list, &ptype_all); |
---|
279 | } else { |
---|
280 | hash = ntohs(pt->type) & 15; |
---|
281 | list_add_rcu(&pt->list, &ptype_base[hash]); |
---|
282 | } |
---|
283 | spin_unlock_bh(&ptype_lock); |
---|
284 | } |
---|
285 | |
---|
286 | /** |
---|
287 | * __dev_remove_pack - remove packet handler |
---|
288 | * @pt: packet type declaration |
---|
289 | * |
---|
290 | * Remove a protocol handler that was previously added to the kernel |
---|
291 | * protocol handlers by dev_add_pack(). The passed &packet_type is removed |
---|
292 | * from the kernel lists and can be freed or reused once this function |
---|
293 | * returns. |
---|
294 | * |
---|
295 | * The packet type might still be in use by receivers |
---|
296 | * and must not be freed until after all the CPU's have gone |
---|
297 | * through a quiescent state. |
---|
298 | */ |
---|
299 | void __dev_remove_pack(struct packet_type *pt) |
---|
300 | { |
---|
301 | struct list_head *head; |
---|
302 | struct packet_type *pt1; |
---|
303 | |
---|
304 | spin_lock_bh(&ptype_lock); |
---|
305 | |
---|
306 | if (pt->type == htons(ETH_P_ALL)) { |
---|
307 | netdev_nit--; |
---|
308 | head = &ptype_all; |
---|
309 | } else |
---|
310 | head = &ptype_base[ntohs(pt->type) & 15]; |
---|
311 | |
---|
312 | list_for_each_entry(pt1, head, list) { |
---|
313 | if (pt == pt1) { |
---|
314 | list_del_rcu(&pt->list); |
---|
315 | goto out; |
---|
316 | } |
---|
317 | } |
---|
318 | |
---|
319 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); |
---|
320 | out: |
---|
321 | spin_unlock_bh(&ptype_lock); |
---|
322 | } |
---|
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was registered with dev_add_pack()
 *	by calling __dev_remove_pack(), then wait in synchronize_net().
 *	The passed &packet_type can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then wait until no receiver can still be using it. */
	synchronize_net();
}
---|
341 | |
---|
342 | /****************************************************************************** |
---|
343 | |
---|
344 | Device Boot-time Settings Routines |
---|
345 | |
---|
346 | *******************************************************************************/ |
---|
347 | |
---|
348 | /* Boot time configuration table */ |
---|
349 | static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; |
---|
350 | |
---|
351 | /** |
---|
352 | * netdev_boot_setup_add - add new setup entry |
---|
353 | * @name: name of the device |
---|
354 | * @map: configured settings for the device |
---|
355 | * |
---|
356 | * Adds new setup entry to the dev_boot_setup list. The function |
---|
357 | * returns 0 on error and 1 on success. This is a generic routine to |
---|
358 | * all netdevices. |
---|
359 | */ |
---|
360 | static int netdev_boot_setup_add(char *name, struct ifmap *map) |
---|
361 | { |
---|
362 | struct netdev_boot_setup *s; |
---|
363 | int i; |
---|
364 | |
---|
365 | s = dev_boot_setup; |
---|
366 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
---|
367 | if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { |
---|
368 | memset(s[i].name, 0, sizeof(s[i].name)); |
---|
369 | strcpy(s[i].name, name); |
---|
370 | memcpy(&s[i].map, map, sizeof(s[i].map)); |
---|
371 | break; |
---|
372 | } |
---|
373 | } |
---|
374 | |
---|
375 | return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; |
---|
376 | } |
---|
377 | |
---|
378 | /** |
---|
379 | * netdev_boot_setup_check - check boot time settings |
---|
380 | * @dev: the netdevice |
---|
381 | * |
---|
382 | * Check boot time settings for the device. |
---|
383 | * The found settings are set for the device to be used |
---|
384 | * later in the device probing. |
---|
385 | * Returns 0 if no settings found, 1 if they are. |
---|
386 | */ |
---|
387 | int netdev_boot_setup_check(struct net_device *dev) |
---|
388 | { |
---|
389 | struct netdev_boot_setup *s = dev_boot_setup; |
---|
390 | int i; |
---|
391 | |
---|
392 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
---|
393 | if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && |
---|
394 | !strncmp(dev->name, s[i].name, strlen(s[i].name))) { |
---|
395 | dev->irq = s[i].map.irq; |
---|
396 | dev->base_addr = s[i].map.base_addr; |
---|
397 | dev->mem_start = s[i].map.mem_start; |
---|
398 | dev->mem_end = s[i].map.mem_end; |
---|
399 | return 1; |
---|
400 | } |
---|
401 | } |
---|
402 | return 0; |
---|
403 | } |
---|
404 | |
---|
405 | |
---|
406 | /** |
---|
407 | * netdev_boot_base - get address from boot time settings |
---|
408 | * @prefix: prefix for network device |
---|
409 | * @unit: id for network device |
---|
410 | * |
---|
411 | * Check boot time settings for the base address of device. |
---|
412 | * The found settings are set for the device to be used |
---|
413 | * later in the device probing. |
---|
414 | * Returns 0 if no settings found. |
---|
415 | */ |
---|
416 | unsigned long netdev_boot_base(const char *prefix, int unit) |
---|
417 | { |
---|
418 | const struct netdev_boot_setup *s = dev_boot_setup; |
---|
419 | char name[IFNAMSIZ]; |
---|
420 | int i; |
---|
421 | |
---|
422 | sprintf(name, "%s%d", prefix, unit); |
---|
423 | |
---|
424 | /* |
---|
425 | * If device already registered then return base of 1 |
---|
426 | * to indicate not to probe for this interface |
---|
427 | */ |
---|
428 | if (__dev_get_by_name(name)) |
---|
429 | return 1; |
---|
430 | |
---|
431 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) |
---|
432 | if (!strcmp(name, s[i].name)) |
---|
433 | return s[i].map.base_addr; |
---|
434 | return 0; |
---|
435 | } |
---|
436 | |
---|
437 | /* |
---|
438 | * Saves at boot time configured settings for any netdevice. |
---|
439 | */ |
---|
440 | int __init netdev_boot_setup(char *str) |
---|
441 | { |
---|
442 | int ints[5]; |
---|
443 | struct ifmap map; |
---|
444 | |
---|
445 | str = get_options(str, ARRAY_SIZE(ints), ints); |
---|
446 | if (!str || !*str) |
---|
447 | return 0; |
---|
448 | |
---|
449 | /* Save settings */ |
---|
450 | memset(&map, 0, sizeof(map)); |
---|
451 | if (ints[0] > 0) |
---|
452 | map.irq = ints[1]; |
---|
453 | if (ints[0] > 1) |
---|
454 | map.base_addr = ints[2]; |
---|
455 | if (ints[0] > 2) |
---|
456 | map.mem_start = ints[3]; |
---|
457 | if (ints[0] > 3) |
---|
458 | map.mem_end = ints[4]; |
---|
459 | |
---|
460 | /* Add new entry to the list */ |
---|
461 | return netdev_boot_setup_add(str, &map); |
---|
462 | } |
---|
463 | |
---|
464 | __setup("netdev=", netdev_boot_setup); |
---|
465 | |
---|
466 | /******************************************************************************* |
---|
467 | |
---|
468 | Device Interface Subroutines |
---|
469 | |
---|
470 | *******************************************************************************/ |
---|
471 | |
---|
472 | /** |
---|
473 | * __dev_get_by_name - find a device by its name |
---|
474 | * @name: name to find |
---|
475 | * |
---|
476 | * Find an interface by name. Must be called under RTNL semaphore |
---|
477 | * or @dev_base_lock. If the name is found a pointer to the device |
---|
478 | * is returned. If the name is not found then %NULL is returned. The |
---|
479 | * reference counters are not incremented so the caller must be |
---|
480 | * careful with locks. |
---|
481 | */ |
---|
482 | |
---|
483 | struct net_device *__dev_get_by_name(const char *name) |
---|
484 | { |
---|
485 | struct hlist_node *p; |
---|
486 | |
---|
487 | hlist_for_each(p, dev_name_hash(name)) { |
---|
488 | struct net_device *dev |
---|
489 | = hlist_entry(p, struct net_device, name_hlist); |
---|
490 | if (!strncmp(dev->name, name, IFNAMSIZ)) |
---|
491 | return dev; |
---|
492 | } |
---|
493 | return NULL; |
---|
494 | } |
---|
495 | |
---|
496 | /** |
---|
497 | * dev_get_by_name - find a device by its name |
---|
498 | * @name: name to find |
---|
499 | * |
---|
500 | * Find an interface by name. This can be called from any |
---|
501 | * context and does its own locking. The returned handle has |
---|
502 | * the usage count incremented and the caller must use dev_put() to |
---|
503 | * release it when it is no longer needed. %NULL is returned if no |
---|
504 | * matching device is found. |
---|
505 | */ |
---|
506 | |
---|
507 | struct net_device *dev_get_by_name(const char *name) |
---|
508 | { |
---|
509 | struct net_device *dev; |
---|
510 | |
---|
511 | read_lock(&dev_base_lock); |
---|
512 | dev = __dev_get_by_name(name); |
---|
513 | if (dev) |
---|
514 | dev_hold(dev); |
---|
515 | read_unlock(&dev_base_lock); |
---|
516 | return dev; |
---|
517 | } |
---|
518 | |
---|
519 | /** |
---|
520 | * __dev_get_by_index - find a device by its ifindex |
---|
521 | * @ifindex: index of device |
---|
522 | * |
---|
523 | * Search for an interface by index. Returns %NULL if the device |
---|
524 | * is not found or a pointer to the device. The device has not |
---|
525 | * had its reference counter increased so the caller must be careful |
---|
526 | * about locking. The caller must hold either the RTNL semaphore |
---|
527 | * or @dev_base_lock. |
---|
528 | */ |
---|
529 | |
---|
530 | struct net_device *__dev_get_by_index(int ifindex) |
---|
531 | { |
---|
532 | struct hlist_node *p; |
---|
533 | |
---|
534 | hlist_for_each(p, dev_index_hash(ifindex)) { |
---|
535 | struct net_device *dev |
---|
536 | = hlist_entry(p, struct net_device, index_hlist); |
---|
537 | if (dev->ifindex == ifindex) |
---|
538 | return dev; |
---|
539 | } |
---|
540 | return NULL; |
---|
541 | } |
---|
542 | |
---|
543 | |
---|
544 | /** |
---|
545 | * dev_get_by_index - find a device by its ifindex |
---|
546 | * @ifindex: index of device |
---|
547 | * |
---|
548 | * Search for an interface by index. Returns NULL if the device |
---|
549 | * is not found or a pointer to the device. The device returned has |
---|
550 | * had a reference added and the pointer is safe until the user calls |
---|
551 | * dev_put to indicate they have finished with it. |
---|
552 | */ |
---|
553 | |
---|
554 | struct net_device *dev_get_by_index(int ifindex) |
---|
555 | { |
---|
556 | struct net_device *dev; |
---|
557 | |
---|
558 | read_lock(&dev_base_lock); |
---|
559 | dev = __dev_get_by_index(ifindex); |
---|
560 | if (dev) |
---|
561 | dev_hold(dev); |
---|
562 | read_unlock(&dev_base_lock); |
---|
563 | return dev; |
---|
564 | } |
---|
565 | |
---|
566 | /** |
---|
567 | * dev_getbyhwaddr - find a device by its hardware address |
---|
568 | * @type: media type of device |
---|
569 | * @ha: hardware address |
---|
570 | * |
---|
571 | * Search for an interface by MAC address. Returns NULL if the device |
---|
572 | * is not found or a pointer to the device. The caller must hold the |
---|
573 | * rtnl semaphore. The returned device has not had its ref count increased |
---|
574 | * and the caller must therefore be careful about locking |
---|
575 | * |
---|
576 | * BUGS: |
---|
577 | * If the API was consistent this would be __dev_get_by_hwaddr |
---|
578 | */ |
---|
579 | |
---|
580 | struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) |
---|
581 | { |
---|
582 | struct net_device *dev; |
---|
583 | |
---|
584 | ASSERT_RTNL(); |
---|
585 | |
---|
586 | for (dev = dev_base; dev; dev = dev->next) |
---|
587 | if (dev->type == type && |
---|
588 | !memcmp(dev->dev_addr, ha, dev->addr_len)) |
---|
589 | break; |
---|
590 | return dev; |
---|
591 | } |
---|
592 | |
---|
593 | EXPORT_SYMBOL(dev_getbyhwaddr); |
---|
594 | |
---|
595 | struct net_device *dev_getfirstbyhwtype(unsigned short type) |
---|
596 | { |
---|
597 | struct net_device *dev; |
---|
598 | |
---|
599 | rtnl_lock(); |
---|
600 | for (dev = dev_base; dev; dev = dev->next) { |
---|
601 | if (dev->type == type) { |
---|
602 | dev_hold(dev); |
---|
603 | break; |
---|
604 | } |
---|
605 | } |
---|
606 | rtnl_unlock(); |
---|
607 | return dev; |
---|
608 | } |
---|
609 | |
---|
610 | EXPORT_SYMBOL(dev_getfirstbyhwtype); |
---|
611 | |
---|
612 | /** |
---|
613 | * dev_get_by_flags - find any device with given flags |
---|
614 | * @if_flags: IFF_* values |
---|
615 | * @mask: bitmask of bits in if_flags to check |
---|
616 | * |
---|
617 | * Search for any interface with the given flags. Returns NULL if a device |
---|
618 | * is not found or a pointer to the device. The device returned has |
---|
619 | * had a reference added and the pointer is safe until the user calls |
---|
620 | * dev_put to indicate they have finished with it. |
---|
621 | */ |
---|
622 | |
---|
623 | struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) |
---|
624 | { |
---|
625 | struct net_device *dev; |
---|
626 | |
---|
627 | read_lock(&dev_base_lock); |
---|
628 | for (dev = dev_base; dev != NULL; dev = dev->next) { |
---|
629 | if (((dev->flags ^ if_flags) & mask) == 0) { |
---|
630 | dev_hold(dev); |
---|
631 | break; |
---|
632 | } |
---|
633 | } |
---|
634 | read_unlock(&dev_base_lock); |
---|
635 | return dev; |
---|
636 | } |
---|
637 | |
---|
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  The empty string, "." and "..", and any
 *	name containing '/' or a whitespace character are rejected.
 *	Returns 1 if the name is acceptable, 0 otherwise.
 */
int dev_valid_name(const char *name)
{
	const char *c;

	if (name[0] == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;

	for (c = name; *c != '\0'; c++) {
		if (*c == '/' || isspace(*c))
			return 0;
	}
	return 1;
}
---|
660 | |
---|
661 | /** |
---|
662 | * dev_alloc_name - allocate a name for a device |
---|
663 | * @dev: device |
---|
664 | * @name: name format string |
---|
665 | * |
---|
666 | * Passed a format string - eg "lt%d" it will try and find a suitable |
---|
667 | * id. It scans list of devices to build up a free map, then chooses |
---|
668 | * the first empty slot. The caller must hold the dev_base or rtnl lock |
---|
669 | * while allocating the name and adding the device in order to avoid |
---|
670 | * duplicates. |
---|
671 | * Limited to bits_per_byte * page size devices (ie 32K on most platforms). |
---|
672 | * Returns the number of the unit assigned or a negative errno code. |
---|
673 | */ |
---|
674 | |
---|
675 | int dev_alloc_name(struct net_device *dev, const char *name) |
---|
676 | { |
---|
677 | int i = 0; |
---|
678 | char buf[IFNAMSIZ]; |
---|
679 | const char *p; |
---|
680 | const int max_netdevices = 8*PAGE_SIZE; |
---|
681 | long *inuse; |
---|
682 | struct net_device *d; |
---|
683 | |
---|
684 | p = strnchr(name, IFNAMSIZ-1, '%'); |
---|
685 | if (p) { |
---|
686 | /* |
---|
687 | * Verify the string as this thing may have come from |
---|
688 | * the user. There must be either one "%d" and no other "%" |
---|
689 | * characters. |
---|
690 | */ |
---|
691 | if (p[1] != 'd' || strchr(p + 2, '%')) |
---|
692 | return -EINVAL; |
---|
693 | |
---|
694 | /* Use one page as a bit array of possible slots */ |
---|
695 | inuse = (long *) get_zeroed_page(GFP_ATOMIC); |
---|
696 | if (!inuse) |
---|
697 | return -ENOMEM; |
---|
698 | |
---|
699 | for (d = dev_base; d; d = d->next) { |
---|
700 | if (!sscanf(d->name, name, &i)) |
---|
701 | continue; |
---|
702 | if (i < 0 || i >= max_netdevices) |
---|
703 | continue; |
---|
704 | |
---|
705 | /* avoid cases where sscanf is not exact inverse of printf */ |
---|
706 | snprintf(buf, sizeof(buf), name, i); |
---|
707 | if (!strncmp(buf, d->name, IFNAMSIZ)) |
---|
708 | set_bit(i, inuse); |
---|
709 | } |
---|
710 | |
---|
711 | i = find_first_zero_bit(inuse, max_netdevices); |
---|
712 | free_page((unsigned long) inuse); |
---|
713 | } |
---|
714 | |
---|
715 | snprintf(buf, sizeof(buf), name, i); |
---|
716 | if (!__dev_get_by_name(buf)) { |
---|
717 | strlcpy(dev->name, buf, IFNAMSIZ); |
---|
718 | return i; |
---|
719 | } |
---|
720 | |
---|
721 | /* It is possible to run out of possible slots |
---|
722 | * when the name is long and there isn't enough space left |
---|
723 | * for the digits, or if all bits are used. |
---|
724 | */ |
---|
725 | return -ENFILE; |
---|
726 | } |
---|
727 | |
---|
728 | |
---|
729 | /** |
---|
730 | * dev_change_name - change name of a device |
---|
731 | * @dev: device |
---|
732 | * @newname: name (or format string) must be at least IFNAMSIZ |
---|
733 | * |
---|
734 | * Change name of a device, can pass format strings "eth%d". |
---|
735 | * for wildcarding. |
---|
736 | */ |
---|
737 | int dev_change_name(struct net_device *dev, char *newname) |
---|
738 | { |
---|
739 | int err = 0; |
---|
740 | |
---|
741 | ASSERT_RTNL(); |
---|
742 | |
---|
743 | if (dev->flags & IFF_UP) |
---|
744 | return -EBUSY; |
---|
745 | |
---|
746 | if (!dev_valid_name(newname)) |
---|
747 | return -EINVAL; |
---|
748 | |
---|
749 | if (strchr(newname, '%')) { |
---|
750 | err = dev_alloc_name(dev, newname); |
---|
751 | if (err < 0) |
---|
752 | return err; |
---|
753 | strcpy(newname, dev->name); |
---|
754 | } |
---|
755 | else if (__dev_get_by_name(newname)) |
---|
756 | return -EEXIST; |
---|
757 | else |
---|
758 | strlcpy(dev->name, newname, IFNAMSIZ); |
---|
759 | |
---|
760 | err = class_device_rename(&dev->class_dev, dev->name); |
---|
761 | if (!err) { |
---|
762 | hlist_del(&dev->name_hlist); |
---|
763 | hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); |
---|
764 | raw_notifier_call_chain(&netdev_chain, |
---|
765 | NETDEV_CHANGENAME, dev); |
---|
766 | } |
---|
767 | |
---|
768 | return err; |
---|
769 | } |
---|
770 | |
---|
771 | /** |
---|
772 | * netdev_features_change - device changes features |
---|
773 | * @dev: device to cause notification |
---|
774 | * |
---|
775 | * Called to indicate a device has changed features. |
---|
776 | */ |
---|
777 | void netdev_features_change(struct net_device *dev) |
---|
778 | { |
---|
779 | raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); |
---|
780 | } |
---|
781 | EXPORT_SYMBOL(netdev_features_change); |
---|
782 | |
---|
783 | /** |
---|
784 | * netdev_state_change - device changes state |
---|
785 | * @dev: device to cause notification |
---|
786 | * |
---|
787 | * Called to indicate a device has changed state. This function calls |
---|
788 | * the notifier chains for netdev_chain and sends a NEWLINK message |
---|
789 | * to the routing socket. |
---|
790 | */ |
---|
791 | void netdev_state_change(struct net_device *dev) |
---|
792 | { |
---|
793 | if (dev->flags & IFF_UP) { |
---|
794 | raw_notifier_call_chain(&netdev_chain, |
---|
795 | NETDEV_CHANGE, dev); |
---|
796 | rtmsg_ifinfo(RTM_NEWLINK, dev, 0); |
---|
797 | } |
---|
798 | } |
---|
799 | |
---|
800 | /** |
---|
801 | * dev_load - load a network module |
---|
802 | * @name: name of interface |
---|
803 | * |
---|
804 | * If a network interface is not present and the process has suitable |
---|
805 | * privileges this function loads the module. If module loading is not |
---|
806 | * available in this kernel then it becomes a nop. |
---|
807 | */ |
---|
808 | |
---|
809 | void dev_load(const char *name) |
---|
810 | { |
---|
811 | struct net_device *dev; |
---|
812 | |
---|
813 | read_lock(&dev_base_lock); |
---|
814 | dev = __dev_get_by_name(name); |
---|
815 | read_unlock(&dev_base_lock); |
---|
816 | |
---|
817 | if (!dev && capable(CAP_SYS_MODULE)) |
---|
818 | request_module("%s", name); |
---|
819 | } |
---|
820 | |
---|
821 | static int default_rebuild_header(struct sk_buff *skb) |
---|
822 | { |
---|
823 | printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", |
---|
824 | skb->dev ? skb->dev->name : "NULL!!!"); |
---|
825 | kfree_skb(skb); |
---|
826 | return 1; |
---|
827 | } |
---|
828 | |
---|
829 | |
---|
830 | /** |
---|
831 | * dev_open - prepare an interface for use. |
---|
832 | * @dev: device to open |
---|
833 | * |
---|
834 | * Takes a device from down to up state. The device's private open |
---|
835 | * function is invoked and then the multicast lists are loaded. Finally |
---|
836 | * the device is moved into the up state and a %NETDEV_UP message is |
---|
837 | * sent to the netdev notifier chain. |
---|
838 | * |
---|
839 | * Calling this function on an active interface is a nop. On a failure |
---|
840 | * a negative errno code is returned. |
---|
841 | */ |
---|
842 | int dev_open(struct net_device *dev) |
---|
843 | { |
---|
844 | int ret = 0; |
---|
845 | |
---|
846 | /* |
---|
847 | * Is it already up? |
---|
848 | */ |
---|
849 | |
---|
850 | if (dev->flags & IFF_UP) |
---|
851 | return 0; |
---|
852 | |
---|
853 | /* |
---|
854 | * Is it even present? |
---|
855 | */ |
---|
856 | if (!netif_device_present(dev)) |
---|
857 | return -ENODEV; |
---|
858 | |
---|
859 | /* |
---|
860 | * Call device private open method |
---|
861 | */ |
---|
862 | set_bit(__LINK_STATE_START, &dev->state); |
---|
863 | if (dev->open) { |
---|
864 | ret = dev->open(dev); |
---|
865 | if (ret) |
---|
866 | clear_bit(__LINK_STATE_START, &dev->state); |
---|
867 | } |
---|
868 | |
---|
869 | /* |
---|
870 | * If it went open OK then: |
---|
871 | */ |
---|
872 | |
---|
873 | if (!ret) { |
---|
874 | /* |
---|
875 | * Set the flags. |
---|
876 | */ |
---|
877 | dev->flags |= IFF_UP; |
---|
878 | |
---|
879 | /* |
---|
880 | * Initialize multicasting status |
---|
881 | */ |
---|
882 | dev_mc_upload(dev); |
---|
883 | |
---|
884 | /* |
---|
885 | * Wakeup transmit queue engine |
---|
886 | */ |
---|
887 | dev_activate(dev); |
---|
888 | |
---|
889 | /* |
---|
890 | * ... and announce new interface. |
---|
891 | */ |
---|
892 | raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); |
---|
893 | } |
---|
894 | return ret; |
---|
895 | } |
---|
896 | |
---|
897 | /** |
---|
898 | * dev_close - shutdown an interface. |
---|
899 | * @dev: device to shutdown |
---|
900 | * |
---|
901 | * This function moves an active device into down state. A |
---|
902 | * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device |
---|
903 | * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier |
---|
904 | * chain. |
---|
905 | */ |
---|
906 | int dev_close(struct net_device *dev) |
---|
907 | { |
---|
908 | if (!(dev->flags & IFF_UP)) |
---|
909 | return 0; |
---|
910 | |
---|
911 | /* |
---|
912 | * Tell people we are going down, so that they can |
---|
913 | * prepare to death, when device is still operating. |
---|
914 | */ |
---|
915 | raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); |
---|
916 | |
---|
917 | dev_deactivate(dev); |
---|
918 | |
---|
919 | clear_bit(__LINK_STATE_START, &dev->state); |
---|
920 | |
---|
921 | /* Synchronize to scheduled poll. We cannot touch poll list, |
---|
922 | * it can be even on different cpu. So just clear netif_running(), |
---|
923 | * and wait when poll really will happen. Actually, the best place |
---|
924 | * for this is inside dev->stop() after device stopped its irq |
---|
925 | * engine, but this requires more changes in devices. */ |
---|
926 | |
---|
927 | smp_mb__after_clear_bit(); /* Commit netif_running(). */ |
---|
928 | while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { |
---|
929 | /* No hurry. */ |
---|
930 | msleep(1); |
---|
931 | } |
---|
932 | |
---|
933 | /* |
---|
934 | * Call the device specific close. This cannot fail. |
---|
935 | * Only if device is UP |
---|
936 | * |
---|
937 | * We allow it to be called even after a DETACH hot-plug |
---|
938 | * event. |
---|
939 | */ |
---|
940 | if (dev->stop) |
---|
941 | dev->stop(dev); |
---|
942 | |
---|
943 | /* |
---|
944 | * Device is now down. |
---|
945 | */ |
---|
946 | |
---|
947 | dev->flags &= ~IFF_UP; |
---|
948 | |
---|
949 | /* |
---|
950 | * Tell people we are down |
---|
951 | */ |
---|
952 | raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); |
---|
953 | |
---|
954 | return 0; |
---|
955 | } |
---|
956 | |
---|
957 | |
---|
958 | /* |
---|
959 | * Device change register/unregister. These are not inline or static |
---|
960 | * as we export them to the world. |
---|
961 | */ |
---|
962 | |
---|
963 | /** |
---|
964 | * register_netdevice_notifier - register a network notifier block |
---|
965 | * @nb: notifier |
---|
966 | * |
---|
967 | * Register a notifier to be called when network device events occur. |
---|
968 | * The notifier passed is linked into the kernel structures and must |
---|
969 | * not be reused until it has been unregistered. A negative errno code |
---|
970 | * is returned on a failure. |
---|
971 | * |
---|
972 | * When registered all registration and up events are replayed |
---|
973 | * to the new notifier to allow device to have a race free |
---|
974 | * view of the network device list. |
---|
975 | */ |
---|
976 | |
---|
977 | int register_netdevice_notifier(struct notifier_block *nb) |
---|
978 | { |
---|
979 | struct net_device *dev; |
---|
980 | int err; |
---|
981 | |
---|
982 | rtnl_lock(); |
---|
983 | err = raw_notifier_chain_register(&netdev_chain, nb); |
---|
984 | if (!err) { |
---|
985 | for (dev = dev_base; dev; dev = dev->next) { |
---|
986 | nb->notifier_call(nb, NETDEV_REGISTER, dev); |
---|
987 | |
---|
988 | if (dev->flags & IFF_UP) |
---|
989 | nb->notifier_call(nb, NETDEV_UP, dev); |
---|
990 | } |
---|
991 | } |
---|
992 | rtnl_unlock(); |
---|
993 | return err; |
---|
994 | } |
---|
995 | |
---|
996 | /** |
---|
997 | * unregister_netdevice_notifier - unregister a network notifier block |
---|
998 | * @nb: notifier |
---|
999 | * |
---|
1000 | * Unregister a notifier previously registered by |
---|
1001 | * register_netdevice_notifier(). The notifier is unlinked into the |
---|
1002 | * kernel structures and may then be reused. A negative errno code |
---|
1003 | * is returned on a failure. |
---|
1004 | */ |
---|
1005 | |
---|
1006 | int unregister_netdevice_notifier(struct notifier_block *nb) |
---|
1007 | { |
---|
1008 | int err; |
---|
1009 | |
---|
1010 | rtnl_lock(); |
---|
1011 | err = raw_notifier_chain_unregister(&netdev_chain, nb); |
---|
1012 | rtnl_unlock(); |
---|
1013 | return err; |
---|
1014 | } |
---|
1015 | |
---|
1016 | /** |
---|
1017 | * call_netdevice_notifiers - call all network notifier blocks |
---|
1018 | * @val: value passed unmodified to notifier function |
---|
1019 | * @v: pointer passed unmodified to notifier function |
---|
1020 | * |
---|
1021 | * Call all network notifier blocks. Parameters and return value |
---|
1022 | * are as for raw_notifier_call_chain(). |
---|
1023 | */ |
---|
1024 | |
---|
1025 | int call_netdevice_notifiers(unsigned long val, void *v) |
---|
1026 | { |
---|
1027 | return raw_notifier_call_chain(&netdev_chain, val, v); |
---|
1028 | } |
---|
1029 | |
---|
1030 | /* When > 0 there are consumers of rx skb time stamps */ |
---|
1031 | static atomic_t netstamp_needed = ATOMIC_INIT(0); |
---|
1032 | |
---|
1033 | void net_enable_timestamp(void) |
---|
1034 | { |
---|
1035 | atomic_inc(&netstamp_needed); |
---|
1036 | } |
---|
1037 | |
---|
1038 | void net_disable_timestamp(void) |
---|
1039 | { |
---|
1040 | atomic_dec(&netstamp_needed); |
---|
1041 | } |
---|
1042 | |
---|
1043 | void __net_timestamp(struct sk_buff *skb) |
---|
1044 | { |
---|
1045 | struct timeval tv; |
---|
1046 | |
---|
1047 | do_gettimeofday(&tv); |
---|
1048 | skb_set_timestamp(skb, &tv); |
---|
1049 | } |
---|
1050 | EXPORT_SYMBOL(__net_timestamp); |
---|
1051 | |
---|
1052 | static inline void net_timestamp(struct sk_buff *skb) |
---|
1053 | { |
---|
1054 | if (atomic_read(&netstamp_needed)) |
---|
1055 | __net_timestamp(skb); |
---|
1056 | else { |
---|
1057 | skb->tstamp.off_sec = 0; |
---|
1058 | skb->tstamp.off_usec = 0; |
---|
1059 | } |
---|
1060 | } |
---|
1061 | |
---|
1062 | /* |
---|
1063 | * Support routine. Sends outgoing frames to any network |
---|
1064 | * taps currently in use. |
---|
1065 | */ |
---|
1066 | |
---|
1067 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
---|
1068 | { |
---|
1069 | struct packet_type *ptype; |
---|
1070 | |
---|
1071 | net_timestamp(skb); |
---|
1072 | |
---|
1073 | rcu_read_lock(); |
---|
1074 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
---|
1075 | /* Never send packets back to the socket |
---|
1076 | * they originated from - MvS (miquels@drinkel.ow.org) |
---|
1077 | */ |
---|
1078 | if ((ptype->dev == dev || !ptype->dev) && |
---|
1079 | (ptype->af_packet_priv == NULL || |
---|
1080 | (struct sock *)ptype->af_packet_priv != skb->sk)) { |
---|
1081 | struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); |
---|
1082 | if (!skb2) |
---|
1083 | break; |
---|
1084 | |
---|
1085 | /* skb->nh should be correctly |
---|
1086 | set by sender, so that the second statement is |
---|
1087 | just protection against buggy protocols. |
---|
1088 | */ |
---|
1089 | skb2->mac.raw = skb2->data; |
---|
1090 | |
---|
1091 | if (skb2->nh.raw < skb2->data || |
---|
1092 | skb2->nh.raw > skb2->tail) { |
---|
1093 | if (net_ratelimit()) |
---|
1094 | printk(KERN_CRIT "protocol %04x is " |
---|
1095 | "buggy, dev %s\n", |
---|
1096 | skb2->protocol, dev->name); |
---|
1097 | skb2->nh.raw = skb2->data; |
---|
1098 | } |
---|
1099 | |
---|
1100 | skb2->h.raw = skb2->nh.raw; |
---|
1101 | skb2->pkt_type = PACKET_OUTGOING; |
---|
1102 | ptype->func(skb2, skb->dev, ptype, skb->dev); |
---|
1103 | } |
---|
1104 | } |
---|
1105 | rcu_read_unlock(); |
---|
1106 | } |
---|
1107 | |
---|
1108 | |
---|
1109 | void __netif_schedule(struct net_device *dev) |
---|
1110 | { |
---|
1111 | if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { |
---|
1112 | unsigned long flags; |
---|
1113 | struct softnet_data *sd; |
---|
1114 | |
---|
1115 | local_irq_save(flags); |
---|
1116 | sd = &__get_cpu_var(softnet_data); |
---|
1117 | dev->next_sched = sd->output_queue; |
---|
1118 | sd->output_queue = dev; |
---|
1119 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
---|
1120 | local_irq_restore(flags); |
---|
1121 | } |
---|
1122 | } |
---|
1123 | EXPORT_SYMBOL(__netif_schedule); |
---|
1124 | |
---|
1125 | void __netif_rx_schedule(struct net_device *dev) |
---|
1126 | { |
---|
1127 | unsigned long flags; |
---|
1128 | |
---|
1129 | local_irq_save(flags); |
---|
1130 | dev_hold(dev); |
---|
1131 | list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); |
---|
1132 | if (dev->quota < 0) |
---|
1133 | dev->quota += dev->weight; |
---|
1134 | else |
---|
1135 | dev->quota = dev->weight; |
---|
1136 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
---|
1137 | local_irq_restore(flags); |
---|
1138 | } |
---|
1139 | EXPORT_SYMBOL(__netif_rx_schedule); |
---|
1140 | |
---|
/*
 * Free @skb from any context: dev_kfree_skb_irq() is used when running
 * in hard interrupt context or with interrupts disabled, dev_kfree_skb()
 * otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
---|
1149 | |
---|
1150 | |
---|
1151 | /* Hot-plugging. */ |
---|
1152 | void netif_device_detach(struct net_device *dev) |
---|
1153 | { |
---|
1154 | if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && |
---|
1155 | netif_running(dev)) { |
---|
1156 | netif_stop_queue(dev); |
---|
1157 | } |
---|
1158 | } |
---|
1159 | EXPORT_SYMBOL(netif_device_detach); |
---|
1160 | |
---|
1161 | void netif_device_attach(struct net_device *dev) |
---|
1162 | { |
---|
1163 | if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && |
---|
1164 | netif_running(dev)) { |
---|
1165 | netif_wake_queue(dev); |
---|
1166 | __netdev_watchdog_up(dev); |
---|
1167 | } |
---|
1168 | } |
---|
1169 | EXPORT_SYMBOL(netif_device_attach); |
---|
1170 | |
---|
1171 | |
---|
1172 | /* |
---|
1173 | * Invalidate hardware checksum when packet is to be mangled, and |
---|
1174 | * complete checksum manually on outgoing path. |
---|
1175 | */ |
---|
1176 | int skb_checksum_help(struct sk_buff *skb, int inward) |
---|
1177 | { |
---|
1178 | unsigned int csum; |
---|
1179 | int ret = 0, offset = skb->h.raw - skb->data; |
---|
1180 | |
---|
1181 | if (inward) |
---|
1182 | goto out_set_summed; |
---|
1183 | |
---|
1184 | if (unlikely(skb_shinfo(skb)->gso_size)) { |
---|
1185 | /* Let GSO fix up the checksum. */ |
---|
1186 | goto out_set_summed; |
---|
1187 | } |
---|
1188 | |
---|
1189 | if (skb_cloned(skb)) { |
---|
1190 | ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
---|
1191 | if (ret) |
---|
1192 | goto out; |
---|
1193 | } |
---|
1194 | |
---|
1195 | BUG_ON(offset > (int)skb->len); |
---|
1196 | csum = skb_checksum(skb, offset, skb->len-offset, 0); |
---|
1197 | |
---|
1198 | offset = skb->tail - skb->h.raw; |
---|
1199 | BUG_ON(offset <= 0); |
---|
1200 | BUG_ON(skb->csum + 2 > offset); |
---|
1201 | |
---|
1202 | *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); |
---|
1203 | |
---|
1204 | out_set_summed: |
---|
1205 | skb->ip_summed = CHECKSUM_NONE; |
---|
1206 | out: |
---|
1207 | return ret; |
---|
1208 | } |
---|
1209 | |
---|
1210 | /** |
---|
1211 | * skb_gso_segment - Perform segmentation on skb. |
---|
1212 | * @skb: buffer to segment |
---|
1213 | * @features: features for the output path (see dev->features) |
---|
1214 | * |
---|
1215 | * This function segments the given skb and returns a list of segments. |
---|
1216 | * |
---|
1217 | * It may return NULL if the skb requires no segmentation. This is |
---|
1218 | * only possible when GSO is used for verifying header integrity. |
---|
1219 | */ |
---|
1220 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) |
---|
1221 | { |
---|
1222 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
---|
1223 | struct packet_type *ptype; |
---|
1224 | int type = skb->protocol; |
---|
1225 | int err; |
---|
1226 | |
---|
1227 | BUG_ON(skb_shinfo(skb)->frag_list); |
---|
1228 | |
---|
1229 | skb->mac.raw = skb->data; |
---|
1230 | skb->mac_len = skb->nh.raw - skb->data; |
---|
1231 | __skb_pull(skb, skb->mac_len); |
---|
1232 | |
---|
1233 | if (unlikely(skb->ip_summed != CHECKSUM_HW)) { |
---|
1234 | if (skb_header_cloned(skb) && |
---|
1235 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
---|
1236 | return ERR_PTR(err); |
---|
1237 | } |
---|
1238 | |
---|
1239 | rcu_read_lock(); |
---|
1240 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { |
---|
1241 | if (ptype->type == type && !ptype->dev && ptype->gso_segment) { |
---|
1242 | if (unlikely(skb->ip_summed != CHECKSUM_HW)) { |
---|
1243 | err = ptype->gso_send_check(skb); |
---|
1244 | segs = ERR_PTR(err); |
---|
1245 | if (err || skb_gso_ok(skb, features)) |
---|
1246 | break; |
---|
1247 | __skb_push(skb, skb->data - skb->nh.raw); |
---|
1248 | } |
---|
1249 | segs = ptype->gso_segment(skb, features); |
---|
1250 | break; |
---|
1251 | } |
---|
1252 | } |
---|
1253 | rcu_read_unlock(); |
---|
1254 | |
---|
1255 | __skb_push(skb, skb->data - skb->mac.raw); |
---|
1256 | |
---|
1257 | return segs; |
---|
1258 | } |
---|
1259 | |
---|
1260 | EXPORT_SYMBOL(skb_gso_segment); |
---|
1261 | |
---|
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate limited error naming the device ("<unknown>" when @dev is
 * NULL) and dump the stack. Only built with CONFIG_BUG.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
---|
1274 | |
---|
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 if @skb has a page fragment in high memory while @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise. Without CONFIG_HIGHMEM the
 * answer is always 0.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

#endif
	return 0;
}
---|
1295 | |
---|
/*
 * Private data kept in skb->cb while a segment list hangs off skb->next:
 * the destructor the skb had before dev_gso_skb_destructor was installed.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
---|
1301 | |
---|
1302 | static void dev_gso_skb_destructor(struct sk_buff *skb) |
---|
1303 | { |
---|
1304 | struct dev_gso_cb *cb; |
---|
1305 | |
---|
1306 | do { |
---|
1307 | struct sk_buff *nskb = skb->next; |
---|
1308 | |
---|
1309 | skb->next = nskb->next; |
---|
1310 | nskb->next = NULL; |
---|
1311 | kfree_skb(nskb); |
---|
1312 | } while (skb->next); |
---|
1313 | |
---|
1314 | cb = DEV_GSO_CB(skb); |
---|
1315 | if (cb->destructor) |
---|
1316 | cb->destructor(skb); |
---|
1317 | } |
---|
1318 | |
---|
1319 | /** |
---|
1320 | * dev_gso_segment - Perform emulated hardware segmentation on skb. |
---|
1321 | * @skb: buffer to segment |
---|
1322 | * |
---|
1323 | * This function segments the given skb and stores the list of segments |
---|
1324 | * in skb->next. |
---|
1325 | */ |
---|
1326 | static int dev_gso_segment(struct sk_buff *skb) |
---|
1327 | { |
---|
1328 | struct net_device *dev = skb->dev; |
---|
1329 | struct sk_buff *segs; |
---|
1330 | int features = dev->features & ~(illegal_highdma(dev, skb) ? |
---|
1331 | NETIF_F_SG : 0); |
---|
1332 | |
---|
1333 | segs = skb_gso_segment(skb, features); |
---|
1334 | |
---|
1335 | /* Verifying header integrity only. */ |
---|
1336 | if (!segs) |
---|
1337 | return 0; |
---|
1338 | |
---|
1339 | if (unlikely(IS_ERR(segs))) |
---|
1340 | return PTR_ERR(segs); |
---|
1341 | |
---|
1342 | skb->next = segs; |
---|
1343 | DEV_GSO_CB(skb)->destructor = skb->destructor; |
---|
1344 | skb->destructor = dev_gso_skb_destructor; |
---|
1345 | |
---|
1346 | return 0; |
---|
1347 | } |
---|
1348 | |
---|
1349 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) |
---|
1350 | { |
---|
1351 | if (likely(!skb->next)) { |
---|
1352 | if (netdev_nit) |
---|
1353 | dev_queue_xmit_nit(skb, dev); |
---|
1354 | |
---|
1355 | if (netif_needs_gso(dev, skb)) { |
---|
1356 | if (unlikely(dev_gso_segment(skb))) |
---|
1357 | goto out_kfree_skb; |
---|
1358 | if (skb->next) |
---|
1359 | goto gso; |
---|
1360 | } |
---|
1361 | |
---|
1362 | return dev->hard_start_xmit(skb, dev); |
---|
1363 | } |
---|
1364 | |
---|
1365 | gso: |
---|
1366 | do { |
---|
1367 | struct sk_buff *nskb = skb->next; |
---|
1368 | int rc; |
---|
1369 | |
---|
1370 | skb->next = nskb->next; |
---|
1371 | nskb->next = NULL; |
---|
1372 | rc = dev->hard_start_xmit(nskb, dev); |
---|
1373 | if (unlikely(rc)) { |
---|
1374 | nskb->next = skb->next; |
---|
1375 | skb->next = nskb; |
---|
1376 | return rc; |
---|
1377 | } |
---|
1378 | if (unlikely(netif_queue_stopped(dev) && skb->next)) |
---|
1379 | return NETDEV_TX_BUSY; |
---|
1380 | } while (skb->next); |
---|
1381 | |
---|
1382 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
---|
1383 | |
---|
1384 | out_kfree_skb: |
---|
1385 | kfree_skb(skb); |
---|
1386 | return 0; |
---|
1387 | } |
---|
1388 | |
---|
/*
 * Take / release the transmit lock of @dev, unless the device has
 * NETIF_F_LLTX set in its features, in which case nothing is done.
 * The @cpu argument of HARD_TX_LOCK is accepted but not used.
 *
 * Both macros expand to a single statement, so they are safe in an
 * unbraced if/else, and @dev may be any pointer expression. Note that
 * @dev is evaluated more than once.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0)		\
		netif_tx_lock(dev);				\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0)		\
		netif_tx_unlock(dev);				\
} while (0)
---|
1400 | |
---|
/*
 * skb_checksum_setup - prepare a packet whose protocol checksum is blank
 *
 * With CONFIG_XEN: if skb->proto_csum_blank is set, point skb->h.raw
 * behind the IP header, store the offset of the TCP or UDP checksum
 * field in skb->csum, mark the packet CHECKSUM_HW and clear the blank
 * flag. Returns 0 on success (or when there is nothing to do) and
 * -EPROTO if the packet is not IP, is not TCP/UDP, or is too short to
 * hold the checksum field.
 *
 * Without CONFIG_XEN this is a stub returning 0.
 */
#ifdef CONFIG_XEN
inline int skb_checksum_setup(struct sk_buff *skb)
{
	if (!skb->proto_csum_blank)
		return 0;

	if (skb->protocol != htons(ETH_P_IP))
		return -EPROTO;

	/* ihl counts 32-bit words */
	skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
	if (skb->h.raw >= skb->tail)
		return -EPROTO;

	switch (skb->nh.iph->protocol) {
	case IPPROTO_TCP:
		skb->csum = offsetof(struct tcphdr, check);
		break;
	case IPPROTO_UDP:
		skb->csum = offsetof(struct udphdr, check);
		break;
	default:
		/* Terminate the message with a newline so it does not run
		 * into whatever is logged next.
		 */
		if (net_ratelimit())
			printk(KERN_ERR "Attempting to checksum a non-"
			       "TCP/UDP packet, dropping a protocol"
			       " %d packet\n", skb->nh.iph->protocol);
		return -EPROTO;
	}

	/* The 2-byte checksum field must fit in the linear data. */
	if ((skb->h.raw + skb->csum + 2) > skb->tail)
		return -EPROTO;

	skb->ip_summed = CHECKSUM_HW;
	skb->proto_csum_blank = 0;
	return 0;
}
#else
inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
#endif
---|
1436 | |
---|
1437 | |
---|
1438 | /** |
---|
1439 | * dev_queue_xmit - transmit a buffer |
---|
1440 | * @skb: buffer to transmit |
---|
1441 | * |
---|
1442 | * Queue a buffer for transmission to a network device. The caller must |
---|
1443 | * have set the device and priority and built the buffer before calling |
---|
1444 | * this function. The function can be called from an interrupt. |
---|
1445 | * |
---|
1446 | * A negative errno code is returned on a failure. A success does not |
---|
1447 | * guarantee the frame will be transmitted as it may be dropped due |
---|
1448 | * to congestion or traffic shaping. |
---|
1449 | * |
---|
1450 | * ----------------------------------------------------------------------------------- |
---|
1451 | * I notice this method can also return errors from the queue disciplines, |
---|
1452 | * including NET_XMIT_DROP, which is a positive value. So, errors can also |
---|
1453 | * be positive. |
---|
1454 | * |
---|
1455 | * Regardless of the return value, the skb is consumed, so it is currently |
---|
1456 | * difficult to retry a send to this method. (You can bump the ref count |
---|
1457 | * before sending to hold a reference for retry if you are careful.) |
---|
1458 | * |
---|
1459 | * When calling this method, interrupts MUST be enabled. This is because |
---|
1460 | * the BH enable code must have IRQs enabled so that it will not deadlock. |
---|
1461 | * --BLG |
---|
1462 | */ |
---|
1463 | |
---|
1464 | int dev_queue_xmit(struct sk_buff *skb) |
---|
1465 | { |
---|
1466 | struct net_device *dev = skb->dev; |
---|
1467 | struct Qdisc *q; |
---|
1468 | int rc = -ENOMEM; |
---|
1469 | |
---|
1470 | /* If a checksum-deferred packet is forwarded to a device that needs a |
---|
1471 | * checksum, correct the pointers and force checksumming. |
---|
1472 | */ |
---|
1473 | if (skb_checksum_setup(skb)) |
---|
1474 | goto out_kfree_skb; |
---|
1475 | |
---|
1476 | /* GSO will handle the following emulations directly. */ |
---|
1477 | if (netif_needs_gso(dev, skb)) |
---|
1478 | goto gso; |
---|
1479 | |
---|
1480 | if (skb_shinfo(skb)->frag_list && |
---|
1481 | !(dev->features & NETIF_F_FRAGLIST) && |
---|
1482 | __skb_linearize(skb)) |
---|
1483 | goto out_kfree_skb; |
---|
1484 | |
---|
1485 | /* Fragmented skb is linearized if device does not support SG, |
---|
1486 | * or if at least one of fragments is in highmem and device |
---|
1487 | * does not support DMA from it. |
---|
1488 | */ |
---|
1489 | if (skb_shinfo(skb)->nr_frags && |
---|
1490 | (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && |
---|
1491 | __skb_linearize(skb)) |
---|
1492 | goto out_kfree_skb; |
---|
1493 | |
---|
1494 | /* If packet is not checksummed and device does not support |
---|
1495 | * checksumming for this protocol, complete checksumming here. |
---|
1496 | */ |
---|
1497 | if (skb->ip_summed == CHECKSUM_HW && |
---|
1498 | (!(dev->features & NETIF_F_GEN_CSUM) && |
---|
1499 | (!(dev->features & NETIF_F_IP_CSUM) || |
---|
1500 | skb->protocol != htons(ETH_P_IP)))) |
---|
1501 | if (skb_checksum_help(skb, 0)) |
---|
1502 | goto out_kfree_skb; |
---|
1503 | |
---|
1504 | gso: |
---|
1505 | spin_lock_prefetch(&dev->queue_lock); |
---|
1506 | |
---|
1507 | /* Disable soft irqs for various locks below. Also |
---|
1508 | * stops preemption for RCU. |
---|
1509 | */ |
---|
1510 | rcu_read_lock_bh(); |
---|
1511 | |
---|
1512 | /* Updates of qdisc are serialized by queue_lock. |
---|
1513 | * The struct Qdisc which is pointed to by qdisc is now a |
---|
1514 | * rcu structure - it may be accessed without acquiring |
---|
1515 | * a lock (but the structure may be stale.) The freeing of the |
---|
1516 | * qdisc will be deferred until it's known that there are no |
---|
1517 | * more references to it. |
---|
1518 | * |
---|
1519 | * If the qdisc has an enqueue function, we still need to |
---|
1520 | * hold the queue_lock before calling it, since queue_lock |
---|
1521 | * also serializes access to the device queue. |
---|
1522 | */ |
---|
1523 | |
---|
1524 | q = rcu_dereference(dev->qdisc); |
---|
1525 | #ifdef CONFIG_NET_CLS_ACT |
---|
1526 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); |
---|
1527 | #endif |
---|
1528 | if (q->enqueue) { |
---|
1529 | /* Grab device queue */ |
---|
1530 | spin_lock(&dev->queue_lock); |
---|
1531 | |
---|
1532 | rc = q->enqueue(skb, q); |
---|
1533 | |
---|
1534 | qdisc_run(dev); |
---|
1535 | |
---|
1536 | spin_unlock(&dev->queue_lock); |
---|
1537 | rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; |
---|
1538 | goto out; |
---|
1539 | } |
---|
1540 | |
---|
1541 | /* The device has no queue. Common case for software devices: |
---|
1542 | loopback, all the sorts of tunnels... |
---|
1543 | |
---|
1544 | Really, it is unlikely that netif_tx_lock protection is necessary |
---|
1545 | here. (f.e. loopback and IP tunnels are clean ignoring statistics |
---|
1546 | counters.) |
---|
1547 | However, it is possible, that they rely on protection |
---|
1548 | made by us here. |
---|
1549 | |
---|
1550 | Check this and shot the lock. It is not prone from deadlocks. |
---|
1551 | Either shot noqueue qdisc, it is even simpler 8) |
---|
1552 | */ |
---|
1553 | if (dev->flags & IFF_UP) { |
---|
1554 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
---|
1555 | |
---|
1556 | if (dev->xmit_lock_owner != cpu) { |
---|
1557 | |
---|
1558 | HARD_TX_LOCK(dev, cpu); |
---|
1559 | |
---|
1560 | if (!netif_queue_stopped(dev)) { |
---|
1561 | rc = 0; |
---|
1562 | if (!dev_hard_start_xmit(skb, dev)) { |
---|
1563 | HARD_TX_UNLOCK(dev); |
---|
1564 | goto out; |
---|
1565 | } |
---|
1566 | } |
---|
1567 | HARD_TX_UNLOCK(dev); |
---|
1568 | if (net_ratelimit()) |
---|
1569 | printk(KERN_CRIT "Virtual device %s asks to " |
---|
1570 | "queue packet!\n", dev->name); |
---|
1571 | } else { |
---|
1572 | /* Recursion is detected! It is possible, |
---|
1573 | * unfortunately */ |
---|
1574 | if (net_ratelimit()) |
---|
1575 | printk(KERN_CRIT "Dead loop on virtual device " |
---|
1576 | "%s, fix it urgently!\n", dev->name); |
---|
1577 | } |
---|
1578 | } |
---|
1579 | |
---|
1580 | rc = -ENETDOWN; |
---|
1581 | rcu_read_unlock_bh(); |
---|
1582 | |
---|
1583 | out_kfree_skb: |
---|
1584 | kfree_skb(skb); |
---|
1585 | return rc; |
---|
1586 | out: |
---|
1587 | rcu_read_unlock_bh(); |
---|
1588 | return rc; |
---|
1589 | } |
---|
1590 | |
---|
1591 | |
---|
1592 | /*======================================================================= |
---|
1593 | Receiver routines |
---|
1594 | =======================================================================*/ |
---|
1595 | |
---|
1596 | int netdev_max_backlog = 1000; |
---|
1597 | int netdev_budget = 300; |
---|
1598 | int weight_p = 64; /* old backlog weight */ |
---|
1599 | |
---|
1600 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; |
---|
1601 | |
---|
1602 | |
---|
1603 | /** |
---|
1604 | * netif_rx - post buffer to the network code |
---|
1605 | * @skb: buffer to post |
---|
1606 | * |
---|
1607 | * This function receives a packet from a device driver and queues it for |
---|
1608 | * the upper (protocol) levels to process. It always succeeds. The buffer |
---|
1609 | * may be dropped during processing for congestion control or by the |
---|
1610 | * protocol layers. |
---|
1611 | * |
---|
1612 | * return values: |
---|
1613 | * NET_RX_SUCCESS (no congestion) |
---|
1614 | * NET_RX_CN_LOW (low congestion) |
---|
1615 | * NET_RX_CN_MOD (moderate congestion) |
---|
1616 | * NET_RX_CN_HIGH (high congestion) |
---|
1617 | * NET_RX_DROP (packet was dropped) |
---|
1618 | * |
---|
1619 | */ |
---|
1620 | |
---|
1621 | int netif_rx(struct sk_buff *skb) |
---|
1622 | { |
---|
1623 | struct softnet_data *queue; |
---|
1624 | unsigned long flags; |
---|
1625 | |
---|
1626 | /* if netpoll wants it, pretend we never saw it */ |
---|
1627 | if (netpoll_rx(skb)) |
---|
1628 | return NET_RX_DROP; |
---|
1629 | |
---|
1630 | if (!skb->tstamp.off_sec) |
---|
1631 | net_timestamp(skb); |
---|
1632 | |
---|
1633 | /* |
---|
1634 | * The code is rearranged so that the path is the most |
---|
1635 | * short when CPU is congested, but is still operating. |
---|
1636 | */ |
---|
1637 | local_irq_save(flags); |
---|
1638 | queue = &__get_cpu_var(softnet_data); |
---|
1639 | |
---|
1640 | __get_cpu_var(netdev_rx_stat).total++; |
---|
1641 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { |
---|
1642 | if (queue->input_pkt_queue.qlen) { |
---|
1643 | enqueue: |
---|
1644 | dev_hold(skb->dev); |
---|
1645 | __skb_queue_tail(&queue->input_pkt_queue, skb); |
---|
1646 | local_irq_restore(flags); |
---|
1647 | return NET_RX_SUCCESS; |
---|
1648 | } |
---|
1649 | |
---|
1650 | netif_rx_schedule(&queue->backlog_dev); |
---|
1651 | goto enqueue; |
---|
1652 | } |
---|
1653 | |
---|
1654 | __get_cpu_var(netdev_rx_stat).dropped++; |
---|
1655 | local_irq_restore(flags); |
---|
1656 | |
---|
1657 | kfree_skb(skb); |
---|
1658 | return NET_RX_DROP; |
---|
1659 | } |
---|
1660 | |
---|
/*
 * Variant of netif_rx() that, with preemption disabled, also runs any
 * softirqs left pending afterwards. Returns what netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();
	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
---|
1675 | |
---|
1676 | static inline struct net_device *skb_bond(struct sk_buff *skb) |
---|
1677 | { |
---|
1678 | struct net_device *dev = skb->dev; |
---|
1679 | |
---|
1680 | if (dev->master) { |
---|
1681 | if (skb_bond_should_drop(skb)) { |
---|
1682 | kfree_skb(skb); |
---|
1683 | return NULL; |
---|
1684 | } |
---|
1685 | skb->dev = dev->master; |
---|
1686 | } |
---|
1687 | |
---|
1688 | return dev; |
---|
1689 | } |
---|
1690 | |
---|
1691 | static void net_tx_action(struct softirq_action *h) |
---|
1692 | { |
---|
1693 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
---|
1694 | |
---|
1695 | if (sd->completion_queue) { |
---|
1696 | struct sk_buff *clist; |
---|
1697 | |
---|
1698 | local_irq_disable(); |
---|
1699 | clist = sd->completion_queue; |
---|
1700 | sd->completion_queue = NULL; |
---|
1701 | local_irq_enable(); |
---|
1702 | |
---|
1703 | while (clist) { |
---|
1704 | struct sk_buff *skb = clist; |
---|
1705 | clist = clist->next; |
---|
1706 | |
---|
1707 | BUG_TRAP(!atomic_read(&skb->users)); |
---|
1708 | __kfree_skb(skb); |
---|
1709 | } |
---|
1710 | } |
---|
1711 | |
---|
1712 | if (sd->output_queue) { |
---|
1713 | struct net_device *head; |
---|
1714 | |
---|
1715 | local_irq_disable(); |
---|
1716 | head = sd->output_queue; |
---|
1717 | sd->output_queue = NULL; |
---|
1718 | local_irq_enable(); |
---|
1719 | |
---|
1720 | while (head) { |
---|
1721 | struct net_device *dev = head; |
---|
1722 | head = head->next_sched; |
---|
1723 | |
---|
1724 | smp_mb__before_clear_bit(); |
---|
1725 | clear_bit(__LINK_STATE_SCHED, &dev->state); |
---|
1726 | |
---|
1727 | if (spin_trylock(&dev->queue_lock)) { |
---|
1728 | qdisc_run(dev); |
---|
1729 | spin_unlock(&dev->queue_lock); |
---|
1730 | } else { |
---|
1731 | netif_schedule(dev); |
---|
1732 | } |
---|
1733 | } |
---|
1734 | } |
---|
1735 | } |
---|
1736 | |
---|
1737 | static __inline__ int deliver_skb(struct sk_buff *skb, |
---|
1738 | struct packet_type *pt_prev, |
---|
1739 | struct net_device *orig_dev) |
---|
1740 | { |
---|
1741 | atomic_inc(&skb->users); |
---|
1742 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
---|
1743 | } |
---|
1744 | |
---|
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/*
 * Bridge hooks.  Only br_handle_frame_hook is called in this part of the
 * file; none of the three is assigned here (presumably the bridge code
 * fills them in -- confirm against the bridge module).
 */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Give the bridge a chance to consume a received buffer.
 *
 * Returns 0 without touching anything when the packet is of type
 * PACKET_LOOPBACK or the receiving device has no br_port.  Otherwise a
 * handler still pending in *pt_prev is run first (its result is stored in
 * *ret and *pt_prev is cleared), and the return value is that of
 * br_handle_frame_hook().
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
/* No bridge support configured: never consumes the buffer. */
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif
---|
1772 | |
---|
#ifdef CONFIG_NET_CLS_ACT
/*
 * TODO: consider forcing sch_ingress to be built in whenever
 * CONFIG_NET_CLS_ACT is set.  Without it this path costs a compare and
 * two extra stores for nothing.
 * NOTE: no functionality is lost in that case; without the ingress
 * scheduler you simply cannot add policies on ingress.
 */

/*
 * Run the ingress qdisc of skb->dev, if there is one.
 *
 * Returns TC_ACT_OK when the device has no ingress qdisc, TC_ACT_SHOT
 * when the redirect TTL stored in skb->tc_verd already exceeds
 * MAX_RED_LOOP, and otherwise the result of the qdisc's enqueue method
 * (called under dev->ingress_lock).
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl) {
		printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
		       skb->input_dev->name, skb->dev->name);
		return TC_ACT_SHOT;
	}
	ttl++;

	/* Record the bumped TTL and mark the buffer as being at ingress. */
	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	spin_lock(&dev->ingress_lock);
	/* Re-read under the lock; the qdisc may be gone by now. */
	q = dev->qdisc_ingress;
	if (q != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
---|
1810 | |
---|
1811 | int netif_receive_skb(struct sk_buff *skb) |
---|
1812 | { |
---|
1813 | struct packet_type *ptype, *pt_prev; |
---|
1814 | struct net_device *orig_dev; |
---|
1815 | int ret = NET_RX_DROP; |
---|
1816 | unsigned short type; |
---|
1817 | |
---|
1818 | /* if we've gotten here through NAPI, check netpoll */ |
---|
1819 | if (skb->dev->poll && netpoll_rx(skb)) |
---|
1820 | return NET_RX_DROP; |
---|
1821 | |
---|
1822 | if (!skb->tstamp.off_sec) |
---|
1823 | net_timestamp(skb); |
---|
1824 | |
---|
1825 | if (!skb->input_dev) |
---|
1826 | skb->input_dev = skb->dev; |
---|
1827 | |
---|
1828 | orig_dev = skb_bond(skb); |
---|
1829 | |
---|
1830 | if (!orig_dev) |
---|
1831 | return NET_RX_DROP; |
---|
1832 | |
---|
1833 | __get_cpu_var(netdev_rx_stat).total++; |
---|
1834 | |
---|
1835 | skb->h.raw = skb->nh.raw = skb->data; |
---|
1836 | skb->mac_len = skb->nh.raw - skb->mac.raw; |
---|
1837 | |
---|
1838 | pt_prev = NULL; |
---|
1839 | |
---|
1840 | rcu_read_lock(); |
---|
1841 | |
---|
1842 | #ifdef CONFIG_NET_CLS_ACT |
---|
1843 | if (skb->tc_verd & TC_NCLS) { |
---|
1844 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
---|
1845 | goto ncls; |
---|
1846 | } |
---|
1847 | #endif |
---|
1848 | |
---|
1849 | #ifdef CONFIG_XEN |
---|
1850 | switch (skb->ip_summed) { |
---|
1851 | case CHECKSUM_UNNECESSARY: |
---|
1852 | skb->proto_data_valid = 1; |
---|
1853 | break; |
---|
1854 | case CHECKSUM_HW: |
---|
1855 | /* XXX Implement me. */ |
---|
1856 | default: |
---|
1857 | skb->proto_data_valid = 0; |
---|
1858 | break; |
---|
1859 | } |
---|
1860 | #endif |
---|
1861 | |
---|
1862 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
---|
1863 | if (!ptype->dev || ptype->dev == skb->dev) { |
---|
1864 | if (pt_prev) |
---|
1865 | ret = deliver_skb(skb, pt_prev, orig_dev); |
---|
1866 | pt_prev = ptype; |
---|
1867 | } |
---|
1868 | } |
---|
1869 | |
---|
1870 | #ifdef CONFIG_NET_CLS_ACT |
---|
1871 | if (pt_prev) { |
---|
1872 | ret = deliver_skb(skb, pt_prev, orig_dev); |
---|
1873 | pt_prev = NULL; /* noone else should process this after*/ |
---|
1874 | } else { |
---|
1875 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); |
---|
1876 | } |
---|
1877 | |
---|
1878 | ret = ing_filter(skb); |
---|
1879 | |
---|
1880 | if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { |
---|
1881 | kfree_skb(skb); |
---|
1882 | goto out; |
---|
1883 | } |
---|
1884 | |
---|
1885 | skb->tc_verd = 0; |
---|
1886 | ncls: |
---|
1887 | #endif |
---|
1888 | |
---|
1889 | handle_diverter(skb); |
---|
1890 | |
---|
1891 | if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) |
---|
1892 | goto out; |
---|
1893 | |
---|
1894 | type = skb->protocol; |
---|
1895 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { |
---|
1896 | if (ptype->type == type && |
---|
1897 | (!ptype->dev || ptype->dev == skb->dev)) { |
---|
1898 | if (pt_prev) |
---|
1899 | ret = deliver_skb(skb, pt_prev, orig_dev); |
---|
1900 | pt_prev = ptype; |
---|
1901 | } |
---|
1902 | } |
---|
1903 | |
---|
1904 | if (pt_prev) { |
---|
1905 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
---|
1906 | } else { |
---|
1907 | kfree_skb(skb); |
---|
1908 | /* Jamal, now you will not able to escape explaining |
---|
1909 | * me how you were going to use this. :-) |
---|
1910 | */ |
---|
1911 | ret = NET_RX_DROP; |
---|
1912 | } |
---|
1913 | |
---|
1914 | out: |
---|
1915 | rcu_read_unlock(); |
---|
1916 | return ret; |
---|
1917 | } |
---|
1918 | |
---|
1919 | static int process_backlog(struct net_device *backlog_dev, int *budget) |
---|
1920 | { |
---|
1921 | int work = 0; |
---|
1922 | int quota = min(backlog_dev->quota, *budget); |
---|
1923 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
---|
1924 | unsigned long start_time = jiffies; |
---|
1925 | |
---|
1926 | backlog_dev->weight = weight_p; |
---|
1927 | for (;;) { |
---|
1928 | struct sk_buff *skb; |
---|
1929 | struct net_device *dev; |
---|
1930 | |
---|
1931 | local_irq_disable(); |
---|
1932 | skb = __skb_dequeue(&queue->input_pkt_queue); |
---|
1933 | if (!skb) |
---|
1934 | goto job_done; |
---|
1935 | local_irq_enable(); |
---|
1936 | |
---|
1937 | dev = skb->dev; |
---|
1938 | |
---|
1939 | netif_receive_skb(skb); |
---|
1940 | |
---|
1941 | dev_put(dev); |
---|
1942 | |
---|
1943 | work++; |
---|
1944 | |
---|
1945 | if (work >= quota || jiffies - start_time > 1) |
---|
1946 | break; |
---|
1947 | |
---|
1948 | } |
---|
1949 | |
---|
1950 | backlog_dev->quota -= work; |
---|
1951 | *budget -= work; |
---|
1952 | return -1; |
---|
1953 | |
---|
1954 | job_done: |
---|
1955 | backlog_dev->quota -= work; |
---|
1956 | *budget -= work; |
---|
1957 | |
---|
1958 | list_del(&backlog_dev->poll_list); |
---|
1959 | smp_mb__before_clear_bit(); |
---|
1960 | netif_poll_enable(backlog_dev); |
---|
1961 | |
---|
1962 | local_irq_enable(); |
---|
1963 | return 0; |
---|
1964 | } |
---|
1965 | |
---|
1966 | static void net_rx_action(struct softirq_action *h) |
---|
1967 | { |
---|
1968 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
---|
1969 | unsigned long start_time = jiffies; |
---|
1970 | int budget = netdev_budget; |
---|
1971 | void *have; |
---|
1972 | |
---|
1973 | local_irq_disable(); |
---|
1974 | |
---|
1975 | while (!list_empty(&queue->poll_list)) { |
---|
1976 | struct net_device *dev; |
---|
1977 | |
---|
1978 | if (budget <= 0 || jiffies - start_time > 1) |
---|
1979 | goto softnet_break; |
---|
1980 | |
---|
1981 | local_irq_enable(); |
---|
1982 | |
---|
1983 | dev = list_entry(queue->poll_list.next, |
---|
1984 | struct net_device, poll_list); |
---|
1985 | have = netpoll_poll_lock(dev); |
---|
1986 | |
---|
1987 | if (dev->quota <= 0 || dev->poll(dev, &budget)) { |
---|
1988 | netpoll_poll_unlock(have); |
---|
1989 | local_irq_disable(); |
---|
1990 | list_move_tail(&dev->poll_list, &queue->poll_list); |
---|
1991 | if (dev->quota < 0) |
---|
1992 | dev->quota += dev->weight; |
---|
1993 | else |
---|
1994 | dev->quota = dev->weight; |
---|
1995 | } else { |
---|
1996 | netpoll_poll_unlock(have); |
---|
1997 | dev_put(dev); |
---|
1998 | local_irq_disable(); |
---|
1999 | } |
---|
2000 | } |
---|
2001 | out: |
---|
2002 | #ifdef CONFIG_NET_DMA |
---|
2003 | /* |
---|
2004 | * There may not be any more sk_buffs coming right now, so push |
---|
2005 | * any pending DMA copies to hardware |
---|
2006 | */ |
---|
2007 | if (net_dma_client) { |
---|
2008 | struct dma_chan *chan; |
---|
2009 | rcu_read_lock(); |
---|
2010 | list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) |
---|
2011 | dma_async_memcpy_issue_pending(chan); |
---|
2012 | rcu_read_unlock(); |
---|
2013 | } |
---|
2014 | #endif |
---|
2015 | local_irq_enable(); |
---|
2016 | return; |
---|
2017 | |
---|
2018 | softnet_break: |
---|
2019 | __get_cpu_var(netdev_rx_stat).time_squeeze++; |
---|
2020 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
---|
2021 | goto out; |
---|
2022 | } |
---|
2023 | |
---|
2024 | static gifconf_func_t * gifconf_list [NPROTO]; |
---|
2025 | |
---|
2026 | /** |
---|
2027 | * register_gifconf - register a SIOCGIF handler |
---|
2028 | * @family: Address family |
---|
2029 | * @gifconf: Function handler |
---|
2030 | * |
---|
2031 | * Register protocol dependent address dumping routines. The handler |
---|
2032 | * that is passed must not be freed or reused until it has been replaced |
---|
2033 | * by another handler. |
---|
2034 | */ |
---|
2035 | int register_gifconf(unsigned int family, gifconf_func_t * gifconf) |
---|
2036 | { |
---|
2037 | if (family >= NPROTO) |
---|
2038 | return -EINVAL; |
---|
2039 | gifconf_list[family] = gifconf; |
---|
2040 | return 0; |
---|
2041 | } |
---|
2042 | |
---|
2043 | |
---|
2044 | /* |
---|
2045 | * Map an interface index to its name (SIOCGIFNAME) |
---|
2046 | */ |
---|
2047 | |
---|
2048 | /* |
---|
2049 | * We need this ioctl for efficient implementation of the |
---|
2050 | * if_indextoname() function required by the IPv6 API. Without |
---|
2051 | * it, we would have to search all the interfaces to find a |
---|
2052 | * match. --pb |
---|
2053 | */ |
---|
2054 | |
---|
2055 | static int dev_ifname(struct ifreq __user *arg) |
---|
2056 | { |
---|
2057 | struct net_device *dev; |
---|
2058 | struct ifreq ifr; |
---|
2059 | |
---|
2060 | /* |
---|
2061 | * Fetch the caller's info block. |
---|
2062 | */ |
---|
2063 | |
---|
2064 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
---|
2065 | return -EFAULT; |
---|
2066 | |
---|
2067 | read_lock(&dev_base_lock); |
---|
2068 | dev = __dev_get_by_index(ifr.ifr_ifindex); |
---|
2069 | if (!dev) { |
---|
2070 | read_unlock(&dev_base_lock); |
---|
2071 | return -ENODEV; |
---|
2072 | } |
---|
2073 | |
---|
2074 | strcpy(ifr.ifr_name, dev->name); |
---|
2075 | read_unlock(&dev_base_lock); |
---|
2076 | |
---|
2077 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
---|
2078 | return -EFAULT; |
---|
2079 | return 0; |
---|
2080 | } |
---|
2081 | |
---|
2082 | /* |
---|
2083 | * Perform a SIOCGIFCONF call. This structure will change |
---|
2084 | * size eventually, and there is nothing I can do about it. |
---|
2085 | * Thus we will need a 'compatibility mode'. |
---|
2086 | */ |
---|
2087 | |
---|
2088 | static int dev_ifconf(char __user *arg) |
---|
2089 | { |
---|
2090 | struct ifconf ifc; |
---|
2091 | struct net_device *dev; |
---|
2092 | char __user *pos; |
---|
2093 | int len; |
---|
2094 | int total; |
---|
2095 | int i; |
---|
2096 | |
---|
2097 | /* |
---|
2098 | * Fetch the caller's info block. |
---|
2099 | */ |
---|
2100 | |
---|
2101 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) |
---|
2102 | return -EFAULT; |
---|
2103 | |
---|
2104 | pos = ifc.ifc_buf; |
---|
2105 | len = ifc.ifc_len; |
---|
2106 | |
---|
2107 | /* |
---|
2108 | * Loop over the interfaces, and write an info block for each. |
---|
2109 | */ |
---|
2110 | |
---|
2111 | total = 0; |
---|
2112 | for (dev = dev_base; dev; dev = dev->next) { |
---|
2113 | for (i = 0; i < NPROTO; i++) { |
---|
2114 | if (gifconf_list[i]) { |
---|
2115 | int done; |
---|
2116 | if (!pos) |
---|
2117 | done = gifconf_list[i](dev, NULL, 0); |
---|
2118 | else |
---|
2119 | done = gifconf_list[i](dev, pos + total, |
---|
2120 | len - total); |
---|
2121 | if (done < 0) |
---|
2122 | return -EFAULT; |
---|
2123 | total += done; |
---|
2124 | } |
---|
2125 | } |
---|
2126 | } |
---|
2127 | |
---|
2128 | /* |
---|
2129 | * All done. Write the updated control block back to the caller. |
---|
2130 | */ |
---|
2131 | ifc.ifc_len = total; |
---|
2132 | |
---|
2133 | /* |
---|
2134 | * Both BSD and Solaris return 0 here, so we do too. |
---|
2135 | */ |
---|
2136 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; |
---|
2137 | } |
---|
2138 | |
---|
2139 | #ifdef CONFIG_PROC_FS |
---|
2140 | /* |
---|
2141 | * This is invoked by the /proc filesystem handler to display a device |
---|
2142 | * in detail. |
---|
2143 | */ |
---|
2144 | static __inline__ struct net_device *dev_get_idx(loff_t pos) |
---|
2145 | { |
---|
2146 | struct net_device *dev; |
---|
2147 | loff_t i; |
---|
2148 | |
---|
2149 | for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); |
---|
2150 | |
---|
2151 | return i == pos ? dev : NULL; |
---|
2152 | } |
---|
2153 | |
---|
2154 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) |
---|
2155 | { |
---|
2156 | read_lock(&dev_base_lock); |
---|
2157 | return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; |
---|
2158 | } |
---|
2159 | |
---|
2160 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
---|
2161 | { |
---|
2162 | ++*pos; |
---|
2163 | return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; |
---|
2164 | } |
---|
2165 | |
---|
2166 | void dev_seq_stop(struct seq_file *seq, void *v) |
---|
2167 | { |
---|
2168 | read_unlock(&dev_base_lock); |
---|
2169 | } |
---|
2170 | |
---|
2171 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) |
---|
2172 | { |
---|
2173 | if (dev->get_stats) { |
---|
2174 | struct net_device_stats *stats = dev->get_stats(dev); |
---|
2175 | |
---|
2176 | seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " |
---|
2177 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", |
---|
2178 | dev->name, stats->rx_bytes, stats->rx_packets, |
---|
2179 | stats->rx_errors, |
---|
2180 | stats->rx_dropped + stats->rx_missed_errors, |
---|
2181 | stats->rx_fifo_errors, |
---|
2182 | stats->rx_length_errors + stats->rx_over_errors + |
---|
2183 | stats->rx_crc_errors + stats->rx_frame_errors, |
---|
2184 | stats->rx_compressed, stats->multicast, |
---|
2185 | stats->tx_bytes, stats->tx_packets, |
---|
2186 | stats->tx_errors, stats->tx_dropped, |
---|
2187 | stats->tx_fifo_errors, stats->collisions, |
---|
2188 | stats->tx_carrier_errors + |
---|
2189 | stats->tx_aborted_errors + |
---|
2190 | stats->tx_window_errors + |
---|
2191 | stats->tx_heartbeat_errors, |
---|
2192 | stats->tx_compressed); |
---|
2193 | } else |
---|
2194 | seq_printf(seq, "%6s: No statistics available.\n", dev->name); |
---|
2195 | } |
---|
2196 | |
---|
2197 | /* |
---|
2198 | * Called from the PROCfs module. This now uses the new arbitrary sized |
---|
2199 | * /proc/net interface to create /proc/net/dev |
---|
2200 | */ |
---|
2201 | static int dev_seq_show(struct seq_file *seq, void *v) |
---|
2202 | { |
---|
2203 | if (v == SEQ_START_TOKEN) |
---|
2204 | seq_puts(seq, "Inter-| Receive " |
---|
2205 | " | Transmit\n" |
---|
2206 | " face |bytes packets errs drop fifo frame " |
---|
2207 | "compressed multicast|bytes packets errs " |
---|
2208 | "drop fifo colls carrier compressed\n"); |
---|
2209 | else |
---|
2210 | dev_seq_printf_stats(seq, v); |
---|
2211 | return 0; |
---|
2212 | } |
---|
2213 | |
---|
2214 | static struct netif_rx_stats *softnet_get_online(loff_t *pos) |
---|
2215 | { |
---|
2216 | struct netif_rx_stats *rc = NULL; |
---|
2217 | |
---|
2218 | while (*pos < NR_CPUS) |
---|
2219 | if (cpu_online(*pos)) { |
---|
2220 | rc = &per_cpu(netdev_rx_stat, *pos); |
---|
2221 | break; |
---|
2222 | } else |
---|
2223 | ++*pos; |
---|
2224 | return rc; |
---|
2225 | } |
---|
2226 | |
---|
2227 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) |
---|
2228 | { |
---|
2229 | return softnet_get_online(pos); |
---|
2230 | } |
---|
2231 | |
---|
2232 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
---|
2233 | { |
---|
2234 | ++*pos; |
---|
2235 | return softnet_get_online(pos); |
---|
2236 | } |
---|
2237 | |
---|
/* seq_file stop callback: the start callback takes no lock, so nothing to undo. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
---|
2241 | |
---|
2242 | static int softnet_seq_show(struct seq_file *seq, void *v) |
---|
2243 | { |
---|
2244 | struct netif_rx_stats *s = v; |
---|
2245 | |
---|
2246 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
---|
2247 | s->total, s->dropped, s->time_squeeze, 0, |
---|
2248 | 0, 0, 0, 0, /* was fastroute */ |
---|
2249 | s->cpu_collision ); |
---|
2250 | return 0; |
---|
2251 | } |
---|
2252 | |
---|
2253 | static struct seq_operations dev_seq_ops = { |
---|
2254 | .start = dev_seq_start, |
---|
2255 | .next = dev_seq_next, |
---|
2256 | .stop = dev_seq_stop, |
---|
2257 | .show = dev_seq_show, |
---|
2258 | }; |
---|
2259 | |
---|
2260 | static int dev_seq_open(struct inode *inode, struct file *file) |
---|
2261 | { |
---|
2262 | return seq_open(file, &dev_seq_ops); |
---|
2263 | } |
---|
2264 | |
---|
2265 | static struct file_operations dev_seq_fops = { |
---|
2266 | .owner = THIS_MODULE, |
---|
2267 | .open = dev_seq_open, |
---|
2268 | .read = seq_read, |
---|
2269 | .llseek = seq_lseek, |
---|
2270 | .release = seq_release, |
---|
2271 | }; |
---|
2272 | |
---|
2273 | static struct seq_operations softnet_seq_ops = { |
---|
2274 | .start = softnet_seq_start, |
---|
2275 | .next = softnet_seq_next, |
---|
2276 | .stop = softnet_seq_stop, |
---|
2277 | .show = softnet_seq_show, |
---|
2278 | }; |
---|
2279 | |
---|
2280 | static int softnet_seq_open(struct inode *inode, struct file *file) |
---|
2281 | { |
---|
2282 | return seq_open(file, &softnet_seq_ops); |
---|
2283 | } |
---|
2284 | |
---|
2285 | static struct file_operations softnet_seq_fops = { |
---|
2286 | .owner = THIS_MODULE, |
---|
2287 | .open = softnet_seq_open, |
---|
2288 | .read = seq_read, |
---|
2289 | .llseek = seq_lseek, |
---|
2290 | .release = seq_release, |
---|
2291 | }; |
---|
2292 | |
---|
/*
 * Without wireless extensions wireless_proc_init() collapses to a
 * constant 0, which dev_proc_init() treats as success.
 */
#ifndef CONFIG_WIRELESS_EXT
#define wireless_proc_init() 0
#else
extern int wireless_proc_init(void);
#endif
---|
2298 | |
---|
2299 | static int __init dev_proc_init(void) |
---|
2300 | { |
---|
2301 | int rc = -ENOMEM; |
---|
2302 | |
---|
2303 | if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) |
---|
2304 | goto out; |
---|
2305 | if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) |
---|
2306 | goto out_dev; |
---|
2307 | if (wireless_proc_init()) |
---|
2308 | goto out_softnet; |
---|
2309 | rc = 0; |
---|
2310 | out: |
---|
2311 | return rc; |
---|
2312 | out_softnet: |
---|
2313 | proc_net_remove("softnet_stat"); |
---|
2314 | out_dev: |
---|
2315 | proc_net_remove("dev"); |
---|
2316 | goto out; |
---|
2317 | } |
---|
2318 | #else |
---|
2319 | #define dev_proc_init() 0 |
---|
2320 | #endif /* CONFIG_PROC_FS */ |
---|
2321 | |
---|
2322 | |
---|
2323 | /** |
---|
2324 | * netdev_set_master - set up master/slave pair |
---|
2325 | * @slave: slave device |
---|
2326 | * @master: new master device |
---|
2327 | * |
---|
2328 | * Changes the master device of the slave. Pass %NULL to break the |
---|
2329 | * bonding. The caller must hold the RTNL semaphore. On a failure |
---|
2330 | * a negative errno code is returned. On success the reference counts |
---|
2331 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the |
---|
2332 | * function returns zero. |
---|
2333 | */ |
---|
2334 | int netdev_set_master(struct net_device *slave, struct net_device *master) |
---|
2335 | { |
---|
2336 | struct net_device *old = slave->master; |
---|
2337 | |
---|
2338 | ASSERT_RTNL(); |
---|
2339 | |
---|
2340 | if (master) { |
---|
2341 | if (old) |
---|
2342 | return -EBUSY; |
---|
2343 | dev_hold(master); |
---|
2344 | } |
---|
2345 | |
---|
2346 | slave->master = master; |
---|
2347 | |
---|
2348 | synchronize_net(); |
---|
2349 | |
---|
2350 | if (old) |
---|
2351 | dev_put(old); |
---|
2352 | |
---|
2353 | if (master) |
---|
2354 | slave->flags |= IFF_SLAVE; |
---|
2355 | else |
---|
2356 | slave->flags &= ~IFF_SLAVE; |
---|
2357 | |
---|
2358 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); |
---|
2359 | return 0; |
---|
2360 | } |
---|
2361 | |
---|
2362 | /** |
---|
2363 | * dev_set_promiscuity - update promiscuity count on a device |
---|
2364 | * @dev: device |
---|
2365 | * @inc: modifier |
---|
2366 | * |
---|
2367 | * Add or remove promiscuity from a device. While the count in the device |
---|
2368 | * remains above zero the interface remains promiscuous. Once it hits zero |
---|
2369 | * the device reverts back to normal filtering operation. A negative inc |
---|
2370 | * value is used to drop promiscuity on the device. |
---|
2371 | */ |
---|
2372 | void dev_set_promiscuity(struct net_device *dev, int inc) |
---|
2373 | { |
---|
2374 | unsigned short old_flags = dev->flags; |
---|
2375 | |
---|
2376 | if ((dev->promiscuity += inc) == 0) |
---|
2377 | dev->flags &= ~IFF_PROMISC; |
---|
2378 | else |
---|
2379 | dev->flags |= IFF_PROMISC; |
---|
2380 | if (dev->flags != old_flags) { |
---|
2381 | dev_mc_upload(dev); |
---|
2382 | printk(KERN_INFO "device %s %s promiscuous mode\n", |
---|
2383 | dev->name, (dev->flags & IFF_PROMISC) ? "entered" : |
---|
2384 | "left"); |
---|
2385 | audit_log(current->audit_context, GFP_ATOMIC, |
---|
2386 | AUDIT_ANOM_PROMISCUOUS, |
---|
2387 | "dev=%s prom=%d old_prom=%d auid=%u", |
---|
2388 | dev->name, (dev->flags & IFF_PROMISC), |
---|
2389 | (old_flags & IFF_PROMISC), |
---|
2390 | audit_get_loginuid(current->audit_context)); |
---|
2391 | } |
---|
2392 | } |
---|
2393 | |
---|
2394 | /** |
---|
2395 | * dev_set_allmulti - update allmulti count on a device |
---|
2396 | * @dev: device |
---|
2397 | * @inc: modifier |
---|
2398 | * |
---|
2399 | * Add or remove reception of all multicast frames to a device. While the |
---|
2400 | * count in the device remains above zero the interface remains listening |
---|
2401 | * to all interfaces. Once it hits zero the device reverts back to normal |
---|
2402 | * filtering operation. A negative @inc value is used to drop the counter |
---|
2403 | * when releasing a resource needing all multicasts. |
---|
2404 | */ |
---|
2405 | |
---|
2406 | void dev_set_allmulti(struct net_device *dev, int inc) |
---|
2407 | { |
---|
2408 | unsigned short old_flags = dev->flags; |
---|
2409 | |
---|
2410 | dev->flags |= IFF_ALLMULTI; |
---|
2411 | if ((dev->allmulti += inc) == 0) |
---|
2412 | dev->flags &= ~IFF_ALLMULTI; |
---|
2413 | if (dev->flags ^ old_flags) |
---|
2414 | dev_mc_upload(dev); |
---|
2415 | } |
---|
2416 | |
---|
2417 | unsigned dev_get_flags(const struct net_device *dev) |
---|
2418 | { |
---|
2419 | unsigned flags; |
---|
2420 | |
---|
2421 | flags = (dev->flags & ~(IFF_PROMISC | |
---|
2422 | IFF_ALLMULTI | |
---|
2423 | IFF_RUNNING | |
---|
2424 | IFF_LOWER_UP | |
---|
2425 | IFF_DORMANT)) | |
---|
2426 | (dev->gflags & (IFF_PROMISC | |
---|
2427 | IFF_ALLMULTI)); |
---|
2428 | |
---|
2429 | if (netif_running(dev)) { |
---|
2430 | if (netif_oper_up(dev)) |
---|
2431 | flags |= IFF_RUNNING; |
---|
2432 | if (netif_carrier_ok(dev)) |
---|
2433 | flags |= IFF_LOWER_UP; |
---|
2434 | if (netif_dormant(dev)) |
---|
2435 | flags |= IFF_DORMANT; |
---|
2436 | } |
---|
2437 | |
---|
2438 | return flags; |
---|
2439 | } |
---|
2440 | |
---|
2441 | int dev_change_flags(struct net_device *dev, unsigned flags) |
---|
2442 | { |
---|
2443 | int ret; |
---|
2444 | int old_flags = dev->flags; |
---|
2445 | |
---|
2446 | /* |
---|
2447 | * Set the flags on our device. |
---|
2448 | */ |
---|
2449 | |
---|
2450 | dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | |
---|
2451 | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | |
---|
2452 | IFF_AUTOMEDIA)) | |
---|
2453 | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | |
---|
2454 | IFF_ALLMULTI)); |
---|
2455 | |
---|
2456 | /* |
---|
2457 | * Load in the correct multicast list now the flags have changed. |
---|
2458 | */ |
---|
2459 | |
---|
2460 | dev_mc_upload(dev); |
---|
2461 | |
---|
2462 | /* |
---|
2463 | * Have we downed the interface. We handle IFF_UP ourselves |
---|
2464 | * according to user attempts to set it, rather than blindly |
---|
2465 | * setting it. |
---|
2466 | */ |
---|
2467 | |
---|
2468 | ret = 0; |
---|
2469 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ |
---|
2470 | ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); |
---|
2471 | |
---|
2472 | if (!ret) |
---|
2473 | dev_mc_upload(dev); |
---|
2474 | } |
---|
2475 | |
---|
2476 | if (dev->flags & IFF_UP && |
---|
2477 | ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | |
---|
2478 | IFF_VOLATILE))) |
---|
2479 | raw_notifier_call_chain(&netdev_chain, |
---|
2480 | NETDEV_CHANGE, dev); |
---|
2481 | |
---|
2482 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
---|
2483 | int inc = (flags & IFF_PROMISC) ? +1 : -1; |
---|
2484 | dev->gflags ^= IFF_PROMISC; |
---|
2485 | dev_set_promiscuity(dev, inc); |
---|
2486 | } |
---|
2487 | |
---|
2488 | /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI |
---|
2489 | is important. Some (broken) drivers set IFF_PROMISC, when |
---|
2490 | IFF_ALLMULTI is requested not asking us and not reporting. |
---|
2491 | */ |
---|
2492 | if ((flags ^ dev->gflags) & IFF_ALLMULTI) { |
---|
2493 | int inc = (flags & IFF_ALLMULTI) ? +1 : -1; |
---|
2494 | dev->gflags ^= IFF_ALLMULTI; |
---|
2495 | dev_set_allmulti(dev, inc); |
---|
2496 | } |
---|
2497 | |
---|
2498 | if (old_flags ^ dev->flags) |
---|
2499 | rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); |
---|
2500 | |
---|
2501 | return ret; |
---|
2502 | } |
---|
2503 | |
---|
2504 | int dev_set_mtu(struct net_device *dev, int new_mtu) |
---|
2505 | { |
---|
2506 | int err; |
---|
2507 | |
---|
2508 | if (new_mtu == dev->mtu) |
---|
2509 | return 0; |
---|
2510 | |
---|
2511 | /* MTU must be positive. */ |
---|
2512 | if (new_mtu < 0) |
---|
2513 | return -EINVAL; |
---|
2514 | |
---|
2515 | if (!netif_device_present(dev)) |
---|
2516 | return -ENODEV; |
---|
2517 | |
---|
2518 | err = 0; |
---|
2519 | if (dev->change_mtu) |
---|
2520 | err = dev->change_mtu(dev, new_mtu); |
---|
2521 | else |
---|
2522 | dev->mtu = new_mtu; |
---|
2523 | if (!err && dev->flags & IFF_UP) |
---|
2524 | raw_notifier_call_chain(&netdev_chain, |
---|
2525 | NETDEV_CHANGEMTU, dev); |
---|
2526 | return err; |
---|
2527 | } |
---|
2528 | |
---|
2529 | int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) |
---|
2530 | { |
---|
2531 | int err; |
---|
2532 | |
---|
2533 | if (!dev->set_mac_address) |
---|
2534 | return -EOPNOTSUPP; |
---|
2535 | if (sa->sa_family != dev->type) |
---|
2536 | return -EINVAL; |
---|
2537 | if (!netif_device_present(dev)) |
---|
2538 | return -ENODEV; |
---|
2539 | err = dev->set_mac_address(dev, sa); |
---|
2540 | if (!err) |
---|
2541 | raw_notifier_call_chain(&netdev_chain, |
---|
2542 | NETDEV_CHANGEADDR, dev); |
---|
2543 | return err; |
---|
2544 | } |
---|
2545 | |
---|
2546 | /* |
---|
2547 | * Perform the SIOCxIFxxx calls. |
---|
2548 | */ |
---|
2549 | static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) |
---|
2550 | { |
---|
2551 | int err; |
---|
2552 | struct net_device *dev = __dev_get_by_name(ifr->ifr_name); |
---|
2553 | |
---|
2554 | if (!dev) |
---|
2555 | return -ENODEV; |
---|
2556 | |
---|
2557 | switch (cmd) { |
---|
2558 | case SIOCGIFFLAGS: /* Get interface flags */ |
---|
2559 | ifr->ifr_flags = dev_get_flags(dev); |
---|
2560 | return 0; |
---|
2561 | |
---|
2562 | case SIOCSIFFLAGS: /* Set interface flags */ |
---|
2563 | return dev_change_flags(dev, ifr->ifr_flags); |
---|
2564 | |
---|
2565 | case SIOCGIFMETRIC: /* Get the metric on the interface |
---|
2566 | (currently unused) */ |
---|
2567 | ifr->ifr_metric = 0; |
---|
2568 | return 0; |
---|
2569 | |
---|
2570 | case SIOCSIFMETRIC: /* Set the metric on the interface |
---|
2571 | (currently unused) */ |
---|
2572 | return -EOPNOTSUPP; |
---|
2573 | |
---|
2574 | case SIOCGIFMTU: /* Get the MTU of a device */ |
---|
2575 | ifr->ifr_mtu = dev->mtu; |
---|
2576 | return 0; |
---|
2577 | |
---|
2578 | case SIOCSIFMTU: /* Set the MTU of a device */ |
---|
2579 | return dev_set_mtu(dev, ifr->ifr_mtu); |
---|
2580 | |
---|
2581 | case SIOCGIFHWADDR: |
---|
2582 | if (!dev->addr_len) |
---|
2583 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); |
---|
2584 | else |
---|
2585 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, |
---|
2586 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
---|
2587 | ifr->ifr_hwaddr.sa_family = dev->type; |
---|
2588 | return 0; |
---|
2589 | |
---|
2590 | case SIOCSIFHWADDR: |
---|
2591 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); |
---|
2592 | |
---|
2593 | case SIOCSIFHWBROADCAST: |
---|
2594 | if (ifr->ifr_hwaddr.sa_family != dev->type) |
---|
2595 | return -EINVAL; |
---|
2596 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, |
---|
2597 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
---|
2598 | raw_notifier_call_chain(&netdev_chain, |
---|
2599 | NETDEV_CHANGEADDR, dev); |
---|
2600 | return 0; |
---|
2601 | |
---|
2602 | case SIOCGIFMAP: |
---|
2603 | ifr->ifr_map.mem_start = dev->mem_start; |
---|
2604 | ifr->ifr_map.mem_end = dev->mem_end; |
---|
2605 | ifr->ifr_map.base_addr = dev->base_addr; |
---|
2606 | ifr->ifr_map.irq = dev->irq; |
---|
2607 | ifr->ifr_map.dma = dev->dma; |
---|
2608 | ifr->ifr_map.port = dev->if_port; |
---|
2609 | return 0; |
---|
2610 | |
---|
2611 | case SIOCSIFMAP: |
---|
2612 | if (dev->set_config) { |
---|
2613 | if (!netif_device_present(dev)) |
---|
2614 | return -ENODEV; |
---|
2615 | return dev->set_config(dev, &ifr->ifr_map); |
---|
2616 | } |
---|
2617 | return -EOPNOTSUPP; |
---|
2618 | |
---|
2619 | case SIOCADDMULTI: |
---|
2620 | if (!dev->set_multicast_list || |
---|
2621 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
---|
2622 | return -EINVAL; |
---|
2623 | if (!netif_device_present(dev)) |
---|
2624 | return -ENODEV; |
---|
2625 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, |
---|
2626 | dev->addr_len, 1); |
---|
2627 | |
---|
2628 | case SIOCDELMULTI: |
---|
2629 | if (!dev->set_multicast_list || |
---|
2630 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
---|
2631 | return -EINVAL; |
---|
2632 | if (!netif_device_present(dev)) |
---|
2633 | return -ENODEV; |
---|
2634 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, |
---|
2635 | dev->addr_len, 1); |
---|
2636 | |
---|
2637 | case SIOCGIFINDEX: |
---|
2638 | ifr->ifr_ifindex = dev->ifindex; |
---|
2639 | return 0; |
---|
2640 | |
---|
2641 | case SIOCGIFTXQLEN: |
---|
2642 | ifr->ifr_qlen = dev->tx_queue_len; |
---|
2643 | return 0; |
---|
2644 | |
---|
2645 | case SIOCSIFTXQLEN: |
---|
2646 | if (ifr->ifr_qlen < 0) |
---|
2647 | return -EINVAL; |
---|
2648 | dev->tx_queue_len = ifr->ifr_qlen; |
---|
2649 | return 0; |
---|
2650 | |
---|
2651 | case SIOCSIFNAME: |
---|
2652 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; |
---|
2653 | return dev_change_name(dev, ifr->ifr_newname); |
---|
2654 | |
---|
2655 | /* |
---|
2656 | * Unknown or private ioctl |
---|
2657 | */ |
---|
2658 | |
---|
2659 | default: |
---|
2660 | if ((cmd >= SIOCDEVPRIVATE && |
---|
2661 | cmd <= SIOCDEVPRIVATE + 15) || |
---|
2662 | cmd == SIOCBONDENSLAVE || |
---|
2663 | cmd == SIOCBONDRELEASE || |
---|
2664 | cmd == SIOCBONDSETHWADDR || |
---|
2665 | cmd == SIOCBONDSLAVEINFOQUERY || |
---|
2666 | cmd == SIOCBONDINFOQUERY || |
---|
2667 | cmd == SIOCBONDCHANGEACTIVE || |
---|
2668 | cmd == SIOCGMIIPHY || |
---|
2669 | cmd == SIOCGMIIREG || |
---|
2670 | cmd == SIOCSMIIREG || |
---|
2671 | cmd == SIOCBRADDIF || |
---|
2672 | cmd == SIOCBRDELIF || |
---|
2673 | cmd == SIOCWANDEV) { |
---|
2674 | err = -EOPNOTSUPP; |
---|
2675 | if (dev->do_ioctl) { |
---|
2676 | if (netif_device_present(dev)) |
---|
2677 | err = dev->do_ioctl(dev, ifr, |
---|
2678 | cmd); |
---|
2679 | else |
---|
2680 | err = -ENODEV; |
---|
2681 | } |
---|
2682 | } else |
---|
2683 | err = -EINVAL; |
---|
2684 | |
---|
2685 | } |
---|
2686 | return err; |
---|
2687 | } |
---|
2688 | |
---|
2689 | /* |
---|
2690 | * This function handles all "interface"-type I/O control requests. The actual |
---|
2691 | * 'doing' part of this is dev_ifsioc above. |
---|
2692 | */ |
---|
2693 | |
---|
2694 | /** |
---|
2695 | * dev_ioctl - network device ioctl |
---|
2696 | * @cmd: command to issue |
---|
2697 | * @arg: pointer to a struct ifreq in user space |
---|
2698 | * |
---|
2699 | * Issue ioctl functions to devices. This is normally called by the |
---|
2700 | * user space syscall interfaces but can sometimes be useful for |
---|
2701 | * other purposes. The return value is the return from the syscall if |
---|
2702 | * positive or a negative errno code on error. |
---|
2703 | */ |
---|
2704 | |
---|
2705 | int dev_ioctl(unsigned int cmd, void __user *arg) |
---|
2706 | { |
---|
2707 | struct ifreq ifr; |
---|
2708 | int ret; |
---|
2709 | char *colon; |
---|
2710 | |
---|
2711 | /* One special case: SIOCGIFCONF takes ifconf argument |
---|
2712 | and requires shared lock, because it sleeps writing |
---|
2713 | to user space. |
---|
2714 | */ |
---|
2715 | |
---|
2716 | if (cmd == SIOCGIFCONF) { |
---|
2717 | rtnl_lock(); |
---|
2718 | ret = dev_ifconf((char __user *) arg); |
---|
2719 | rtnl_unlock(); |
---|
2720 | return ret; |
---|
2721 | } |
---|
2722 | if (cmd == SIOCGIFNAME) |
---|
2723 | return dev_ifname((struct ifreq __user *)arg); |
---|
2724 | |
---|
2725 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
---|
2726 | return -EFAULT; |
---|
2727 | |
---|
2728 | ifr.ifr_name[IFNAMSIZ-1] = 0; |
---|
2729 | |
---|
2730 | colon = strchr(ifr.ifr_name, ':'); |
---|
2731 | if (colon) |
---|
2732 | *colon = 0; |
---|
2733 | |
---|
2734 | /* |
---|
2735 | * See which interface the caller is talking about. |
---|
2736 | */ |
---|
2737 | |
---|
2738 | switch (cmd) { |
---|
2739 | /* |
---|
2740 | * These ioctl calls: |
---|
2741 | * - can be done by all. |
---|
2742 | * - atomic and do not require locking. |
---|
2743 | * - return a value |
---|
2744 | */ |
---|
2745 | case SIOCGIFFLAGS: |
---|
2746 | case SIOCGIFMETRIC: |
---|
2747 | case SIOCGIFMTU: |
---|
2748 | case SIOCGIFHWADDR: |
---|
2749 | case SIOCGIFSLAVE: |
---|
2750 | case SIOCGIFMAP: |
---|
2751 | case SIOCGIFINDEX: |
---|
2752 | case SIOCGIFTXQLEN: |
---|
2753 | dev_load(ifr.ifr_name); |
---|
2754 | read_lock(&dev_base_lock); |
---|
2755 | ret = dev_ifsioc(&ifr, cmd); |
---|
2756 | read_unlock(&dev_base_lock); |
---|
2757 | if (!ret) { |
---|
2758 | if (colon) |
---|
2759 | *colon = ':'; |
---|
2760 | if (copy_to_user(arg, &ifr, |
---|
2761 | sizeof(struct ifreq))) |
---|
2762 | ret = -EFAULT; |
---|
2763 | } |
---|
2764 | return ret; |
---|
2765 | |
---|
2766 | case SIOCETHTOOL: |
---|
2767 | dev_load(ifr.ifr_name); |
---|
2768 | rtnl_lock(); |
---|
2769 | ret = dev_ethtool(&ifr); |
---|
2770 | rtnl_unlock(); |
---|
2771 | if (!ret) { |
---|
2772 | if (colon) |
---|
2773 | *colon = ':'; |
---|
2774 | if (copy_to_user(arg, &ifr, |
---|
2775 | sizeof(struct ifreq))) |
---|
2776 | ret = -EFAULT; |
---|
2777 | } |
---|
2778 | return ret; |
---|
2779 | |
---|
2780 | /* |
---|
2781 | * These ioctl calls: |
---|
2782 | * - require superuser power. |
---|
2783 | * - require strict serialization. |
---|
2784 | * - return a value |
---|
2785 | */ |
---|
2786 | case SIOCGMIIPHY: |
---|
2787 | case SIOCGMIIREG: |
---|
2788 | case SIOCSIFNAME: |
---|
2789 | if (!capable(CAP_NET_ADMIN)) |
---|
2790 | return -EPERM; |
---|
2791 | dev_load(ifr.ifr_name); |
---|
2792 | rtnl_lock(); |
---|
2793 | ret = dev_ifsioc(&ifr, cmd); |
---|
2794 | rtnl_unlock(); |
---|
2795 | if (!ret) { |
---|
2796 | if (colon) |
---|
2797 | *colon = ':'; |
---|
2798 | if (copy_to_user(arg, &ifr, |
---|
2799 | sizeof(struct ifreq))) |
---|
2800 | ret = -EFAULT; |
---|
2801 | } |
---|
2802 | return ret; |
---|
2803 | |
---|
2804 | /* |
---|
2805 | * These ioctl calls: |
---|
2806 | * - require superuser power. |
---|
2807 | * - require strict serialization. |
---|
2808 | * - do not return a value |
---|
2809 | */ |
---|
2810 | case SIOCSIFFLAGS: |
---|
2811 | case SIOCSIFMETRIC: |
---|
2812 | case SIOCSIFMTU: |
---|
2813 | case SIOCSIFMAP: |
---|
2814 | case SIOCSIFHWADDR: |
---|
2815 | case SIOCSIFSLAVE: |
---|
2816 | case SIOCADDMULTI: |
---|
2817 | case SIOCDELMULTI: |
---|
2818 | case SIOCSIFHWBROADCAST: |
---|
2819 | case SIOCSIFTXQLEN: |
---|
2820 | case SIOCSMIIREG: |
---|
2821 | case SIOCBONDENSLAVE: |
---|
2822 | case SIOCBONDRELEASE: |
---|
2823 | case SIOCBONDSETHWADDR: |
---|
2824 | case SIOCBONDCHANGEACTIVE: |
---|
2825 | case SIOCBRADDIF: |
---|
2826 | case SIOCBRDELIF: |
---|
2827 | if (!capable(CAP_NET_ADMIN)) |
---|
2828 | return -EPERM; |
---|
2829 | /* fall through */ |
---|
2830 | case SIOCBONDSLAVEINFOQUERY: |
---|
2831 | case SIOCBONDINFOQUERY: |
---|
2832 | dev_load(ifr.ifr_name); |
---|
2833 | rtnl_lock(); |
---|
2834 | ret = dev_ifsioc(&ifr, cmd); |
---|
2835 | rtnl_unlock(); |
---|
2836 | return ret; |
---|
2837 | |
---|
2838 | case SIOCGIFMEM: |
---|
2839 | /* Get the per device memory space. We can add this but |
---|
2840 | * currently do not support it */ |
---|
2841 | case SIOCSIFMEM: |
---|
2842 | /* Set the per device memory buffer space. |
---|
2843 | * Not applicable in our case */ |
---|
2844 | case SIOCSIFLINK: |
---|
2845 | return -EINVAL; |
---|
2846 | |
---|
2847 | /* |
---|
2848 | * Unknown or private ioctl. |
---|
2849 | */ |
---|
2850 | default: |
---|
2851 | if (cmd == SIOCWANDEV || |
---|
2852 | (cmd >= SIOCDEVPRIVATE && |
---|
2853 | cmd <= SIOCDEVPRIVATE + 15)) { |
---|
2854 | dev_load(ifr.ifr_name); |
---|
2855 | rtnl_lock(); |
---|
2856 | ret = dev_ifsioc(&ifr, cmd); |
---|
2857 | rtnl_unlock(); |
---|
2858 | if (!ret && copy_to_user(arg, &ifr, |
---|
2859 | sizeof(struct ifreq))) |
---|
2860 | ret = -EFAULT; |
---|
2861 | return ret; |
---|
2862 | } |
---|
2863 | #ifdef CONFIG_WIRELESS_EXT |
---|
2864 | /* Take care of Wireless Extensions */ |
---|
2865 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { |
---|
2866 | /* If command is `set a parameter', or |
---|
2867 | * `get the encoding parameters', check if |
---|
2868 | * the user has the right to do it */ |
---|
2869 | if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE |
---|
2870 | || cmd == SIOCGIWENCODEEXT) { |
---|
2871 | if (!capable(CAP_NET_ADMIN)) |
---|
2872 | return -EPERM; |
---|
2873 | } |
---|
2874 | dev_load(ifr.ifr_name); |
---|
2875 | rtnl_lock(); |
---|
2876 | /* Follow me in net/core/wireless.c */ |
---|
2877 | ret = wireless_process_ioctl(&ifr, cmd); |
---|
2878 | rtnl_unlock(); |
---|
2879 | if (IW_IS_GET(cmd) && |
---|
2880 | copy_to_user(arg, &ifr, |
---|
2881 | sizeof(struct ifreq))) |
---|
2882 | ret = -EFAULT; |
---|
2883 | return ret; |
---|
2884 | } |
---|
2885 | #endif /* CONFIG_WIRELESS_EXT */ |
---|
2886 | return -EINVAL; |
---|
2887 | } |
---|
2888 | } |
---|
2889 | |
---|
2890 | |
---|
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Hands out the next interface index that __dev_get_by_index() does
 *	not already know about. Indices are always positive: when the
 *	running counter stops being positive it restarts at 1. The caller
 *	must hold the rtnl semaphore or the dev_base_lock so the value
 *	stays unique.
 */
static int dev_new_index(void)
{
	/* Last index handed out; persists across calls. */
	static int ifindex;

	do {
		++ifindex;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
---|
2908 | |
---|
2909 | static int dev_boot_phase = 1; |
---|
2910 | |
---|
2911 | /* Delayed registration/unregisteration */ |
---|
2912 | static DEFINE_SPINLOCK(net_todo_list_lock); |
---|
2913 | static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); |
---|
2914 | |
---|
2915 | static inline void net_set_todo(struct net_device *dev) |
---|
2916 | { |
---|
2917 | spin_lock(&net_todo_list_lock); |
---|
2918 | list_add_tail(&dev->todo_list, &net_todo_list); |
---|
2919 | spin_unlock(&net_todo_list_lock); |
---|
2920 | } |
---|
2921 | |
---|
2922 | /** |
---|
2923 | * register_netdevice - register a network device |
---|
2924 | * @dev: device to register |
---|
2925 | * |
---|
2926 | * Take a completed network device structure and add it to the kernel |
---|
2927 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
---|
2928 | * chain. 0 is returned on success. A negative errno code is returned |
---|
2929 | * on a failure to set up the device, or if the name is a duplicate. |
---|
2930 | * |
---|
2931 | * Callers must hold the rtnl semaphore. You may want |
---|
2932 | * register_netdev() instead of this. |
---|
2933 | * |
---|
2934 | * BUGS: |
---|
2935 | * The locking appears insufficient to guarantee two parallel registers |
---|
2936 | * will not get the same name. |
---|
2937 | */ |
---|
2938 | |
---|
2939 | int register_netdevice(struct net_device *dev) |
---|
2940 | { |
---|
2941 | struct hlist_head *head; |
---|
2942 | struct hlist_node *p; |
---|
2943 | int ret; |
---|
2944 | |
---|
2945 | BUG_ON(dev_boot_phase); |
---|
2946 | ASSERT_RTNL(); |
---|
2947 | |
---|
2948 | might_sleep(); |
---|
2949 | |
---|
2950 | /* When net_device's are persistent, this will be fatal. */ |
---|
2951 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
---|
2952 | |
---|
2953 | spin_lock_init(&dev->queue_lock); |
---|
2954 | spin_lock_init(&dev->_xmit_lock); |
---|
2955 | dev->xmit_lock_owner = -1; |
---|
2956 | #ifdef CONFIG_NET_CLS_ACT |
---|
2957 | spin_lock_init(&dev->ingress_lock); |
---|
2958 | #endif |
---|
2959 | |
---|
2960 | ret = alloc_divert_blk(dev); |
---|
2961 | if (ret) |
---|
2962 | goto out; |
---|
2963 | |
---|
2964 | dev->iflink = -1; |
---|
2965 | |
---|
2966 | /* Init, if this function is available */ |
---|
2967 | if (dev->init) { |
---|
2968 | ret = dev->init(dev); |
---|
2969 | if (ret) { |
---|
2970 | if (ret > 0) |
---|
2971 | ret = -EIO; |
---|
2972 | goto out_err; |
---|
2973 | } |
---|
2974 | } |
---|
2975 | |
---|
2976 | if (!dev_valid_name(dev->name)) { |
---|
2977 | ret = -EINVAL; |
---|
2978 | goto out_err; |
---|
2979 | } |
---|
2980 | |
---|
2981 | dev->ifindex = dev_new_index(); |
---|
2982 | if (dev->iflink == -1) |
---|
2983 | dev->iflink = dev->ifindex; |
---|
2984 | |
---|
2985 | /* Check for existence of name */ |
---|
2986 | head = dev_name_hash(dev->name); |
---|
2987 | hlist_for_each(p, head) { |
---|
2988 | struct net_device *d |
---|
2989 | = hlist_entry(p, struct net_device, name_hlist); |
---|
2990 | if (!strncmp(d->name, dev->name, IFNAMSIZ)) { |
---|
2991 | ret = -EEXIST; |
---|
2992 | goto out_err; |
---|
2993 | } |
---|
2994 | } |
---|
2995 | |
---|
2996 | /* Fix illegal SG+CSUM combinations. */ |
---|
2997 | if ((dev->features & NETIF_F_SG) && |
---|
2998 | !(dev->features & NETIF_F_ALL_CSUM)) { |
---|
2999 | printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", |
---|
3000 | dev->name); |
---|
3001 | dev->features &= ~NETIF_F_SG; |
---|
3002 | } |
---|
3003 | |
---|
3004 | /* TSO requires that SG is present as well. */ |
---|
3005 | if ((dev->features & NETIF_F_TSO) && |
---|
3006 | !(dev->features & NETIF_F_SG)) { |
---|
3007 | printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", |
---|
3008 | dev->name); |
---|
3009 | dev->features &= ~NETIF_F_TSO; |
---|
3010 | } |
---|
3011 | if (dev->features & NETIF_F_UFO) { |
---|
3012 | if (!(dev->features & NETIF_F_HW_CSUM)) { |
---|
3013 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " |
---|
3014 | "NETIF_F_HW_CSUM feature.\n", |
---|
3015 | dev->name); |
---|
3016 | dev->features &= ~NETIF_F_UFO; |
---|
3017 | } |
---|
3018 | if (!(dev->features & NETIF_F_SG)) { |
---|
3019 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " |
---|
3020 | "NETIF_F_SG feature.\n", |
---|
3021 | dev->name); |
---|
3022 | dev->features &= ~NETIF_F_UFO; |
---|
3023 | } |
---|
3024 | } |
---|
3025 | |
---|
3026 | /* |
---|
3027 | * nil rebuild_header routine, |
---|
3028 | * that should be never called and used as just bug trap. |
---|
3029 | */ |
---|
3030 | |
---|
3031 | if (!dev->rebuild_header) |
---|
3032 | dev->rebuild_header = default_rebuild_header; |
---|
3033 | |
---|
3034 | ret = netdev_register_sysfs(dev); |
---|
3035 | if (ret) |
---|
3036 | goto out_err; |
---|
3037 | dev->reg_state = NETREG_REGISTERED; |
---|
3038 | |
---|
3039 | /* |
---|
3040 | * Default initial state at registry is that the |
---|
3041 | * device is present. |
---|
3042 | */ |
---|
3043 | |
---|
3044 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
---|
3045 | |
---|
3046 | dev->next = NULL; |
---|
3047 | dev_init_scheduler(dev); |
---|
3048 | write_lock_bh(&dev_base_lock); |
---|
3049 | *dev_tail = dev; |
---|
3050 | dev_tail = &dev->next; |
---|
3051 | hlist_add_head(&dev->name_hlist, head); |
---|
3052 | hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); |
---|
3053 | dev_hold(dev); |
---|
3054 | write_unlock_bh(&dev_base_lock); |
---|
3055 | |
---|
3056 | /* Notify protocols, that a new device appeared. */ |
---|
3057 | raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); |
---|
3058 | |
---|
3059 | ret = 0; |
---|
3060 | |
---|
3061 | out: |
---|
3062 | return ret; |
---|
3063 | out_err: |
---|
3064 | free_divert_blk(dev); |
---|
3065 | goto out; |
---|
3066 | } |
---|
3067 | |
---|
3068 | /** |
---|
3069 | * register_netdev - register a network device |
---|
3070 | * @dev: device to register |
---|
3071 | * |
---|
3072 | * Take a completed network device structure and add it to the kernel |
---|
3073 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
---|
3074 | * chain. 0 is returned on success. A negative errno code is returned |
---|
3075 | * on a failure to set up the device, or if the name is a duplicate. |
---|
3076 | * |
---|
3077 | * This is a wrapper around register_netdev that takes the rtnl semaphore |
---|
3078 | * and expands the device name if you passed a format string to |
---|
3079 | * alloc_netdev. |
---|
3080 | */ |
---|
3081 | int register_netdev(struct net_device *dev) |
---|
3082 | { |
---|
3083 | int err; |
---|
3084 | |
---|
3085 | rtnl_lock(); |
---|
3086 | |
---|
3087 | /* |
---|
3088 | * If the name is a format string the caller wants us to do a |
---|
3089 | * name allocation. |
---|
3090 | */ |
---|
3091 | if (strchr(dev->name, '%')) { |
---|
3092 | err = dev_alloc_name(dev, dev->name); |
---|
3093 | if (err < 0) |
---|
3094 | goto out; |
---|
3095 | } |
---|
3096 | |
---|
3097 | /* |
---|
3098 | * Back compatibility hook. Kill this one in 2.5 |
---|
3099 | */ |
---|
3100 | if (dev->name[0] == 0 || dev->name[0] == ' ') { |
---|
3101 | err = dev_alloc_name(dev, "eth%d"); |
---|
3102 | if (err < 0) |
---|
3103 | goto out; |
---|
3104 | } |
---|
3105 | |
---|
3106 | err = register_netdevice(dev); |
---|
3107 | out: |
---|
3108 | rtnl_unlock(); |
---|
3109 | return err; |
---|
3110 | } |
---|
3111 | EXPORT_SYMBOL(register_netdev); |
---|
3112 | |
---|
3113 | /* |
---|
3114 | * netdev_wait_allrefs - wait until all references are gone. |
---|
3115 | * |
---|
3116 | * This is called when unregistering network devices. |
---|
3117 | * |
---|
3118 | * Any protocol or device that holds a reference should register |
---|
3119 | * for netdevice notification, and cleanup and put back the |
---|
3120 | * reference if they receive an UNREGISTER event. |
---|
3121 | * We can get stuck here if buggy protocols don't correctly |
---|
3122 | * call dev_put. |
---|
3123 | */ |
---|
3124 | static void netdev_wait_allrefs(struct net_device *dev) |
---|
3125 | { |
---|
3126 | unsigned long rebroadcast_time, warning_time; |
---|
3127 | |
---|
3128 | rebroadcast_time = warning_time = jiffies; |
---|
3129 | while (atomic_read(&dev->refcnt) != 0) { |
---|
3130 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
---|
3131 | rtnl_lock(); |
---|
3132 | |
---|
3133 | /* Rebroadcast unregister notification */ |
---|
3134 | raw_notifier_call_chain(&netdev_chain, |
---|
3135 | NETDEV_UNREGISTER, dev); |
---|
3136 | |
---|
3137 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, |
---|
3138 | &dev->state)) { |
---|
3139 | /* We must not have linkwatch events |
---|
3140 | * pending on unregister. If this |
---|
3141 | * happens, we simply run the queue |
---|
3142 | * unscheduled, resulting in a noop |
---|
3143 | * for this device. |
---|
3144 | */ |
---|
3145 | linkwatch_run_queue(); |
---|
3146 | } |
---|
3147 | |
---|
3148 | __rtnl_unlock(); |
---|
3149 | |
---|
3150 | rebroadcast_time = jiffies; |
---|
3151 | } |
---|
3152 | |
---|
3153 | msleep(250); |
---|
3154 | |
---|
3155 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
---|
3156 | printk(KERN_EMERG "unregister_netdevice: " |
---|
3157 | "waiting for %s to become free. Usage " |
---|
3158 | "count = %d\n", |
---|
3159 | dev->name, atomic_read(&dev->refcnt)); |
---|
3160 | warning_time = jiffies; |
---|
3161 | } |
---|
3162 | } |
---|
3163 | } |
---|
3164 | |
---|
3165 | /* The sequence is: |
---|
3166 | * |
---|
3167 | * rtnl_lock(); |
---|
3168 | * ... |
---|
3169 | * register_netdevice(x1); |
---|
3170 | * register_netdevice(x2); |
---|
3171 | * ... |
---|
3172 | * unregister_netdevice(y1); |
---|
3173 | * unregister_netdevice(y2); |
---|
3174 | * ... |
---|
3175 | * rtnl_unlock(); |
---|
3176 | * free_netdev(y1); |
---|
3177 | * free_netdev(y2); |
---|
3178 | * |
---|
3179 | * We are invoked by rtnl_unlock() after it drops the semaphore. |
---|
3180 | * This allows us to deal with problems: |
---|
3181 | * 1) We can delete sysfs objects which invoke hotplug |
---|
3182 | * without deadlocking with linkwatch via keventd. |
---|
3183 | * 2) Since we run with the RTNL semaphore not held, we can sleep |
---|
3184 | * safely in order to wait for the netdev refcnt to drop to zero. |
---|
3185 | */ |
---|
3186 | static DEFINE_MUTEX(net_todo_run_mutex); |
---|
3187 | void netdev_run_todo(void) |
---|
3188 | { |
---|
3189 | struct list_head list; |
---|
3190 | |
---|
3191 | /* Need to guard against multiple cpu's getting out of order. */ |
---|
3192 | mutex_lock(&net_todo_run_mutex); |
---|
3193 | |
---|
3194 | /* Not safe to do outside the semaphore. We must not return |
---|
3195 | * until all unregister events invoked by the local processor |
---|
3196 | * have been completed (either by this todo run, or one on |
---|
3197 | * another cpu). |
---|
3198 | */ |
---|
3199 | if (list_empty(&net_todo_list)) |
---|
3200 | goto out; |
---|
3201 | |
---|
3202 | /* Snapshot list, allow later requests */ |
---|
3203 | spin_lock(&net_todo_list_lock); |
---|
3204 | list_replace_init(&net_todo_list, &list); |
---|
3205 | spin_unlock(&net_todo_list_lock); |
---|
3206 | |
---|
3207 | while (!list_empty(&list)) { |
---|
3208 | struct net_device *dev |
---|
3209 | = list_entry(list.next, struct net_device, todo_list); |
---|
3210 | list_del(&dev->todo_list); |
---|
3211 | |
---|
3212 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { |
---|
3213 | printk(KERN_ERR "network todo '%s' but state %d\n", |
---|
3214 | dev->name, dev->reg_state); |
---|
3215 | dump_stack(); |
---|
3216 | continue; |
---|
3217 | } |
---|
3218 | |
---|
3219 | netdev_unregister_sysfs(dev); |
---|
3220 | dev->reg_state = NETREG_UNREGISTERED; |
---|
3221 | |
---|
3222 | netdev_wait_allrefs(dev); |
---|
3223 | |
---|
3224 | /* paranoia */ |
---|
3225 | BUG_ON(atomic_read(&dev->refcnt)); |
---|
3226 | BUG_TRAP(!dev->ip_ptr); |
---|
3227 | BUG_TRAP(!dev->ip6_ptr); |
---|
3228 | BUG_TRAP(!dev->dn_ptr); |
---|
3229 | |
---|
3230 | /* It must be the very last action, |
---|
3231 | * after this 'dev' may point to freed up memory. |
---|
3232 | */ |
---|
3233 | if (dev->destructor) |
---|
3234 | dev->destructor(dev); |
---|
3235 | } |
---|
3236 | |
---|
3237 | out: |
---|
3238 | mutex_unlock(&net_todo_run_mutex); |
---|
3239 | } |
---|
3240 | |
---|
3241 | /** |
---|
3242 | * alloc_netdev - allocate network device |
---|
3243 | * @sizeof_priv: size of private data to allocate space for |
---|
3244 | * @name: device name format string |
---|
3245 | * @setup: callback to initialize device |
---|
3246 | * |
---|
3247 | * Allocates a struct net_device with private data area for driver use |
---|
3248 | * and performs basic initialization. |
---|
3249 | */ |
---|
3250 | struct net_device *alloc_netdev(int sizeof_priv, const char *name, |
---|
3251 | void (*setup)(struct net_device *)) |
---|
3252 | { |
---|
3253 | void *p; |
---|
3254 | struct net_device *dev; |
---|
3255 | int alloc_size; |
---|
3256 | |
---|
3257 | /* ensure 32-byte alignment of both the device and private area */ |
---|
3258 | alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; |
---|
3259 | alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; |
---|
3260 | |
---|
3261 | p = kzalloc(alloc_size, GFP_KERNEL); |
---|
3262 | if (!p) { |
---|
3263 | printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); |
---|
3264 | return NULL; |
---|
3265 | } |
---|
3266 | |
---|
3267 | dev = (struct net_device *) |
---|
3268 | (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); |
---|
3269 | dev->padded = (char *)dev - (char *)p; |
---|
3270 | |
---|
3271 | if (sizeof_priv) |
---|
3272 | dev->priv = netdev_priv(dev); |
---|
3273 | |
---|
3274 | setup(dev); |
---|
3275 | strcpy(dev->name, name); |
---|
3276 | return dev; |
---|
3277 | } |
---|
3278 | EXPORT_SYMBOL(alloc_netdev); |
---|
3279 | |
---|
3280 | /** |
---|
3281 | * free_netdev - free network device |
---|
3282 | * @dev: device |
---|
3283 | * |
---|
3284 | * This function does the last stage of destroying an allocated device |
---|
3285 | * interface. The reference to the device object is released. |
---|
3286 | * If this is the last reference then it will be freed. |
---|
3287 | */ |
---|
3288 | void free_netdev(struct net_device *dev) |
---|
3289 | { |
---|
3290 | #ifdef CONFIG_SYSFS |
---|
3291 | /* Compatibility with error handling in drivers */ |
---|
3292 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
---|
3293 | kfree((char *)dev - dev->padded); |
---|
3294 | return; |
---|
3295 | } |
---|
3296 | |
---|
3297 | BUG_ON(dev->reg_state != NETREG_UNREGISTERED); |
---|
3298 | dev->reg_state = NETREG_RELEASED; |
---|
3299 | |
---|
3300 | /* will free via class release */ |
---|
3301 | class_device_put(&dev->class_dev); |
---|
3302 | #else |
---|
3303 | kfree((char *)dev - dev->padded); |
---|
3304 | #endif |
---|
3305 | } |
---|
3306 | |
---|
/*
 * Synchronize with packet receive processing.
 *
 * Implemented as synchronize_rcu(); the might_sleep() annotation marks
 * this as a call that must only be made from a context that may sleep.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
---|
3313 | |
---|
3314 | /** |
---|
3315 | * unregister_netdevice - remove device from the kernel |
---|
3316 | * @dev: device |
---|
3317 | * |
---|
3318 | * This function shuts down a device interface and removes it |
---|
3319 | * from the kernel tables. On success 0 is returned, on a failure |
---|
3320 | * a negative errno code is returned. |
---|
3321 | * |
---|
3322 | * Callers must hold the rtnl semaphore. You may want |
---|
3323 | * unregister_netdev() instead of this. |
---|
3324 | */ |
---|
3325 | |
---|
3326 | int unregister_netdevice(struct net_device *dev) |
---|
3327 | { |
---|
3328 | struct net_device *d, **dp; |
---|
3329 | |
---|
3330 | BUG_ON(dev_boot_phase); |
---|
3331 | ASSERT_RTNL(); |
---|
3332 | |
---|
3333 | /* Some devices call without registering for initialization unwind. */ |
---|
3334 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
---|
3335 | printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " |
---|
3336 | "was registered\n", dev->name, dev); |
---|
3337 | return -ENODEV; |
---|
3338 | } |
---|
3339 | |
---|
3340 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
---|
3341 | |
---|
3342 | /* If device is running, close it first. */ |
---|
3343 | if (dev->flags & IFF_UP) |
---|
3344 | dev_close(dev); |
---|
3345 | |
---|
3346 | /* And unlink it from device chain. */ |
---|
3347 | for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { |
---|
3348 | if (d == dev) { |
---|
3349 | write_lock_bh(&dev_base_lock); |
---|
3350 | hlist_del(&dev->name_hlist); |
---|
3351 | hlist_del(&dev->index_hlist); |
---|
3352 | if (dev_tail == &dev->next) |
---|
3353 | dev_tail = dp; |
---|
3354 | *dp = d->next; |
---|
3355 | write_unlock_bh(&dev_base_lock); |
---|
3356 | break; |
---|
3357 | } |
---|
3358 | } |
---|
3359 | if (!d) { |
---|
3360 | printk(KERN_ERR "unregister net_device: '%s' not found\n", |
---|
3361 | dev->name); |
---|
3362 | return -ENODEV; |
---|
3363 | } |
---|
3364 | |
---|
3365 | dev->reg_state = NETREG_UNREGISTERING; |
---|
3366 | |
---|
3367 | synchronize_net(); |
---|
3368 | |
---|
3369 | /* Shutdown queueing discipline. */ |
---|
3370 | dev_shutdown(dev); |
---|
3371 | |
---|
3372 | |
---|
3373 | /* Notify protocols, that we are about to destroy |
---|
3374 | this device. They should clean all the things. |
---|
3375 | */ |
---|
3376 | raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); |
---|
3377 | |
---|
3378 | /* |
---|
3379 | * Flush the multicast chain |
---|
3380 | */ |
---|
3381 | dev_mc_discard(dev); |
---|
3382 | |
---|
3383 | if (dev->uninit) |
---|
3384 | dev->uninit(dev); |
---|
3385 | |
---|
3386 | /* Notifier chain MUST detach us from master device. */ |
---|
3387 | BUG_TRAP(!dev->master); |
---|
3388 | |
---|
3389 | free_divert_blk(dev); |
---|
3390 | |
---|
3391 | /* Finish processing unregister after unlock */ |
---|
3392 | net_set_todo(dev); |
---|
3393 | |
---|
3394 | synchronize_net(); |
---|
3395 | |
---|
3396 | dev_put(dev); |
---|
3397 | return 0; |
---|
3398 | } |
---|
3399 | |
---|
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel
 *	tables. Nothing is returned; the result of unregister_netdevice()
 *	is discarded.
 *
 *	This is just a wrapper for unregister_netdevice() that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice().
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	(void) unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
---|
3420 | |
---|
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU notifier callback.  Acts only on CPU_DEAD: the dead CPU's
 * completion queue and output queue are appended to those of the CPU
 * running this callback, NET_TX_SOFTIRQ is raised, and the packets left
 * on the dead CPU's input queue are fed back through netif_rx().
 * @ocpu carries the number of the CPU the event is about.  Always
 * returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **skb_tail;
	struct net_device **dev_tail_p;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Walk to the end of our completion_queue ... */
	for (skb_tail = &sd->completion_queue; *skb_tail;
	     skb_tail = &(*skb_tail)->next)
		;
	/* ... and hang the offline CPU's completion queue onto it. */
	*skb_tail = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Same again for the output_queue. */
	for (dev_tail_p = &sd->output_queue; *dev_tail_p;
	     dev_tail_p = &(*dev_tail_p)->next_sched)
		;
	*dev_tail_p = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	for (;;) {
		skb = __skb_dequeue(&oldsd->input_pkt_queue);
		if (!skb)
			break;
		netif_rx(skb);
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
---|
3466 | |
---|
3467 | #ifdef CONFIG_NET_DMA |
---|
3468 | /** |
---|
3469 | * net_dma_rebalance - |
---|
3470 | * This is called when the number of channels allocated to the net_dma_client |
---|
3471 | * changes. The net_dma_client tries to have one DMA channel per CPU. |
---|
3472 | */ |
---|
3473 | static void net_dma_rebalance(void) |
---|
3474 | { |
---|
3475 | unsigned int cpu, i, n; |
---|
3476 | struct dma_chan *chan; |
---|
3477 | |
---|
3478 | if (net_dma_count == 0) { |
---|
3479 | for_each_online_cpu(cpu) |
---|
3480 | rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); |
---|
3481 | return; |
---|
3482 | } |
---|
3483 | |
---|
3484 | i = 0; |
---|
3485 | cpu = first_cpu(cpu_online_map); |
---|
3486 | |
---|
3487 | rcu_read_lock(); |
---|
3488 | list_for_each_entry(chan, &net_dma_client->channels, client_node) { |
---|
3489 | n = ((num_online_cpus() / net_dma_count) |
---|
3490 | + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); |
---|
3491 | |
---|
3492 | while(n) { |
---|
3493 | per_cpu(softnet_data, cpu).net_dma = chan; |
---|
3494 | cpu = next_cpu(cpu, cpu_online_map); |
---|
3495 | n--; |
---|
3496 | } |
---|
3497 | i++; |
---|
3498 | } |
---|
3499 | rcu_read_unlock(); |
---|
3500 | } |
---|
3501 | |
---|
3502 | /** |
---|
3503 | * netdev_dma_event - event callback for the net_dma_client |
---|
3504 | * @client: should always be net_dma_client |
---|
3505 | * @chan: DMA channel for the event |
---|
3506 | * @event: event type |
---|
3507 | */ |
---|
3508 | static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, |
---|
3509 | enum dma_event event) |
---|
3510 | { |
---|
3511 | spin_lock(&net_dma_event_lock); |
---|
3512 | switch (event) { |
---|
3513 | case DMA_RESOURCE_ADDED: |
---|
3514 | net_dma_count++; |
---|
3515 | net_dma_rebalance(); |
---|
3516 | break; |
---|
3517 | case DMA_RESOURCE_REMOVED: |
---|
3518 | net_dma_count--; |
---|
3519 | net_dma_rebalance(); |
---|
3520 | break; |
---|
3521 | default: |
---|
3522 | break; |
---|
3523 | } |
---|
3524 | spin_unlock(&net_dma_event_lock); |
---|
3525 | } |
---|
3526 | |
---|
3527 | /** |
---|
3528 | * netdev_dma_regiser - register the networking subsystem as a DMA client |
---|
3529 | */ |
---|
3530 | static int __init netdev_dma_register(void) |
---|
3531 | { |
---|
3532 | spin_lock_init(&net_dma_event_lock); |
---|
3533 | net_dma_client = dma_async_client_register(netdev_dma_event); |
---|
3534 | if (net_dma_client == NULL) |
---|
3535 | return -ENOMEM; |
---|
3536 | |
---|
3537 | dma_async_client_chan_request(net_dma_client, num_online_cpus()); |
---|
3538 | return 0; |
---|
3539 | } |
---|
3540 | |
---|
3541 | #else |
---|
3542 | static int __init netdev_dma_register(void) { return -ENODEV; } |
---|
3543 | #endif /* CONFIG_NET_DMA */ |
---|
3544 | |
---|
3545 | /* |
---|
3546 | * Initialize the DEV module. At boot time this walks the device list and |
---|
3547 | * unhooks any devices that fail to initialise (normally hardware not |
---|
3548 | * present) and leaves us with a valid list of present and active devices. |
---|
3549 | * |
---|
3550 | */ |
---|
3551 | |
---|
3552 | /* |
---|
3553 | * This is called single threaded during boot, so no need |
---|
3554 | * to take the rtnl semaphore. |
---|
3555 | */ |
---|
3556 | static int __init net_dev_init(void) |
---|
3557 | { |
---|
3558 | int i, rc = -ENOMEM; |
---|
3559 | |
---|
3560 | BUG_ON(!dev_boot_phase); |
---|
3561 | |
---|
3562 | net_random_init(); |
---|
3563 | |
---|
3564 | if (dev_proc_init()) |
---|
3565 | goto out; |
---|
3566 | |
---|
3567 | if (netdev_sysfs_init()) |
---|
3568 | goto out; |
---|
3569 | |
---|
3570 | INIT_LIST_HEAD(&ptype_all); |
---|
3571 | for (i = 0; i < 16; i++) |
---|
3572 | INIT_LIST_HEAD(&ptype_base[i]); |
---|
3573 | |
---|
3574 | for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) |
---|
3575 | INIT_HLIST_HEAD(&dev_name_head[i]); |
---|
3576 | |
---|
3577 | for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) |
---|
3578 | INIT_HLIST_HEAD(&dev_index_head[i]); |
---|
3579 | |
---|
3580 | /* |
---|
3581 | * Initialise the packet receive queues. |
---|
3582 | */ |
---|
3583 | |
---|
3584 | for_each_possible_cpu(i) { |
---|
3585 | struct softnet_data *queue; |
---|
3586 | |
---|
3587 | queue = &per_cpu(softnet_data, i); |
---|
3588 | skb_queue_head_init(&queue->input_pkt_queue); |
---|
3589 | queue->completion_queue = NULL; |
---|
3590 | INIT_LIST_HEAD(&queue->poll_list); |
---|
3591 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); |
---|
3592 | queue->backlog_dev.weight = weight_p; |
---|
3593 | queue->backlog_dev.poll = process_backlog; |
---|
3594 | atomic_set(&queue->backlog_dev.refcnt, 1); |
---|
3595 | } |
---|
3596 | |
---|
3597 | netdev_dma_register(); |
---|
3598 | |
---|
3599 | dev_boot_phase = 0; |
---|
3600 | |
---|
3601 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); |
---|
3602 | open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); |
---|
3603 | |
---|
3604 | hotcpu_notifier(dev_cpu_callback, 0); |
---|
3605 | dst_init(); |
---|
3606 | dev_mcast_init(); |
---|
3607 | rc = 0; |
---|
3608 | out: |
---|
3609 | return rc; |
---|
3610 | } |
---|
3611 | |
---|
/* Run net_dev_init() at subsys initcall level. */
subsys_initcall(net_dev_init);

/* Exported symbols, kept in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(skb_checksum_setup);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Only exported when bridging is built in or built as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* Only exported when CONFIG_KMOD is set. */
#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);
---|