Root/drivers/net/iseries_veth.c

1/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */
2/*
3 * IBM eServer iSeries Virtual Ethernet Device Driver
4 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
5 * Substantially cleaned up by:
6 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
7 * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation.
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22 * USA
23 *
24 *
25 * This module implements the virtual ethernet device for iSeries LPAR
26 * Linux. It uses hypervisor message passing to implement an
27 * ethernet-like network device communicating between partitions on
28 * the iSeries.
29 *
30 * The iSeries LPAR hypervisor currently allows for up to 16 different
31 * virtual ethernets. These are all dynamically configurable on
32 * OS/400 partitions, but dynamic configuration is not supported under
33 * Linux yet. An ethXX network device will be created for each
34 * virtual ethernet this partition is connected to.
35 *
36 * - This driver is responsible for routing packets to and from other
37 * partitions. The MAC addresses used by the virtual ethernets
38 * contain meaning and must not be modified.
39 *
40 * - Having 2 virtual ethernets to the same remote partition DOES NOT
41 * double the available bandwidth. The 2 devices will share the
42 * available hypervisor bandwidth.
43 *
44 * - If you send a packet to your own mac address, it will just be
45 * dropped, you won't get it on the receive side.
46 *
47 * - Multicast is implemented by sending the frame to every
48 * other partition. It is the responsibility of the receiving
49 * partition to filter the addresses desired.
50 *
51 * Tunable parameters:
52 *
53 * VETH_NUMBUFFERS: This compile time option defaults to 120. It
54 * controls how much memory Linux will allocate per remote partition
55 * it is communicating with. It can be thought of as the maximum
56 * number of packets outstanding to a remote partition at a time.
57 */
58
59#include <linux/module.h>
60#include <linux/types.h>
61#include <linux/errno.h>
62#include <linux/ioport.h>
63#include <linux/kernel.h>
64#include <linux/netdevice.h>
65#include <linux/etherdevice.h>
66#include <linux/skbuff.h>
67#include <linux/init.h>
68#include <linux/delay.h>
69#include <linux/mm.h>
70#include <linux/ethtool.h>
71#include <linux/if_ether.h>
72
73#include <asm/abs_addr.h>
74#include <asm/iseries/mf.h>
75#include <asm/uaccess.h>
76#include <asm/firmware.h>
77#include <asm/iseries/hv_lp_config.h>
78#include <asm/iseries/hv_types.h>
79#include <asm/iseries/hv_lp_event.h>
80#include <asm/iommu.h>
81#include <asm/vio.h>
82
83#undef DEBUG
84
85MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
86MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
87MODULE_LICENSE("GPL");
88
/* Event subtypes, matched against base_event.xSubtype by the handlers. */
#define VETH_EVENT_CAP		(0)
#define VETH_EVENT_FRAMES	(1)
#define VETH_EVENT_MONITOR	(2)
#define VETH_EVENT_FRAMES_ACK	(3)

/* Number of ack tokens / frame descriptors carried by a single event. */
#define VETH_MAX_ACKS_PER_MSG	(20)
#define VETH_MAX_FRAMES_PER_MSG	(6)
96
97struct veth_frames_data {
98    u32 addr[VETH_MAX_FRAMES_PER_MSG];
99    u16 len[VETH_MAX_FRAMES_PER_MSG];
100    u32 eofmask;
101};
102#define VETH_EOF_SHIFT (32-VETH_MAX_FRAMES_PER_MSG)
103
104struct veth_frames_ack_data {
105    u16 token[VETH_MAX_ACKS_PER_MSG];
106};
107
108struct veth_cap_data {
109    u8 caps_version;
110    u8 rsvd1;
111    u16 num_buffers;
112    u16 ack_threshold;
113    u16 rsvd2;
114    u32 ack_timeout;
115    u32 rsvd3;
116    u64 rsvd4[3];
117};
118
119struct veth_lpevent {
120    struct HvLpEvent base_event;
121    union {
122        struct veth_cap_data caps_data;
123        struct veth_frames_data frames_data;
124        struct veth_frames_ack_data frames_ack_data;
125    } u;
126
127};
128
/* Names reported in log messages and through ethtool. */
#define DRV_NAME	"iseries_veth"
#define DRV_VERSION	"2.0"

/* Tunables advertised to the remote partition in our capabilities. */
#define VETH_NUMBUFFERS		(120)
#define VETH_ACKTIMEOUT		(1000000) /* microseconds */
#define VETH_MAX_MCAST		(12)

#define VETH_MAX_MTU		(9000)

/* The ack threshold scales with the number of buffers. */
#if VETH_NUMBUFFERS >= 40
#define ACK_THRESHOLD		(20)
#elif VETH_NUMBUFFERS >= 20
#define ACK_THRESHOLD		(10)
#elif VETH_NUMBUFFERS >= 10
#define ACK_THRESHOLD		(4)
#else
#define ACK_THRESHOLD		(1)
#endif
147
/* Bits of veth_lpar_connection.state; changed under the connection lock. */
#define VETH_STATE_SHUTDOWN	(0x0001) /* connection is permanently down */
#define VETH_STATE_OPEN		(0x0002) /* event path has been opened */
#define VETH_STATE_RESET	(0x0004) /* a reset has been requested */
#define VETH_STATE_SENTMON	(0x0008) /* monitor event sent */
#define VETH_STATE_SENTCAPS	(0x0010) /* our capabilities sent */
#define VETH_STATE_GOTCAPACK	(0x0020) /* ack for our capabilities received */
#define VETH_STATE_GOTCAPS	(0x0040) /* remote capabilities received */
#define VETH_STATE_SENTCAPACK	(0x0080) /* remote capabilities acked */
#define VETH_STATE_READY	(0x0100) /* handshake done, ack timer started */
157
158struct veth_msg {
159    struct veth_msg *next;
160    struct veth_frames_data data;
161    int token;
162    int in_use;
163    struct sk_buff *skb;
164    struct device *dev;
165};
166
167struct veth_lpar_connection {
168    HvLpIndex remote_lp;
169    struct delayed_work statemachine_wq;
170    struct veth_msg *msgs;
171    int num_events;
172    struct veth_cap_data local_caps;
173
174    struct kobject kobject;
175    struct timer_list ack_timer;
176
177    struct timer_list reset_timer;
178    unsigned int reset_timeout;
179    unsigned long last_contact;
180    int outstanding_tx;
181
182    spinlock_t lock;
183    unsigned long state;
184    HvLpInstanceId src_inst;
185    HvLpInstanceId dst_inst;
186    struct veth_lpevent cap_event, cap_ack_event;
187    u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
188    u32 num_pending_acks;
189
190    int num_ack_events;
191    struct veth_cap_data remote_caps;
192    u32 ack_timeout;
193
194    struct veth_msg *msg_stack_head;
195};
196
197struct veth_port {
198    struct device *dev;
199    u64 mac_addr;
200    HvLpIndexMap lpar_map;
201
202    /* queue_lock protects the stopped_map and dev's queue. */
203    spinlock_t queue_lock;
204    HvLpIndexMap stopped_map;
205
206    /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
207    rwlock_t mcast_gate;
208    int promiscuous;
209    int num_mcast;
210    u64 mcast_addr[VETH_MAX_MCAST];
211
212    struct kobject kobject;
213};
214
215static HvLpIndex this_lp;
216static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
217static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */
218
219static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
220static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
221static void veth_wake_queues(struct veth_lpar_connection *cnx);
222static void veth_stop_queues(struct veth_lpar_connection *cnx);
223static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);
224static void veth_release_connection(struct kobject *kobject);
225static void veth_timed_ack(unsigned long ptr);
226static void veth_timed_reset(unsigned long ptr);
227
228/*
229 * Utility functions
230 */
231
/* Informational message, prefixed with the driver name. */
#define veth_info(fmt, args...) \
	printk(KERN_INFO DRV_NAME ": " fmt, ## args)

/* Error message, prefixed with the driver name and "Error: ". */
#define veth_error(fmt, args...) \
	printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)

/* Debug message; expands to an empty statement unless DEBUG is defined. */
#ifdef DEBUG
#define veth_debug(fmt, args...) \
	printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)
#else
#define veth_debug(fmt, args...) do {} while (0)
#endif
244
245/* You must hold the connection's lock when you call this function. */
246static inline void veth_stack_push(struct veth_lpar_connection *cnx,
247                   struct veth_msg *msg)
248{
249    msg->next = cnx->msg_stack_head;
250    cnx->msg_stack_head = msg;
251}
252
253/* You must hold the connection's lock when you call this function. */
254static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
255{
256    struct veth_msg *msg;
257
258    msg = cnx->msg_stack_head;
259    if (msg)
260        cnx->msg_stack_head = cnx->msg_stack_head->next;
261
262    return msg;
263}
264
265/* You must hold the connection's lock when you call this function. */
266static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
267{
268    return cnx->msg_stack_head == NULL;
269}
270
271static inline HvLpEvent_Rc
272veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
273         HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
274         u64 token,
275         u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
276{
277    return HvCallEvent_signalLpEventFast(cnx->remote_lp,
278                         HvLpEvent_Type_VirtualLan,
279                         subtype, ackind, acktype,
280                         cnx->src_inst,
281                         cnx->dst_inst,
282                         token, data1, data2, data3,
283                         data4, data5);
284}
285
286static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
287                       u16 subtype, u64 token, void *data)
288{
289    u64 *p = (u64 *) data;
290
291    return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
292                HvLpEvent_AckType_ImmediateAck,
293                token, p[0], p[1], p[2], p[3], p[4]);
294}
295
296struct veth_allocation {
297    struct completion c;
298    int num;
299};
300
301static void veth_complete_allocation(void *parm, int number)
302{
303    struct veth_allocation *vc = (struct veth_allocation *)parm;
304
305    vc->num = number;
306    complete(&vc->c);
307}
308
309static int veth_allocate_events(HvLpIndex rlp, int number)
310{
311    struct veth_allocation vc =
312        { COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 };
313
314    mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
315                sizeof(struct veth_lpevent), number,
316                &veth_complete_allocation, &vc);
317    wait_for_completion(&vc.c);
318
319    return vc.num;
320}
321
322/*
323 * sysfs support
324 */
325
326struct veth_cnx_attribute {
327    struct attribute attr;
328    ssize_t (*show)(struct veth_lpar_connection *, char *buf);
329    ssize_t (*store)(struct veth_lpar_connection *, const char *buf);
330};
331
332static ssize_t veth_cnx_attribute_show(struct kobject *kobj,
333        struct attribute *attr, char *buf)
334{
335    struct veth_cnx_attribute *cnx_attr;
336    struct veth_lpar_connection *cnx;
337
338    cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);
339    cnx = container_of(kobj, struct veth_lpar_connection, kobject);
340
341    if (!cnx_attr->show)
342        return -EIO;
343
344    return cnx_attr->show(cnx, buf);
345}
346
347#define CUSTOM_CNX_ATTR(_name, _format, _expression) \
348static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\
349{ \
350    return sprintf(buf, _format, _expression); \
351} \
352struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)
353
354#define SIMPLE_CNX_ATTR(_name) \
355    CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)
356
357SIMPLE_CNX_ATTR(outstanding_tx);
358SIMPLE_CNX_ATTR(remote_lp);
359SIMPLE_CNX_ATTR(num_events);
360SIMPLE_CNX_ATTR(src_inst);
361SIMPLE_CNX_ATTR(dst_inst);
362SIMPLE_CNX_ATTR(num_pending_acks);
363SIMPLE_CNX_ATTR(num_ack_events);
364CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));
365CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));
366CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);
367CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?
368        jiffies_to_msecs(jiffies - cnx->last_contact) : 0);
369
370#define GET_CNX_ATTR(_name) (&veth_cnx_attr_##_name.attr)
371
372static struct attribute *veth_cnx_default_attrs[] = {
373    GET_CNX_ATTR(outstanding_tx),
374    GET_CNX_ATTR(remote_lp),
375    GET_CNX_ATTR(num_events),
376    GET_CNX_ATTR(reset_timeout),
377    GET_CNX_ATTR(last_contact),
378    GET_CNX_ATTR(state),
379    GET_CNX_ATTR(src_inst),
380    GET_CNX_ATTR(dst_inst),
381    GET_CNX_ATTR(num_pending_acks),
382    GET_CNX_ATTR(num_ack_events),
383    GET_CNX_ATTR(ack_timeout),
384    NULL
385};
386
387static struct sysfs_ops veth_cnx_sysfs_ops = {
388        .show = veth_cnx_attribute_show
389};
390
391static struct kobj_type veth_lpar_connection_ktype = {
392    .release = veth_release_connection,
393    .sysfs_ops = &veth_cnx_sysfs_ops,
394    .default_attrs = veth_cnx_default_attrs
395};
396
397struct veth_port_attribute {
398    struct attribute attr;
399    ssize_t (*show)(struct veth_port *, char *buf);
400    ssize_t (*store)(struct veth_port *, const char *buf);
401};
402
403static ssize_t veth_port_attribute_show(struct kobject *kobj,
404        struct attribute *attr, char *buf)
405{
406    struct veth_port_attribute *port_attr;
407    struct veth_port *port;
408
409    port_attr = container_of(attr, struct veth_port_attribute, attr);
410    port = container_of(kobj, struct veth_port, kobject);
411
412    if (!port_attr->show)
413        return -EIO;
414
415    return port_attr->show(port, buf);
416}
417
418#define CUSTOM_PORT_ATTR(_name, _format, _expression) \
419static ssize_t _name##_show(struct veth_port *port, char *buf) \
420{ \
421    return sprintf(buf, _format, _expression); \
422} \
423struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)
424
425#define SIMPLE_PORT_ATTR(_name) \
426    CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)
427
428SIMPLE_PORT_ATTR(promiscuous);
429SIMPLE_PORT_ATTR(num_mcast);
430CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);
431CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);
432CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr);
433
434#define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr)
435static struct attribute *veth_port_default_attrs[] = {
436    GET_PORT_ATTR(mac_addr),
437    GET_PORT_ATTR(lpar_map),
438    GET_PORT_ATTR(stopped_map),
439    GET_PORT_ATTR(promiscuous),
440    GET_PORT_ATTR(num_mcast),
441    NULL
442};
443
444static struct sysfs_ops veth_port_sysfs_ops = {
445    .show = veth_port_attribute_show
446};
447
448static struct kobj_type veth_port_ktype = {
449    .sysfs_ops = &veth_port_sysfs_ops,
450    .default_attrs = veth_port_default_attrs
451};
452
453/*
454 * LPAR connection code
455 */
456
457static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
458{
459    schedule_delayed_work(&cnx->statemachine_wq, 0);
460}
461
462static void veth_take_cap(struct veth_lpar_connection *cnx,
463              struct veth_lpevent *event)
464{
465    unsigned long flags;
466
467    spin_lock_irqsave(&cnx->lock, flags);
468    /* Receiving caps may mean the other end has just come up, so
469     * we need to reload the instance ID of the far end */
470    cnx->dst_inst =
471        HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
472                          HvLpEvent_Type_VirtualLan);
473
474    if (cnx->state & VETH_STATE_GOTCAPS) {
475        veth_error("Received a second capabilities from LPAR %d.\n",
476               cnx->remote_lp);
477        event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
478        HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
479    } else {
480        memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
481        cnx->state |= VETH_STATE_GOTCAPS;
482        veth_kick_statemachine(cnx);
483    }
484    spin_unlock_irqrestore(&cnx->lock, flags);
485}
486
487static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
488                  struct veth_lpevent *event)
489{
490    unsigned long flags;
491
492    spin_lock_irqsave(&cnx->lock, flags);
493    if (cnx->state & VETH_STATE_GOTCAPACK) {
494        veth_error("Received a second capabilities ack from LPAR %d.\n",
495               cnx->remote_lp);
496    } else {
497        memcpy(&cnx->cap_ack_event, event,
498               sizeof(&cnx->cap_ack_event));
499        cnx->state |= VETH_STATE_GOTCAPACK;
500        veth_kick_statemachine(cnx);
501    }
502    spin_unlock_irqrestore(&cnx->lock, flags);
503}
504
505static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
506                  struct veth_lpevent *event)
507{
508    unsigned long flags;
509
510    spin_lock_irqsave(&cnx->lock, flags);
511    veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
512
513    /* Avoid kicking the statemachine once we're shutdown.
514     * It's unnecessary and it could break veth_stop_connection(). */
515
516    if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
517        cnx->state |= VETH_STATE_RESET;
518        veth_kick_statemachine(cnx);
519    }
520    spin_unlock_irqrestore(&cnx->lock, flags);
521}
522
523static void veth_handle_ack(struct veth_lpevent *event)
524{
525    HvLpIndex rlp = event->base_event.xTargetLp;
526    struct veth_lpar_connection *cnx = veth_cnx[rlp];
527
528    BUG_ON(! cnx);
529
530    switch (event->base_event.xSubtype) {
531    case VETH_EVENT_CAP:
532        veth_take_cap_ack(cnx, event);
533        break;
534    case VETH_EVENT_MONITOR:
535        veth_take_monitor_ack(cnx, event);
536        break;
537    default:
538        veth_error("Unknown ack type %d from LPAR %d.\n",
539                event->base_event.xSubtype, rlp);
540    };
541}
542
543static void veth_handle_int(struct veth_lpevent *event)
544{
545    HvLpIndex rlp = event->base_event.xSourceLp;
546    struct veth_lpar_connection *cnx = veth_cnx[rlp];
547    unsigned long flags;
548    int i, acked = 0;
549
550    BUG_ON(! cnx);
551
552    switch (event->base_event.xSubtype) {
553    case VETH_EVENT_CAP:
554        veth_take_cap(cnx, event);
555        break;
556    case VETH_EVENT_MONITOR:
557        /* do nothing... this'll hang out here til we're dead,
558         * and the hypervisor will return it for us. */
559        break;
560    case VETH_EVENT_FRAMES_ACK:
561        spin_lock_irqsave(&cnx->lock, flags);
562
563        for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
564            u16 msgnum = event->u.frames_ack_data.token[i];
565
566            if (msgnum < VETH_NUMBUFFERS) {
567                veth_recycle_msg(cnx, cnx->msgs + msgnum);
568                cnx->outstanding_tx--;
569                acked++;
570            }
571        }
572
573        if (acked > 0) {
574            cnx->last_contact = jiffies;
575            veth_wake_queues(cnx);
576        }
577
578        spin_unlock_irqrestore(&cnx->lock, flags);
579        break;
580    case VETH_EVENT_FRAMES:
581        veth_receive(cnx, event);
582        break;
583    default:
584        veth_error("Unknown interrupt type %d from LPAR %d.\n",
585                event->base_event.xSubtype, rlp);
586    };
587}
588
/*
 * Entry point for virtual LAN LP events: acks go to veth_handle_ack(),
 * everything else to veth_handle_int().
 */
static void veth_handle_event(struct HvLpEvent *event)
{
	struct veth_lpevent *veth_event = (struct veth_lpevent *)event;

	if (!hvlpevent_is_ack(event)) {
		veth_handle_int(veth_event);
		return;
	}

	veth_handle_ack(veth_event);
}
598
599static int veth_process_caps(struct veth_lpar_connection *cnx)
600{
601    struct veth_cap_data *remote_caps = &cnx->remote_caps;
602    int num_acks_needed;
603
604    /* Convert timer to jiffies */
605    cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;
606
607    if ( (remote_caps->num_buffers == 0)
608         || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
609         || (remote_caps->ack_threshold == 0)
610         || (cnx->ack_timeout == 0) ) {
611        veth_error("Received incompatible capabilities from LPAR %d.\n",
612                cnx->remote_lp);
613        return HvLpEvent_Rc_InvalidSubtypeData;
614    }
615
616    num_acks_needed = (remote_caps->num_buffers
617               / remote_caps->ack_threshold) + 1;
618
619    /* FIXME: locking on num_ack_events? */
620    if (cnx->num_ack_events < num_acks_needed) {
621        int num;
622
623        num = veth_allocate_events(cnx->remote_lp,
624                       num_acks_needed-cnx->num_ack_events);
625        if (num > 0)
626            cnx->num_ack_events += num;
627
628        if (cnx->num_ack_events < num_acks_needed) {
629            veth_error("Couldn't allocate enough ack events "
630                    "for LPAR %d.\n", cnx->remote_lp);
631
632            return HvLpEvent_Rc_BufferNotAvailable;
633        }
634    }
635
636
637    return HvLpEvent_Rc_Good;
638}
639
640/* FIXME: The gotos here are a bit dubious */
641static void veth_statemachine(struct work_struct *work)
642{
643    struct veth_lpar_connection *cnx =
644        container_of(work, struct veth_lpar_connection,
645                 statemachine_wq.work);
646    int rlp = cnx->remote_lp;
647    int rc;
648
649    spin_lock_irq(&cnx->lock);
650
651 restart:
652    if (cnx->state & VETH_STATE_RESET) {
653        if (cnx->state & VETH_STATE_OPEN)
654            HvCallEvent_closeLpEventPath(cnx->remote_lp,
655                             HvLpEvent_Type_VirtualLan);
656
657        /*
658         * Reset ack data. This prevents the ack_timer actually
659         * doing anything, even if it runs one more time when
660         * we drop the lock below.
661         */
662        memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
663        cnx->num_pending_acks = 0;
664
665        cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
666                | VETH_STATE_OPEN | VETH_STATE_SENTCAPS
667                | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
668                | VETH_STATE_SENTCAPACK | VETH_STATE_READY);
669
670        /* Clean up any leftover messages */
671        if (cnx->msgs) {
672            int i;
673            for (i = 0; i < VETH_NUMBUFFERS; ++i)
674                veth_recycle_msg(cnx, cnx->msgs + i);
675        }
676
677        cnx->outstanding_tx = 0;
678        veth_wake_queues(cnx);
679
680        /* Drop the lock so we can do stuff that might sleep or
681         * take other locks. */
682        spin_unlock_irq(&cnx->lock);
683
684        del_timer_sync(&cnx->ack_timer);
685        del_timer_sync(&cnx->reset_timer);
686
687        spin_lock_irq(&cnx->lock);
688
689        if (cnx->state & VETH_STATE_RESET)
690            goto restart;
691
692        /* Hack, wait for the other end to reset itself. */
693        if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
694            schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
695            goto out;
696        }
697    }
698
699    if (cnx->state & VETH_STATE_SHUTDOWN)
700        /* It's all over, do nothing */
701        goto out;
702
703    if ( !(cnx->state & VETH_STATE_OPEN) ) {
704        if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
705            goto cant_cope;
706
707        HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
708        cnx->src_inst =
709            HvCallEvent_getSourceLpInstanceId(rlp,
710                              HvLpEvent_Type_VirtualLan);
711        cnx->dst_inst =
712            HvCallEvent_getTargetLpInstanceId(rlp,
713                              HvLpEvent_Type_VirtualLan);
714        cnx->state |= VETH_STATE_OPEN;
715    }
716
717    if ( (cnx->state & VETH_STATE_OPEN)
718         && !(cnx->state & VETH_STATE_SENTMON) ) {
719        rc = veth_signalevent(cnx, VETH_EVENT_MONITOR,
720                      HvLpEvent_AckInd_DoAck,
721                      HvLpEvent_AckType_DeferredAck,
722                      0, 0, 0, 0, 0, 0);
723
724        if (rc == HvLpEvent_Rc_Good) {
725            cnx->state |= VETH_STATE_SENTMON;
726        } else {
727            if ( (rc != HvLpEvent_Rc_PartitionDead)
728                 && (rc != HvLpEvent_Rc_PathClosed) )
729                veth_error("Error sending monitor to LPAR %d, "
730                        "rc = %d\n", rlp, rc);
731
732            /* Oh well, hope we get a cap from the other
733             * end and do better when that kicks us */
734            goto out;
735        }
736    }
737
738    if ( (cnx->state & VETH_STATE_OPEN)
739         && !(cnx->state & VETH_STATE_SENTCAPS)) {
740        u64 *rawcap = (u64 *)&cnx->local_caps;
741
742        rc = veth_signalevent(cnx, VETH_EVENT_CAP,
743                      HvLpEvent_AckInd_DoAck,
744                      HvLpEvent_AckType_ImmediateAck,
745                      0, rawcap[0], rawcap[1], rawcap[2],
746                      rawcap[3], rawcap[4]);
747
748        if (rc == HvLpEvent_Rc_Good) {
749            cnx->state |= VETH_STATE_SENTCAPS;
750        } else {
751            if ( (rc != HvLpEvent_Rc_PartitionDead)
752                 && (rc != HvLpEvent_Rc_PathClosed) )
753                veth_error("Error sending caps to LPAR %d, "
754                        "rc = %d\n", rlp, rc);
755
756            /* Oh well, hope we get a cap from the other
757             * end and do better when that kicks us */
758            goto out;
759        }
760    }
761
762    if ((cnx->state & VETH_STATE_GOTCAPS)
763        && !(cnx->state & VETH_STATE_SENTCAPACK)) {
764        struct veth_cap_data *remote_caps = &cnx->remote_caps;
765
766        memcpy(remote_caps, &cnx->cap_event.u.caps_data,
767               sizeof(*remote_caps));
768
769        spin_unlock_irq(&cnx->lock);
770        rc = veth_process_caps(cnx);
771        spin_lock_irq(&cnx->lock);
772
773        /* We dropped the lock, so recheck for anything which
774         * might mess us up */
775        if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
776            goto restart;
777
778        cnx->cap_event.base_event.xRc = rc;
779        HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
780        if (rc == HvLpEvent_Rc_Good)
781            cnx->state |= VETH_STATE_SENTCAPACK;
782        else
783            goto cant_cope;
784    }
785
786    if ((cnx->state & VETH_STATE_GOTCAPACK)
787        && (cnx->state & VETH_STATE_GOTCAPS)
788        && !(cnx->state & VETH_STATE_READY)) {
789        if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
790            /* Start the ACK timer */
791            cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
792            add_timer(&cnx->ack_timer);
793            cnx->state |= VETH_STATE_READY;
794        } else {
795            veth_error("Caps rejected by LPAR %d, rc = %d\n",
796                    rlp, cnx->cap_ack_event.base_event.xRc);
797            goto cant_cope;
798        }
799    }
800
801 out:
802    spin_unlock_irq(&cnx->lock);
803    return;
804
805 cant_cope:
806    /* FIXME: we get here if something happens we really can't
807     * cope with. The link will never work once we get here, and
808     * all we can do is not lock the rest of the system up */
809    veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
810            " (state = 0x%04lx)\n", rlp, cnx->state);
811    cnx->state |= VETH_STATE_SHUTDOWN;
812    spin_unlock_irq(&cnx->lock);
813}
814
815static int veth_init_connection(u8 rlp)
816{
817    struct veth_lpar_connection *cnx;
818    struct veth_msg *msgs;
819    int i;
820
821    if ( (rlp == this_lp)
822         || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
823        return 0;
824
825    cnx = kzalloc(sizeof(*cnx), GFP_KERNEL);
826    if (! cnx)
827        return -ENOMEM;
828
829    cnx->remote_lp = rlp;
830    spin_lock_init(&cnx->lock);
831    INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine);
832
833    init_timer(&cnx->ack_timer);
834    cnx->ack_timer.function = veth_timed_ack;
835    cnx->ack_timer.data = (unsigned long) cnx;
836
837    init_timer(&cnx->reset_timer);
838    cnx->reset_timer.function = veth_timed_reset;
839    cnx->reset_timer.data = (unsigned long) cnx;
840    cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);
841
842    memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
843
844    veth_cnx[rlp] = cnx;
845
846    /* This gets us 1 reference, which is held on behalf of the driver
847     * infrastructure. It's released at module unload. */
848    kobject_init(&cnx->kobject, &veth_lpar_connection_ktype);
849
850    msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL);
851    if (! msgs) {
852        veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
853        return -ENOMEM;
854    }
855
856    cnx->msgs = msgs;
857
858    for (i = 0; i < VETH_NUMBUFFERS; i++) {
859        msgs[i].token = i;
860        veth_stack_push(cnx, msgs + i);
861    }
862
863    cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);
864
865    if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
866        veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
867        return -ENOMEM;
868    }
869
870    cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
871    cnx->local_caps.ack_threshold = ACK_THRESHOLD;
872    cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;
873
874    return 0;
875}
876
877static void veth_stop_connection(struct veth_lpar_connection *cnx)
878{
879    if (!cnx)
880        return;
881
882    spin_lock_irq(&cnx->lock);
883    cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
884    veth_kick_statemachine(cnx);
885    spin_unlock_irq(&cnx->lock);
886
887    /* There's a slim chance the reset code has just queued the
888     * statemachine to run in five seconds. If so we need to cancel
889     * that and requeue the work to run now. */
890    if (cancel_delayed_work(&cnx->statemachine_wq)) {
891        spin_lock_irq(&cnx->lock);
892        veth_kick_statemachine(cnx);
893        spin_unlock_irq(&cnx->lock);
894    }
895
896    /* Wait for the state machine to run. */
897    flush_scheduled_work();
898}
899
900static void veth_destroy_connection(struct veth_lpar_connection *cnx)
901{
902    if (!cnx)
903        return;
904
905    if (cnx->num_events > 0)
906        mf_deallocate_lp_events(cnx->remote_lp,
907                      HvLpEvent_Type_VirtualLan,
908                      cnx->num_events,
909                      NULL, NULL);
910    if (cnx->num_ack_events > 0)
911        mf_deallocate_lp_events(cnx->remote_lp,
912                      HvLpEvent_Type_VirtualLan,
913                      cnx->num_ack_events,
914                      NULL, NULL);
915
916    kfree(cnx->msgs);
917    veth_cnx[cnx->remote_lp] = NULL;
918    kfree(cnx);
919}
920
921static void veth_release_connection(struct kobject *kobj)
922{
923    struct veth_lpar_connection *cnx;
924    cnx = container_of(kobj, struct veth_lpar_connection, kobject);
925    veth_stop_connection(cnx);
926    veth_destroy_connection(cnx);
927}
928
929/*
930 * net_device code
931 */
932
/* ndo_open hook: start the device's transmit queue. Always returns 0. */
static int veth_open(struct net_device *dev)
{
	netif_start_queue(dev);

	return 0;
}
938
/* ndo_stop hook: stop the device's transmit queue. Always returns 0. */
static int veth_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
944
945static int veth_change_mtu(struct net_device *dev, int new_mtu)
946{
947    if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
948        return -EINVAL;
949    dev->mtu = new_mtu;
950    return 0;
951}
952
953static void veth_set_multicast_list(struct net_device *dev)
954{
955    struct veth_port *port = netdev_priv(dev);
956    unsigned long flags;
957
958    write_lock_irqsave(&port->mcast_gate, flags);
959
960    if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
961            (dev->mc_count > VETH_MAX_MCAST)) {
962        port->promiscuous = 1;
963    } else {
964        struct dev_mc_list *dmi = dev->mc_list;
965        int i;
966
967        port->promiscuous = 0;
968
969        /* Update table */
970        port->num_mcast = 0;
971
972        for (i = 0; i < dev->mc_count; i++) {
973            u8 *addr = dmi->dmi_addr;
974            u64 xaddr = 0;
975
976            if (addr[0] & 0x01) {/* multicast address? */
977                memcpy(&xaddr, addr, ETH_ALEN);
978                port->mcast_addr[port->num_mcast] = xaddr;
979                port->num_mcast++;
980            }
981            dmi = dmi->next;
982        }
983    }
984
985    write_unlock_irqrestore(&port->mcast_gate, flags);
986}
987
988static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
989{
990    strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1);
991    info->driver[sizeof(info->driver) - 1] = '\0';
992    strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1);
993    info->version[sizeof(info->version) - 1] = '\0';
994}
995
996static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
997{
998    ecmd->supported = (SUPPORTED_1000baseT_Full
999              | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
1000    ecmd->advertising = (SUPPORTED_1000baseT_Full
1001                | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
1002    ecmd->port = PORT_FIBRE;
1003    ecmd->transceiver = XCVR_INTERNAL;
1004    ecmd->phy_address = 0;
1005    ecmd->speed = SPEED_1000;
1006    ecmd->duplex = DUPLEX_FULL;
1007    ecmd->autoneg = AUTONEG_ENABLE;
1008    ecmd->maxtxpkt = 120;
1009    ecmd->maxrxpkt = 120;
1010    return 0;
1011}
1012
1013static u32 veth_get_link(struct net_device *dev)
1014{
1015    return 1;
1016}
1017
1018static const struct ethtool_ops ops = {
1019    .get_drvinfo = veth_get_drvinfo,
1020    .get_settings = veth_get_settings,
1021    .get_link = veth_get_link,
1022};
1023
1024static const struct net_device_ops veth_netdev_ops = {
1025    .ndo_open = veth_open,
1026    .ndo_stop = veth_close,
1027    .ndo_start_xmit = veth_start_xmit,
1028    .ndo_change_mtu = veth_change_mtu,
1029    .ndo_set_multicast_list = veth_set_multicast_list,
1030    .ndo_set_mac_address = NULL,
1031    .ndo_validate_addr = eth_validate_addr,
1032};
1033
1034static struct net_device *veth_probe_one(int vlan,
1035        struct vio_dev *vio_dev)
1036{
1037    struct net_device *dev;
1038    struct veth_port *port;
1039    struct device *vdev = &vio_dev->dev;
1040    int i, rc;
1041    const unsigned char *mac_addr;
1042
1043    mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL);
1044    if (mac_addr == NULL)
1045        mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL);
1046    if (mac_addr == NULL) {
1047        veth_error("Unable to fetch MAC address from device tree.\n");
1048        return NULL;
1049    }
1050
1051    dev = alloc_etherdev(sizeof (struct veth_port));
1052    if (! dev) {
1053        veth_error("Unable to allocate net_device structure!\n");
1054        return NULL;
1055    }
1056
1057    port = netdev_priv(dev);
1058
1059    spin_lock_init(&port->queue_lock);
1060    rwlock_init(&port->mcast_gate);
1061    port->stopped_map = 0;
1062
1063    for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1064        HvLpVirtualLanIndexMap map;
1065
1066        if (i == this_lp)
1067            continue;
1068        map = HvLpConfig_getVirtualLanIndexMapForLp(i);
1069        if (map & (0x8000 >> vlan))
1070            port->lpar_map |= (1 << i);
1071    }
1072    port->dev = vdev;
1073
1074    memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
1075
1076    dev->mtu = VETH_MAX_MTU;
1077
1078    memcpy(&port->mac_addr, mac_addr, ETH_ALEN);
1079
1080    dev->netdev_ops = &veth_netdev_ops;
1081    SET_ETHTOOL_OPS(dev, &ops);
1082
1083    SET_NETDEV_DEV(dev, vdev);
1084
1085    rc = register_netdev(dev);
1086    if (rc != 0) {
1087        veth_error("Failed registering net device for vlan%d.\n", vlan);
1088        free_netdev(dev);
1089        return NULL;
1090    }
1091
1092    kobject_init(&port->kobject, &veth_port_ktype);
1093    if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port"))
1094        veth_error("Failed adding port for %s to sysfs.\n", dev->name);
1095
1096    veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
1097            dev->name, vlan, port->lpar_map);
1098
1099    return dev;
1100}
1101
1102/*
1103 * Tx path
1104 */
1105
1106static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
1107                struct net_device *dev)
1108{
1109    struct veth_lpar_connection *cnx = veth_cnx[rlp];
1110    struct veth_port *port = netdev_priv(dev);
1111    HvLpEvent_Rc rc;
1112    struct veth_msg *msg = NULL;
1113    unsigned long flags;
1114
1115    if (! cnx)
1116        return 0;
1117
1118    spin_lock_irqsave(&cnx->lock, flags);
1119
1120    if (! (cnx->state & VETH_STATE_READY))
1121        goto no_error;
1122
1123    if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
1124        goto drop;
1125
1126    msg = veth_stack_pop(cnx);
1127    if (! msg)
1128        goto drop;
1129
1130    msg->in_use = 1;
1131    msg->skb = skb_get(skb);
1132
1133    msg->data.addr[0] = dma_map_single(port->dev, skb->data,
1134                skb->len, DMA_TO_DEVICE);
1135
1136    if (dma_mapping_error(port->dev, msg->data.addr[0]))
1137        goto recycle_and_drop;
1138
1139    msg->dev = port->dev;
1140    msg->data.len[0] = skb->len;
1141    msg->data.eofmask = 1 << VETH_EOF_SHIFT;
1142
1143    rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data);
1144
1145    if (rc != HvLpEvent_Rc_Good)
1146        goto recycle_and_drop;
1147
1148    /* If the timer's not already running, start it now. */
1149    if (0 == cnx->outstanding_tx)
1150        mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);
1151
1152    cnx->last_contact = jiffies;
1153    cnx->outstanding_tx++;
1154
1155    if (veth_stack_is_empty(cnx))
1156        veth_stop_queues(cnx);
1157
1158 no_error:
1159    spin_unlock_irqrestore(&cnx->lock, flags);
1160    return 0;
1161
1162 recycle_and_drop:
1163    veth_recycle_msg(cnx, msg);
1164 drop:
1165    spin_unlock_irqrestore(&cnx->lock, flags);
1166    return 1;
1167}
1168
1169static void veth_transmit_to_many(struct sk_buff *skb,
1170                      HvLpIndexMap lpmask,
1171                      struct net_device *dev)
1172{
1173    int i, success, error;
1174
1175    success = error = 0;
1176
1177    for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1178        if ((lpmask & (1 << i)) == 0)
1179            continue;
1180
1181        if (veth_transmit_to_one(skb, i, dev))
1182            error = 1;
1183        else
1184            success = 1;
1185    }
1186
1187    if (error)
1188        dev->stats.tx_errors++;
1189
1190    if (success) {
1191        dev->stats.tx_packets++;
1192        dev->stats.tx_bytes += skb->len;
1193    }
1194}
1195
1196static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
1197{
1198    unsigned char *frame = skb->data;
1199    struct veth_port *port = netdev_priv(dev);
1200    HvLpIndexMap lpmask;
1201
1202    if (! (frame[0] & 0x01)) {
1203        /* unicast packet */
1204        HvLpIndex rlp = frame[5];
1205
1206        if ( ! ((1 << rlp) & port->lpar_map) ) {
1207            dev_kfree_skb(skb);
1208            return 0;
1209        }
1210
1211        lpmask = 1 << rlp;
1212    } else {
1213        lpmask = port->lpar_map;
1214    }
1215
1216    veth_transmit_to_many(skb, lpmask, dev);
1217
1218    dev_kfree_skb(skb);
1219
1220    return 0;
1221}
1222
1223/* You must hold the connection's lock when you call this function. */
1224static void veth_recycle_msg(struct veth_lpar_connection *cnx,
1225                 struct veth_msg *msg)
1226{
1227    u32 dma_address, dma_length;
1228
1229    if (msg->in_use) {
1230        msg->in_use = 0;
1231        dma_address = msg->data.addr[0];
1232        dma_length = msg->data.len[0];
1233
1234        if (!dma_mapping_error(msg->dev, dma_address))
1235            dma_unmap_single(msg->dev, dma_address, dma_length,
1236                    DMA_TO_DEVICE);
1237
1238        if (msg->skb) {
1239            dev_kfree_skb_any(msg->skb);
1240            msg->skb = NULL;
1241        }
1242
1243        memset(&msg->data, 0, sizeof(msg->data));
1244        veth_stack_push(cnx, msg);
1245    } else if (cnx->state & VETH_STATE_OPEN) {
1246        veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
1247                cnx->remote_lp, msg->token);
1248    }
1249}
1250
1251static void veth_wake_queues(struct veth_lpar_connection *cnx)
1252{
1253    int i;
1254
1255    for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1256        struct net_device *dev = veth_dev[i];
1257        struct veth_port *port;
1258        unsigned long flags;
1259
1260        if (! dev)
1261            continue;
1262
1263        port = netdev_priv(dev);
1264
1265        if (! (port->lpar_map & (1<<cnx->remote_lp)))
1266            continue;
1267
1268        spin_lock_irqsave(&port->queue_lock, flags);
1269
1270        port->stopped_map &= ~(1 << cnx->remote_lp);
1271
1272        if (0 == port->stopped_map && netif_queue_stopped(dev)) {
1273            veth_debug("cnx %d: woke queue for %s.\n",
1274                    cnx->remote_lp, dev->name);
1275            netif_wake_queue(dev);
1276        }
1277        spin_unlock_irqrestore(&port->queue_lock, flags);
1278    }
1279}
1280
1281static void veth_stop_queues(struct veth_lpar_connection *cnx)
1282{
1283    int i;
1284
1285    for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1286        struct net_device *dev = veth_dev[i];
1287        struct veth_port *port;
1288
1289        if (! dev)
1290            continue;
1291
1292        port = netdev_priv(dev);
1293
1294        /* If this cnx is not on the vlan for this port, continue */
1295        if (! (port->lpar_map & (1 << cnx->remote_lp)))
1296            continue;
1297
1298        spin_lock(&port->queue_lock);
1299
1300        netif_stop_queue(dev);
1301        port->stopped_map |= (1 << cnx->remote_lp);
1302
1303        veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
1304                cnx->remote_lp, dev->name, port->stopped_map);
1305
1306        spin_unlock(&port->queue_lock);
1307    }
1308}
1309
1310static void veth_timed_reset(unsigned long ptr)
1311{
1312    struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
1313    unsigned long trigger_time, flags;
1314
1315    /* FIXME is it possible this fires after veth_stop_connection()?
1316     * That would reschedule the statemachine for 5 seconds and probably
1317     * execute it after the module's been unloaded. Hmm. */
1318
1319    spin_lock_irqsave(&cnx->lock, flags);
1320
1321    if (cnx->outstanding_tx > 0) {
1322        trigger_time = cnx->last_contact + cnx->reset_timeout;
1323
1324        if (trigger_time < jiffies) {
1325            cnx->state |= VETH_STATE_RESET;
1326            veth_kick_statemachine(cnx);
1327            veth_error("%d packets not acked by LPAR %d within %d "
1328                    "seconds, resetting.\n",
1329                    cnx->outstanding_tx, cnx->remote_lp,
1330                    cnx->reset_timeout / HZ);
1331        } else {
1332            /* Reschedule the timer */
1333            trigger_time = jiffies + cnx->reset_timeout;
1334            mod_timer(&cnx->reset_timer, trigger_time);
1335        }
1336    }
1337
1338    spin_unlock_irqrestore(&cnx->lock, flags);
1339}
1340
1341/*
1342 * Rx path
1343 */
1344
1345static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
1346{
1347    int wanted = 0;
1348    int i;
1349    unsigned long flags;
1350
1351    if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
1352        return 1;
1353
1354    read_lock_irqsave(&port->mcast_gate, flags);
1355
1356    if (port->promiscuous) {
1357        wanted = 1;
1358        goto out;
1359    }
1360
1361    for (i = 0; i < port->num_mcast; ++i) {
1362        if (port->mcast_addr[i] == mac_addr) {
1363            wanted = 1;
1364            break;
1365        }
1366    }
1367
1368 out:
1369    read_unlock_irqrestore(&port->mcast_gate, flags);
1370
1371    return wanted;
1372}
1373
1374struct dma_chunk {
1375    u64 addr;
1376    u64 size;
1377};
1378
/* Number of dma_chunk entries reserved for the local side of one frame. */
#define VETH_MAX_PAGES_PER_FRAME \
    ( ((VETH_MAX_MTU + PAGE_SIZE - 2) / PAGE_SIZE) + 1 )
1380
1381static inline void veth_build_dma_list(struct dma_chunk *list,
1382                       unsigned char *p, unsigned long length)
1383{
1384    unsigned long done;
1385    int i = 1;
1386
1387    /* FIXME: skbs are continguous in real addresses. Do we
1388     * really need to break it into PAGE_SIZE chunks, or can we do
1389     * it just at the granularity of iSeries real->absolute
1390     * mapping? Indeed, given the way the allocator works, can we
1391     * count on them being absolutely contiguous? */
1392    list[0].addr = iseries_hv_addr(p);
1393    list[0].size = min(length,
1394               PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));
1395
1396    done = list[0].size;
1397    while (done < length) {
1398        list[i].addr = iseries_hv_addr(p + done);
1399        list[i].size = min(length-done, PAGE_SIZE);
1400        done += list[i].size;
1401        i++;
1402    }
1403}
1404
1405static void veth_flush_acks(struct veth_lpar_connection *cnx)
1406{
1407    HvLpEvent_Rc rc;
1408
1409    rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK,
1410                 0, &cnx->pending_acks);
1411
1412    if (rc != HvLpEvent_Rc_Good)
1413        veth_error("Failed acking frames from LPAR %d, rc = %d\n",
1414                cnx->remote_lp, (int)rc);
1415
1416    cnx->num_pending_acks = 0;
1417    memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
1418}
1419
1420static void veth_receive(struct veth_lpar_connection *cnx,
1421             struct veth_lpevent *event)
1422{
1423    struct veth_frames_data *senddata = &event->u.frames_data;
1424    int startchunk = 0;
1425    int nchunks;
1426    unsigned long flags;
1427    HvLpDma_Rc rc;
1428
1429    do {
1430        u16 length = 0;
1431        struct sk_buff *skb;
1432        struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
1433        struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
1434        u64 dest;
1435        HvLpVirtualLanIndex vlan;
1436        struct net_device *dev;
1437        struct veth_port *port;
1438
1439        /* FIXME: do we need this? */
1440        memset(local_list, 0, sizeof(local_list));
1441        memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG));
1442
1443        /* a 0 address marks the end of the valid entries */
1444        if (senddata->addr[startchunk] == 0)
1445            break;
1446
1447        /* make sure that we have at least 1 EOF entry in the
1448         * remaining entries */
1449        if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
1450            veth_error("Missing EOF fragment in event "
1451                    "eofmask = 0x%x startchunk = %d\n",
1452                    (unsigned)senddata->eofmask,
1453                    startchunk);
1454            break;
1455        }
1456
1457        /* build list of chunks in this frame */
1458        nchunks = 0;
1459        do {
1460            remote_list[nchunks].addr =
1461                (u64) senddata->addr[startchunk+nchunks] << 32;
1462            remote_list[nchunks].size =
1463                senddata->len[startchunk+nchunks];
1464            length += remote_list[nchunks].size;
1465        } while (! (senddata->eofmask &
1466                (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));
1467
1468        /* length == total length of all chunks */
1469        /* nchunks == # of chunks in this frame */
1470
1471        if ((length - ETH_HLEN) > VETH_MAX_MTU) {
1472            veth_error("Received oversize frame from LPAR %d "
1473                    "(length = %d)\n",
1474                    cnx->remote_lp, length);
1475            continue;
1476        }
1477
1478        skb = alloc_skb(length, GFP_ATOMIC);
1479        if (!skb)
1480            continue;
1481
1482        veth_build_dma_list(local_list, skb->data, length);
1483
1484        rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
1485                        event->base_event.xSourceLp,
1486                        HvLpDma_Direction_RemoteToLocal,
1487                        cnx->src_inst,
1488                        cnx->dst_inst,
1489                        HvLpDma_AddressType_RealAddress,
1490                        HvLpDma_AddressType_TceIndex,
1491                        iseries_hv_addr(&local_list),
1492                        iseries_hv_addr(&remote_list),
1493                        length);
1494        if (rc != HvLpDma_Rc_Good) {
1495            dev_kfree_skb_irq(skb);
1496            continue;
1497        }
1498
1499        vlan = skb->data[9];
1500        dev = veth_dev[vlan];
1501        if (! dev) {
1502            /*
1503             * Some earlier versions of the driver sent
1504             * broadcasts down all connections, even to lpars
1505             * that weren't on the relevant vlan. So ignore
1506             * packets belonging to a vlan we're not on.
1507             * We can also be here if we receive packets while
1508             * the driver is going down, because then dev is NULL.
1509             */
1510            dev_kfree_skb_irq(skb);
1511            continue;
1512        }
1513
1514        port = netdev_priv(dev);
1515        dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;
1516
1517        if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) {
1518            dev_kfree_skb_irq(skb);
1519            continue;
1520        }
1521        if (! veth_frame_wanted(port, dest)) {
1522            dev_kfree_skb_irq(skb);
1523            continue;
1524        }
1525
1526        skb_put(skb, length);
1527        skb->protocol = eth_type_trans(skb, dev);
1528        skb->ip_summed = CHECKSUM_NONE;
1529        netif_rx(skb); /* send it up */
1530        dev->stats.rx_packets++;
1531        dev->stats.rx_bytes += length;
1532    } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);
1533
1534    /* Ack it */
1535    spin_lock_irqsave(&cnx->lock, flags);
1536    BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);
1537
1538    cnx->pending_acks[cnx->num_pending_acks++] =
1539        event->base_event.xCorrelationToken;
1540
1541    if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold)
1542         || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
1543        veth_flush_acks(cnx);
1544
1545    spin_unlock_irqrestore(&cnx->lock, flags);
1546}
1547
1548static void veth_timed_ack(unsigned long ptr)
1549{
1550    struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
1551    unsigned long flags;
1552
1553    /* Ack all the events */
1554    spin_lock_irqsave(&cnx->lock, flags);
1555    if (cnx->num_pending_acks > 0)
1556        veth_flush_acks(cnx);
1557
1558    /* Reschedule the timer */
1559    cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
1560    add_timer(&cnx->ack_timer);
1561    spin_unlock_irqrestore(&cnx->lock, flags);
1562}
1563
1564static int veth_remove(struct vio_dev *vdev)
1565{
1566    struct veth_lpar_connection *cnx;
1567    struct net_device *dev;
1568    struct veth_port *port;
1569    int i;
1570
1571    dev = veth_dev[vdev->unit_address];
1572
1573    if (! dev)
1574        return 0;
1575
1576    port = netdev_priv(dev);
1577
1578    for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1579        cnx = veth_cnx[i];
1580
1581        if (cnx && (port->lpar_map & (1 << i))) {
1582            /* Drop our reference to connections on our VLAN */
1583            kobject_put(&cnx->kobject);
1584        }
1585    }
1586
1587    veth_dev[vdev->unit_address] = NULL;
1588    kobject_del(&port->kobject);
1589    kobject_put(&port->kobject);
1590    unregister_netdev(dev);
1591    free_netdev(dev);
1592
1593    return 0;
1594}
1595
1596static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1597{
1598    int i = vdev->unit_address;
1599    struct net_device *dev;
1600    struct veth_port *port;
1601
1602    dev = veth_probe_one(i, vdev);
1603    if (dev == NULL) {
1604        veth_remove(vdev);
1605        return 1;
1606    }
1607    veth_dev[i] = dev;
1608
1609    port = (struct veth_port*)netdev_priv(dev);
1610
1611    /* Start the state machine on each connection on this vlan. If we're
1612     * the first dev to do so this will commence link negotiation */
1613    for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1614        struct veth_lpar_connection *cnx;
1615
1616        if (! (port->lpar_map & (1 << i)))
1617            continue;
1618
1619        cnx = veth_cnx[i];
1620        if (!cnx)
1621            continue;
1622
1623        kobject_get(&cnx->kobject);
1624        veth_kick_statemachine(cnx);
1625    }
1626
1627    return 0;
1628}
1629
1630/**
1631 * veth_device_table: Used by vio.c to match devices that we
1632 * support.
1633 */
1634static struct vio_device_id veth_device_table[] __devinitdata = {
1635    { "network", "IBM,iSeries-l-lan" },
1636    { "", "" }
1637};
1638MODULE_DEVICE_TABLE(vio, veth_device_table);
1639
1640static struct vio_driver veth_driver = {
1641    .id_table = veth_device_table,
1642    .probe = veth_probe,
1643    .remove = veth_remove,
1644    .driver = {
1645        .name = DRV_NAME,
1646        .owner = THIS_MODULE,
1647    }
1648};
1649
1650/*
1651 * Module initialization/cleanup
1652 */
1653
1654static void __exit veth_module_cleanup(void)
1655{
1656    int i;
1657    struct veth_lpar_connection *cnx;
1658
1659    /* Disconnect our "irq" to stop events coming from the Hypervisor. */
1660    HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);
1661
1662    /* Make sure any work queued from Hypervisor callbacks is finished. */
1663    flush_scheduled_work();
1664
1665    for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1666        cnx = veth_cnx[i];
1667
1668        if (!cnx)
1669            continue;
1670
1671        /* Remove the connection from sysfs */
1672        kobject_del(&cnx->kobject);
1673        /* Drop the driver's reference to the connection */
1674        kobject_put(&cnx->kobject);
1675    }
1676
1677    /* Unregister the driver, which will close all the netdevs and stop
1678     * the connections when they're no longer referenced. */
1679    vio_unregister_driver(&veth_driver);
1680}
1681module_exit(veth_module_cleanup);
1682
1683static int __init veth_module_init(void)
1684{
1685    int i;
1686    int rc;
1687
1688    if (!firmware_has_feature(FW_FEATURE_ISERIES))
1689        return -ENODEV;
1690
1691    this_lp = HvLpConfig_getLpIndex_outline();
1692
1693    for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1694        rc = veth_init_connection(i);
1695        if (rc != 0)
1696            goto error;
1697    }
1698
1699    HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
1700                  &veth_handle_event);
1701
1702    rc = vio_register_driver(&veth_driver);
1703    if (rc != 0)
1704        goto error;
1705
1706    for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1707        struct kobject *kobj;
1708
1709        if (!veth_cnx[i])
1710            continue;
1711
1712        kobj = &veth_cnx[i]->kobject;
1713        /* If the add failes, complain but otherwise continue */
1714        if (0 != driver_add_kobj(&veth_driver.driver, kobj,
1715                    "cnx%.2d", veth_cnx[i]->remote_lp))
1716            veth_error("cnx %d: Failed adding to sysfs.\n", i);
1717    }
1718
1719    return 0;
1720
1721error:
1722    for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1723        veth_destroy_connection(veth_cnx[i]);
1724    }
1725
1726    return rc;
1727}
1728module_init(veth_module_init);
1729

Archive Download this file



interactive