source: trunk/packages/xen-3.1/xen-3.1/tools/vnet/vnet-module/varp.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 18 years ago

Add xen and xen-common

File size: 40.8 KB
Line 
1/*
2 * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20#ifdef __KERNEL__
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/string.h>
26#include <linux/version.h>
27
28#include <linux/net.h>
29#include <linux/in.h>
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
33#include <linux/udp.h>
34
35#include <net/ip.h>
36#include <net/protocol.h>
37#include <net/route.h>
38#include <linux/skbuff.h>
39#include <linux/spinlock.h>
40#include <asm/semaphore.h>
41
42#else
43
44#include "sys_kernel.h"
45#include <netinet/in.h>
46#include <arpa/inet.h>
47#include <linux/ip.h>
48#include <linux/udp.h>
49#include "spinlock.h"
50#include "skbuff.h"
51
52#endif
53
54#include <tunnel.h>
55#include <vnet.h>
56#include <vif.h>
57#include <if_varp.h>
58#include <varp.h>
59#include <varp_util.h>
60#include <vnet.h>
61#include <etherip.h>
62#include <vnet_forward.h>
63
64#include "allocate.h"
65#include "iostream.h"
66#include "hash_table.h"
67#include "sys_net.h"
68#include "sys_string.h"
69#include "skb_util.h"
70#include "timer_util.h"
71
72#define MODULE_NAME "VARP"
73#define DEBUG 1
74#undef DEBUG
75#include "debug.h"
76
77/** @file VARP: Virtual ARP.
78 *
79 * Handles virtual ARP requests for vnet/vmac.
80 */
81
82/*
83
84Varp uses UDP on port 1798.
85
86on domain up: ?
87  send varp.announce { id, vmac, vnet, coa } for each vif
88  that hasn't been announced before, or that has changed.
89  install vif entries in local table.
90
91on varp.announce{ id, vmac, vnet, coa }:
92  update VARP entry for vmac x vnet if have one, reset ttl.
93
94on varp.request { id, vmac, vnet }:
95  if have a vif for the requested vmac/vnet,
96  reply with varp.announce{ id, vmac, vnet, coa }
97
98on timer:
99  traverse VARP table, flush old entries.
100
101on probe timer:
102  probe again if not out of tries.
103  if out of tries invalidate entry.
104
105*/
106
/** How long a varp entry lives, in jiffies. Also used as the sweep timer period. */
#define VARP_ENTRY_TTL      (60*HZ)

/** Upper bound on the number of probes sent for one entry. */
#define VARP_PROBE_MAX      5

/** Delay between successive probes, in jiffies. */
#define VARP_PROBE_INTERVAL (3*HZ)

/** Upper bound on the number of skbs queued on one varp entry. */
#define VARP_QUEUE_MAX      16

/** Bucket count for the varp hash table (must be prime). */
#define VARP_TABLE_BUCKETS  3001
121
/** States of a varp entry. */
enum {
    /** Care-of address not known yet; the entry may be probing for it. */
    VARP_STATE_INCOMPLETE = 1,
    /** Care-of address known; packets are sent through the tunnel. */
    VARP_STATE_REACHABLE,
    /** Probing ran out of tries without an answer. */
    VARP_STATE_FAILED,
};
128
/** Bit flags stored in VarpEntry.flags. */
enum {
    /** The entry's probe timer is active. */
    VARP_FLAG_PROBING   = (1 << 0),
    /** Permanent entry. */
    VARP_FLAG_PERMANENT = (1 << 1),
};
134
135/** Key for varp entries. */
136typedef struct VarpKey {
137    /** Vnet id (network order). */
138    VnetId vnet;
139    /** Virtual MAC address. */
140    Vmac vmac;
141} VarpKey;
142
143/** An entry in the varp cache. */
144typedef struct VarpEntry {
145    /** Key for the entry. */
146    VarpKey key;
147    /** Care-of address for the key. */
148    VarpAddr addr;
149    /** Last-updated timestamp. */
150    unsigned long timestamp;
151    /** State. */
152    short state;
153    /** Flags. */
154    short flags;
155    /** Reference count. */
156    atomic_t refcount;
157    /** Lock. */
158    rwlock_t lock;
159    unsigned long lflags;
160
161    /** How many probes have been made. */
162    atomic_t probes;
163    /** Probe timer. */
164    struct timer_list timer;
165    void (*error)(struct VarpEntry *ventry, struct sk_buff *skb);
166    /** Outbound skb queue. */
167    struct sk_buff_head queue;
168    /** Maximum size of the queue. */
169    int queue_max;
170    atomic_t deleted;
171} VarpEntry;
172
173/** The varp cache. Varp entries indexed by VarpKey. */
174typedef struct VarpTable {
175
176    HashTable *table;
177
178    /** Sweep timer. */
179    struct timer_list timer;
180
181    rwlock_t lock;
182    struct semaphore mutex;
183
184    int entry_ttl;
185    int probe_max;
186    int probe_interval;
187    int queue_max;
188
189} VarpTable;
190
191/** The varp cache. */
192static VarpTable *varp_table = NULL;
193
194/** Module parameter for the multicast address. */
195static char *varp_mcaddr = NULL;
196
197/** Multicast address (network order). */
198u32 varp_mcast_addr = 0;
199
200/** UDP port (network order). */
201u16 varp_port = 0;
202
203char *varp_device = "xen-br0";
204
/* Table locking: thin wrappers round the irq-saving rwlock primitives. */
#define VarpTable_read_lock(vtable, flags) \
  do { read_lock_irqsave(&(vtable)->lock, (flags)); } while (0)

#define VarpTable_read_unlock(vtable, flags) \
  do { read_unlock_irqrestore(&(vtable)->lock, (flags)); } while (0)

#define VarpTable_write_lock(vtable, flags) \
  do { write_lock_irqsave(&(vtable)->lock, (flags)); } while (0)

#define VarpTable_write_unlock(vtable, flags) \
  do { write_unlock_irqrestore(&(vtable)->lock, (flags)); } while (0)

/* Entry locking. The IRQ flags are stashed in the entry on lock and read
 * back from it on unlock, so the unlock may be issued from a different
 * function than the lock (the 'flags' argument of unlock is overwritten).
 */
#define VarpEntry_lock(ventry, flags) \
  do { \
      write_lock_irqsave(&(ventry)->lock, (flags)); \
      (ventry)->lflags = (flags); \
  } while (0)

#define VarpEntry_unlock(ventry, flags) \
  do { \
      (flags) = (ventry)->lflags; \
      write_unlock_irqrestore(&(ventry)->lock, (flags)); \
  } while (0)
222
223void VarpTable_sweep(VarpTable *vtable);
224void VarpTable_flush(VarpTable *vtable);
225void VarpTable_print(VarpTable *vtable, IOStream *io);
226int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb);
227
228#include "./varp_util.c"
229
/** Dump the varp cache to iostdout.
 * Does nothing unless DEBUG is defined at compile time.
 */
void varp_dprint(void)
{
#ifdef DEBUG
    VarpTable_print(varp_table, iostdout);
#endif
}
237
238/** Flush the varp cache.
239 */
240void varp_flush(void){
241    VarpTable_flush(varp_table);
242}
243
244#ifdef __KERNEL__
245static int device_ucast_addr(const char *device, uint32_t *addr)
246{
247    int err;
248    struct net_device *dev = NULL;
249
250    err = vnet_get_device(device, &dev);
251    if(err) goto exit;
252    err = vnet_get_device_address(dev, addr);
253  exit:
254    if(err){
255        *addr = 0;
256    }
257    return err;
258}
259
260/** Get the unicast address of the varp device.
261 */
262int varp_ucast_addr(uint32_t *addr)
263{
264    int err = -ENODEV;
265    const char *devices[] = { varp_device, "eth0", "eth1", "eth2", NULL };
266    const char **p;
267    for(p = devices; err && *p; p++){
268        err = device_ucast_addr(*p, addr);
269    }
270    return err;
271}
272
273/** Lookup a network device by name.
274 *
275 * @param name device name
276 * @param dev return parameter for the device
277 * @return 0 on success, error code otherwise
278 */
279int vnet_get_device(const char *name, struct net_device **dev){
280    int err = 0;
281    *dev = dev_get_by_name(name);
282    if(!*dev){
283        err = -ENETDOWN;
284    }
285    return err;
286}
287
288/** Get the source address from a device.
289 *
290 * @param dev device
291 * @param addr return parameter for address
292 * @return 0 on success, error code otherwise
293 */
294int vnet_get_device_address(struct net_device *dev, u32 *addr){
295    int err = 0;
296    struct in_device *in_dev;
297
298    in_dev = in_dev_get(dev);
299    if(!in_dev){
300        err = -ENODEV;
301        goto exit;
302    }
303    *addr = in_dev->ifa_list->ifa_address;
304    in_dev_put(in_dev);
305  exit:
306    return err;
307}
308
309#else
310
/** Userland stand-in for the unicast address lookup.
 * Leaves the address untouched and always reports success.
 *
 * @param addr return parameter for the address (not written)
 * @return 0
 */
int varp_ucast_addr(uint32_t *addr)
{
    (void)addr;
    return 0;
}
315
316#endif
317
318/** Print varp info and the varp cache.
319 */
320void varp_print(IOStream *io){
321    uint32_t addr = 0;
322    varp_ucast_addr(&addr);
323
324    IOStream_print(io, "(varp \n");
325    IOStream_print(io, " (device %s)\n", varp_device);
326    IOStream_print(io, " (mcast_addr " IPFMT ")\n", NIPQUAD(varp_mcast_addr));
327    IOStream_print(io, " (ucast_addr " IPFMT ")\n", NIPQUAD(addr));
328    IOStream_print(io, " (port %d)\n", ntohs(varp_port));
329    IOStream_print(io, " (encapsulation %s)\n",
330                   (etherip_in_udp ? "etherip_in_udp" : "etherip"));
331    IOStream_print(io, " (entry_ttl %lu)\n", varp_table->entry_ttl);
332    IOStream_print(io, ")\n");
333    VarpTable_print(varp_table, io);
334}
335
336#ifdef __KERNEL__
337
338#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
339
340static inline int addr_route(u32 daddr, struct rtable **prt){
341    int err = 0;
342    struct flowi fl = {
343        .nl_u = {
344            .ip4_u = {
345                .daddr = daddr,
346            }
347        }
348    };
349   
350    err = ip_route_output_key(prt, &fl);
351    return err;
352}
353
354#else
355
356static inline int addr_route(u32 daddr, struct rtable **prt){
357    int err = 0;
358    struct rt_key key = { .dst = daddr };
359    err = ip_route_output_key(prt, &key);
360    return err;
361}
362
363#endif // LINUX_VERSION_CODE
364
#ifndef LL_RESERVED_SPACE
/* Fallback used when the included headers do not provide LL_RESERVED_SPACE:
 * round the device's hard header length down to a multiple of HH_DATA_MOD
 * and add HH_DATA_MOD. The argument is parenthesized so that expressions
 * such as '&device' expand correctly.
 */
#define HH_DATA_MOD     16
#define LL_RESERVED_SPACE(dev) \
        ((((dev)->hard_header_len) & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)

#endif // LL_RESERVED_SPACE
371
372#else // __KERNEL__
373
/* Userland build: ip_eth_mc_map() expands to an empty statement. */
#define ip_eth_mc_map(daddr, dmac) do { } while (0)
375
376#endif // __KERNEL__
377
378/** Send a varp protocol message.
379 *
380 * @param opcode varp opcode (host order)
381 * @param dev device (may be null)
382 * @param skb skb being replied to (may be null)
383 * @param vnet vnet id (in network order)
384 * @param vmac vmac (in network order)
385 * @return 0 on success, error code otherwise
386 */
387int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
388              VnetId *vnet, Vmac *vmac){
389    int err = 0;
390    int link_n = 0;
391    int ip_n = sizeof(struct iphdr);
392    int udp_n = sizeof(struct udphdr);
393    int varp_n = sizeof(VarpHdr);
394    struct sk_buff *skbout = NULL;
395    VarpHdr *varph = NULL;
396    u8 smacbuf[6] = {}, dmacbuf[6] = {};
397    u8 *smac = smacbuf, *dmac = dmacbuf;
398    u32 saddr = 0, daddr = 0;
399    u16 sport = 0, dport = 0;
400#if defined(DEBUG)
401    char vnetbuf[VNET_ID_BUF];
402#endif
403
404    dprintf("> opcode=%d vnet= %s vmac=" MACFMT "\n",
405            opcode, VnetId_ntoa(vnet, vnetbuf), MAC6TUPLE(vmac->mac));
406
407    dport = varp_port;
408    if(skbin){
409        daddr = skbin->nh.iph->saddr;
410        dmac = eth_hdr(skbin)->h_source;
411        sport = skbin->h.uh->dest;
412    } else {
413        if(MULTICAST(varp_mcast_addr)){
414            daddr = varp_mcast_addr;
415            ip_eth_mc_map(daddr, dmac);
416        } else {
417            daddr = INADDR_BROADCAST;
418        }
419        sport = varp_port;
420    }
421
422#ifdef __KERNEL__
423    {
424        struct in_device *in_dev = NULL;
425        if(!dev){
426            struct rtable *rt = NULL;
427            err = addr_route(daddr, &rt);
428            if(err) goto exit;
429            dev = rt->u.dst.dev;
430        }
431       
432        in_dev = in_dev_get(dev);
433        if(!in_dev){
434            err = -ENODEV;
435            goto exit;
436        }
437        link_n = LL_RESERVED_SPACE(dev);
438        saddr = in_dev->ifa_list->ifa_address;
439        smac = dev->dev_addr;
440        if(daddr == INADDR_BROADCAST){
441            daddr = in_dev->ifa_list->ifa_broadcast;
442            dmac = dev->broadcast;
443        }
444        in_dev_put(in_dev);
445    }
446#else
447    {
448        extern uint32_t vnetd_addr(void); 
449        saddr = vnetd_addr();
450    }
451#endif // __KERNEL__
452
453    dprintf("> dev=%s\n", (dev ? dev->name : "<none>"));
454    dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));
455    dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));
456    dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
457
458    skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC);
459    if (!skbout){
460        err = -ENOMEM;
461        goto exit;
462    }
463    skbout->dev = dev;
464    skb_reserve(skbout, link_n);
465    skbout->protocol = htons(ETH_P_IP);
466
467#ifdef __KERNEL__
468    // Device header. Pushes device header on front of skb.
469    if (dev->hard_header){
470        err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);
471        if(err < 0) goto exit;
472        skbout->mac.raw = skbout->data;
473    }
474#else
475    smac = smac; // Defeat unused variable warning.
476#endif // __KERNEL__
477
478    // IP header.
479    skbout->nh.raw = skb_put(skbout, ip_n);
480    skbout->nh.iph->version  = 4;
481    skbout->nh.iph->ihl      = ip_n / 4;
482    skbout->nh.iph->tos      = 0;
483    skbout->nh.iph->tot_len  = htons(ip_n + udp_n + varp_n);
484    skbout->nh.iph->id       = 0;
485    skbout->nh.iph->frag_off = 0;
486    skbout->nh.iph->ttl      = 64;
487    skbout->nh.iph->protocol = IPPROTO_UDP;
488    skbout->nh.iph->saddr    = saddr;
489    skbout->nh.iph->daddr    = daddr; 
490    skbout->nh.iph->check    = 0;
491
492    // UDP header.
493    skbout->h.raw = skb_put(skbout, udp_n);
494    skbout->h.uh->source     = sport;
495    skbout->h.uh->dest       = dport;
496    skbout->h.uh->len        = htons(udp_n + varp_n);
497    skbout->h.uh->check      = 0;
498
499    // Varp header.
500    varph = (void*)skb_put(skbout, varp_n);
501    *varph = (VarpHdr){};
502    varph->hdr.id            = htons(VARP_ID);
503    varph->hdr.opcode        = htons(opcode);
504    varph->vnet              = *vnet;
505    varph->vmac              = *vmac;
506    varph->addr.family       = AF_INET;
507    varph->addr.u.ip4.s_addr = saddr;
508
509    err = skb_xmit(skbout);
510
511  exit:
512    if(err && skbout) kfree_skb(skbout);
513    dprintf("< err=%d\n", err);
514    return err;
515}
516
517
518/** Send a varp request for the vnet and destination mac of a packet.
519 * Assumes the ventry is locked.
520 *
521 * @param skb packet
522 * @param vnet vnet (in network order)
523 * @return 0 on success, error code otherwise
524 */
525int varp_solicit(VnetId *vnet, Vmac *vmac){
526    return varp_send(VARP_OP_REQUEST, NULL, NULL, vnet, vmac);
527}
528
529/* Test some flags.
530 *
531 * @param ventry varp entry
532 * @param flags to test
533 * @return nonzero if flags set
534 */
535int VarpEntry_get_flags(VarpEntry *ventry, int flags){
536    return ventry->flags & flags;
537}
538
539/** Set some flags.
540 *
541 * @param ventry varp entry
542 * @param flags to set
543 * @param set set flags on if nonzero, off if zero
544 * @return new flags value
545 */
546int VarpEntry_set_flags(VarpEntry *ventry, int flags, int set){
547    if(set){
548        ventry->flags |= flags;
549    } else {
550        ventry->flags &= ~flags;
551    }
552    return ventry->flags;
553}
554
555/** Print a varp entry.
556 *
557 * @param ventry varp entry
558 */
559void VarpEntry_print(VarpEntry *ventry, IOStream *io){
560    IOStream_print(io, "(ventry \n");
561    if(ventry){
562        unsigned long now = jiffies;
563        char *state, *flags;
564        char vnetbuf[VNET_ID_BUF];
565        char addrbuf[VARP_ADDR_BUF];
566
567        switch(ventry->state){
568        case VARP_STATE_INCOMPLETE: state = "incomplete"; break;
569        case VARP_STATE_REACHABLE:  state = "reachable"; break;
570        case VARP_STATE_FAILED:     state = "failed"; break;
571        default:                    state = "unknown"; break;
572        }
573        flags = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : "-");
574
575        IOStream_print(io, " (ref %d)\n", atomic_read(&ventry->refcount));
576        IOStream_print(io, " (state %s)\n", state);
577        IOStream_print(io, " (flags %s)\n", flags);
578        IOStream_print(io, " (addr %s)\n", VarpAddr_ntoa(&ventry->addr, addrbuf));
579        IOStream_print(io, " (queue %d)\n", skb_queue_len(&ventry->queue));
580        IOStream_print(io, " (age %lu)\n", now - ventry->timestamp);
581        IOStream_print(io, " (vmac " MACFMT ")\n", MAC6TUPLE(ventry->key.vmac.mac));
582        IOStream_print(io, " (vnet %s)\n", VnetId_ntoa(&ventry->key.vnet, vnetbuf));
583    }
584    IOStream_print(io, ")\n");
585}
586
587/** Free a varp entry.
588 *
589 * @param ventry varp entry
590 */
591static void VarpEntry_free(VarpEntry *ventry){
592    if(!ventry) return;
593    deallocate(ventry);
594}
595
596/** Increment reference count.
597 *
598 * @param ventry varp entry (may be null)
599 */
600void VarpEntry_incref(VarpEntry *ventry){
601    if(!ventry) return;
602    atomic_inc(&ventry->refcount);
603}
604
605/** Decrement reference count, freeing if zero.
606 *
607 * @param ventry varp entry (may be null)
608 */
609void VarpEntry_decref(VarpEntry *ventry){
610    if(!ventry) return;
611    if(atomic_dec_and_test(&ventry->refcount)){
612        VarpEntry_free(ventry);
613    }
614}
615
616/** Call the error handler.
617 *
618 * @param ventry varp entry
619 */
620void VarpEntry_error(VarpEntry *ventry){
621    struct sk_buff *skb;
622    skb = skb_peek(&ventry->queue);
623    if(!skb) return;
624    if(ventry->error) ventry->error(ventry, skb);
625    skb_queue_purge(&ventry->queue);
626}
627
628/** Schedule the varp entry timer.
629 * Must increment the reference count before doing
630 * this the first time, so the ventry won't be freed
631 * before the timer goes off.
632 *
633 * @param ventry varp entry
634 */
635void VarpEntry_schedule(VarpEntry *ventry){
636    timer_set(&ventry->timer, VARP_PROBE_INTERVAL);
637}
638
639/** Function called when a varp entry timer goes off.
640 * If the entry is still incomplete, carries on probing.
641 * Otherwise stops probing.
642 *
643 * @param arg ventry
644 */
645static void varp_timer_fn(unsigned long arg){
646    unsigned long flags;
647    VarpEntry *ventry = (VarpEntry *)arg;
648    struct sk_buff *skb = NULL;
649    int probing = 0;
650
651    dprintf(">\n");
652    VarpEntry_lock(ventry, flags);
653    if(!atomic_read(&ventry->deleted)){
654        switch(ventry->state){
655        case VARP_STATE_REACHABLE:
656        case VARP_STATE_FAILED:
657            break;
658        case VARP_STATE_INCOMPLETE:
659            // Probe if haven't run out of tries, otherwise fail.
660            if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
661                unsigned long qflags;
662                VnetId vnet;
663                Vmac vmac;
664
665                probing = 1;
666                spin_lock_irqsave(&ventry->queue.lock, qflags);
667                skb = skb_peek(&ventry->queue);
668                if(skb){
669                    vmac = *(Vmac*)eth_hdr(skb)->h_dest;
670                }
671                spin_unlock_irqrestore(&ventry->queue.lock, qflags);
672                if(skb){
673                    dprintf("> skbs in queue - solicit\n");
674                    vnet = ventry->key.vnet;
675                    atomic_inc(&ventry->probes);
676                    VarpEntry_unlock(ventry, flags);
677                    varp_solicit(&vnet, &vmac);
678                    VarpEntry_lock(ventry, flags);       
679                } else {
680                    dprintf("> empty queue.\n");
681                }
682                VarpEntry_schedule(ventry);
683            } else {
684                VarpEntry_error(ventry);
685                ventry->state = VARP_STATE_FAILED;
686            }
687            break;
688        }
689    }
690    VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing);
691    VarpEntry_unlock(ventry, flags);
692    if(!probing) VarpEntry_decref(ventry);
693    dprintf("<\n");
694}
695
696/** Default error function for varp entries.
697 *
698 * @param ventry varp entry
699 * @param skb packet dropped because of error
700 */
701static void varp_error_fn(VarpEntry *ventry, struct sk_buff *skb){
702}
703
704/** Create a varp entry. Initializes the internal state.
705 *
706 * @param vnet vnet id
707 * @param vmac virtual MAC address (copied)
708 * @return ventry or null
709 */
710VarpEntry * VarpEntry_new(VnetId *vnet, Vmac *vmac){
711    VarpEntry *ventry = ALLOCATE(VarpEntry);
712    if(ventry){
713        unsigned long now = jiffies;
714
715        atomic_set(&ventry->refcount, 1);
716        atomic_set(&ventry->probes, 0);
717        atomic_set(&ventry->deleted, 0);
718        ventry->lock = RW_LOCK_UNLOCKED;
719        ventry->state = VARP_STATE_INCOMPLETE;
720        ventry->queue_max = VARP_QUEUE_MAX;
721        skb_queue_head_init(&ventry->queue);
722        timer_init(&ventry->timer, varp_timer_fn, ventry);
723        ventry->timestamp = now;
724        ventry->error = varp_error_fn;
725
726        ventry->key.vnet = *vnet;
727        ventry->key.vmac = *vmac;
728    }
729    return ventry;
730}
731
732/** Hash function for keys in the varp cache.
733 * Hashes the vnet id and mac.
734 *
735 * @param k key (VarpKey)
736 * @return hashcode
737 */
738static Hashcode varp_key_hash_fn(void *k){
739    return hash_hvoid(0, k, sizeof(VarpKey));
740}
741
742/** Test equality for keys in the varp cache.
743 * Compares vnet and mac.
744 *
745 * @param k1 key to compare (VarpKey)
746 * @param k2 key to compare (VarpKey)
747 * @return 1 if equal, 0 otherwise
748 */
749static int varp_key_equal_fn(void *k1, void *k2){
750    return memcmp(k1, k2, sizeof(VarpKey)) == 0;
751}
752
753/** Free an entry in the varp cache.
754 *
755 * @param table containing table
756 * @param entry entry to free
757 */
758static void varp_entry_free_fn(HashTable *table, HTEntry *entry){
759    VarpEntry *ventry;
760    if(!entry) return;
761    ventry = entry->value;
762    if(ventry) VarpEntry_decref(ventry);
763    HTEntry_free(entry);
764}
765
766/** Free the whole varp cache.
767 * Dangerous.
768 *
769 * @param vtable varp cache
770 */
771void VarpTable_free(VarpTable *vtable){
772    unsigned long vtflags;
773    if(!vtable) return;
774    VarpTable_write_lock(vtable, vtflags);
775    timer_cancel(&vtable->timer);
776    vtable->timer.data = 0;
777    if(vtable->table){
778        HashTable *table = vtable->table;
779        HashTable_for_decl(entry);
780
781        vtable->table = NULL;
782        HashTable_for_each(entry, table){
783            VarpEntry *ventry = entry->value;
784            unsigned long flags;
785            VarpEntry_lock(ventry, flags);
786            atomic_set(&ventry->deleted, 1);
787            if(VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
788                timer_cancel(&ventry->timer);
789                ventry->timer.data = 0;
790                VarpEntry_decref(ventry);
791            }
792            VarpEntry_unlock(ventry, flags);
793        }
794        HashTable_free(table); 
795    }
796    VarpTable_write_unlock(vtable, vtflags);
797    deallocate(vtable);
798}
799
800/** Schedule the varp table timer.
801 *
802 * @param vtable varp table
803 */
804void VarpTable_schedule(VarpTable *vtable){
805    timer_set(&vtable->timer, vtable->entry_ttl);
806}
807
808/** Function called when the varp table timer goes off.
809 * Sweeps old varp cache entries and reschedules itself.
810 *
811 * @param arg varp table
812 */
813static void varp_table_timer_fn(unsigned long arg){
814    VarpTable *vtable = (VarpTable *)arg;
815    if(vtable){
816        VarpTable_sweep(vtable);
817        VarpTable_schedule(vtable);
818    }
819}
820
821/** Print a varp table.
822 *
823 * @param vtable table
824 */
825void VarpTable_print(VarpTable *vtable, IOStream *io){
826    HashTable_for_decl(entry);
827    VarpEntry *ventry;
828    unsigned long vtflags, flags;
829
830    VarpTable_read_lock(vtable, vtflags);
831    HashTable_for_each(entry, vtable->table){
832        ventry = entry->value;
833        VarpEntry_lock(ventry, flags);
834        VarpEntry_print(ventry, io);
835        VarpEntry_unlock(ventry, flags);
836    }
837    VarpTable_read_unlock(vtable, vtflags);
838}
839
840/** Create a varp table.
841 *
842 * @return new table or null
843 */
844VarpTable * VarpTable_new(void){
845    int err = -ENOMEM;
846    VarpTable *vtable = NULL;
847
848    vtable = ALLOCATE(VarpTable);
849    if(!vtable) goto exit;
850    vtable->table = HashTable_new(VARP_TABLE_BUCKETS);
851    if(!vtable->table) goto exit;
852    vtable->table->key_size = sizeof(VarpKey);
853    vtable->table->key_equal_fn = varp_key_equal_fn;
854    vtable->table->key_hash_fn = varp_key_hash_fn;
855    vtable->table->entry_free_fn = varp_entry_free_fn;
856
857    vtable->entry_ttl = VARP_ENTRY_TTL;
858    vtable->probe_max = VARP_PROBE_MAX;
859    vtable->probe_interval = VARP_PROBE_INTERVAL;
860    vtable->queue_max = VARP_QUEUE_MAX;
861
862    init_MUTEX(&vtable->mutex);
863    vtable->lock = RW_LOCK_UNLOCKED;
864    timer_init(&vtable->timer, varp_table_timer_fn, vtable);
865    err = 0;
866  exit:
867    if(err){
868        VarpTable_free(vtable);
869        vtable = NULL;
870    }
871    return vtable;
872}
873
874/** Add a new entry to the varp table.
875 *
876 * @param vtable table
877 * @param vnet vnet id
878 * @param vmac virtual MAC address (copied)
879 * @return new entry or null
880 */
881VarpEntry * VarpTable_add(VarpTable *vtable, VnetId *vnet, Vmac *vmac){
882    int err = 0;
883    VarpKey key = { .vnet = *vnet, .vmac = *vmac};
884    VarpEntry *ventry = NULL;
885    HTEntry *entry = NULL;
886    unsigned long vtflags;
887
888    VarpTable_write_lock(vtable, vtflags);
889    ventry = HashTable_get(vtable->table, &key);
890    if(ventry){
891        VarpEntry_incref(ventry);
892        goto exit;
893    }
894    err = -ENOMEM;
895    ventry = VarpEntry_new(vnet, vmac);
896    if(!ventry) goto exit;
897    entry = HashTable_add(vtable->table, ventry, ventry);
898    if(!entry){
899        VarpEntry_decref(ventry);
900        ventry = NULL;
901        goto exit;
902    }
903    err = 0;
904    VarpEntry_incref(ventry);
905  exit:
906    VarpTable_write_unlock(vtable, vtflags);
907    return ventry;
908}
909
910/** Remove an entry from the varp table.
911 *
912 * @param vtable table
913 * @param ventry entry to remove
914 * @return removed count
915 */
916int VarpTable_remove(VarpTable *vtable, VarpEntry *ventry){
917    //TODO: Could send a varp announce with null addr for the entry
918    // vnet and vmac to notify others, so they will resolve the addr
919    // instead of sending traffic to us.
920    atomic_set(&ventry->deleted, 1);
921    skb_queue_purge(&ventry->queue);
922    return HashTable_remove(vtable->table, ventry);
923}
924
925/** Remove all entries using a vnet.
926 * Caller must hold the table lock.
927 *
928 * @param vtable table
929 * @param vnet vnet
930 * @return removed count
931 */
932int VarpTable_remove_vnet(VarpTable *vtable, VnetId *vnet){
933    int count = 0;
934    HashTable_for_decl(entry);
935
936    HashTable_for_each(entry, vtable->table){
937        VarpEntry *ventry = entry->value;
938        if(VnetId_eq(&ventry->key.vnet, vnet)){
939            count += VarpTable_remove(vtable, ventry);
940        }
941    }
942    return count;
943}
944
945/** Remove all entries using a vnet from the varp table.
946 *
947 * @param vnet vnet
948 * @return removed count
949 */
950int varp_remove_vnet(VnetId *vnet){
951    int count = 0;
952    unsigned long vtflags;
953
954    VarpTable_write_lock(varp_table, vtflags);
955    count = VarpTable_remove_vnet(varp_table, vnet);
956    VarpTable_write_unlock(varp_table, vtflags);
957    return count;
958}
959
960/** Lookup an entry in the varp table.
961 *
962 * @param vtable table
963 * @param vnet vnet id
964 * @param vmac virtual MAC address
965 * @param create create a new entry if needed if true
966 * @return entry found or null
967 */
968VarpEntry * VarpTable_lookup(VarpTable *vtable, VnetId *vnet, Vmac *vmac, int create){
969    VarpKey key = { .vnet = *vnet, .vmac = *vmac };
970    VarpEntry *ventry = NULL;
971    unsigned long vtflags;
972
973    VarpTable_read_lock(vtable, vtflags);
974    ventry = HashTable_get(vtable->table, &key);
975    if(ventry) VarpEntry_incref(ventry);
976    VarpTable_read_unlock(vtable, vtflags);
977
978    if(!ventry && create){
979        ventry = VarpTable_add(vtable, vnet, vmac);
980    }
981    return ventry;
982}
983
984/** Handle output for a reachable ventry.
985 * Send the skb using the tunnel to the care-of address.
986 * Assumes the ventry lock is held.
987 *
988 * @param ventry varp entry
989 * @param skb skb to send
990 * @return 0 on success, error code otherwise
991 */
992int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){
993    int err = 0;
994    unsigned long flags = 0;
995    VarpAddr addr;
996    VnetId vnet;
997
998    dprintf("> skb=%p\n", skb);
999    vnet = ventry->key.vnet;
1000    addr = ventry->addr;
1001    VarpEntry_unlock(ventry, flags);
1002    err = vnet_tunnel_send(&vnet, &addr, skb);
1003    VarpEntry_lock(ventry, flags);
1004    dprintf("< err=%d\n", err);
1005    return err;
1006}
1007
1008/** Handle output for a non-reachable ventry. Send messages to complete it.
1009 * If the entry is still incomplete, queue the skb, otherwise
1010 * send it. If the queue is full, dequeue and free an old skb to
1011 * make room for the new one.
1012 * Assumes the ventry lock is held.
1013 *
1014 * @param ventry varp entry
1015 * @param skb skb to send
1016 * @return 0 on success, error code otherwise
1017 */
1018int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){
1019    int err = 0;
1020    unsigned long flags = 0;
1021    VnetId vnet;
1022    Vmac vmac;
1023
1024    dprintf("> skb=%p\n", skb);
1025    ventry->state = VARP_STATE_INCOMPLETE;
1026    atomic_set(&ventry->probes, 1);
1027    if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
1028        VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, 1);
1029        VarpEntry_incref(ventry);
1030        VarpEntry_schedule(ventry);
1031    }
1032    vnet = ventry->key.vnet;
1033    vmac = *(Vmac*)eth_hdr(skb)->h_dest;
1034    VarpEntry_unlock(ventry, flags);
1035    varp_solicit(&vnet, &vmac);
1036    VarpEntry_lock(ventry, flags);
1037
1038    if(ventry->state == VARP_STATE_INCOMPLETE){
1039        while(skb_queue_len(&ventry->queue) >= ventry->queue_max){
1040            struct sk_buff *oldskb;
1041            oldskb = skb_dequeue(&ventry->queue);
1042            //oldskb = ventry->queue.next;
1043            //__skb_unlink(oldskb, &ventry->queue);
1044            if(!oldskb) break;
1045            dprintf("> dropping skb=%p\n", oldskb);
1046            kfree_skb(oldskb);
1047        }
1048        skb_queue_tail(&ventry->queue, skb);
1049    } else {
1050        err = VarpEntry_send(ventry, skb);
1051    }
1052    dprintf("< err=%d\n", err);
1053    return err;
1054}
1055
1056/** Process the output queue for a ventry.  Sends the queued skbs if
1057 * the ventry is reachable, otherwise drops them.
1058 *
1059 * @param ventry varp entry
1060 */
1061void VarpEntry_process_queue(VarpEntry *ventry){
1062    struct sk_buff *skb;
1063    for( ; ; ){
1064        if(ventry->state != VARP_STATE_REACHABLE) break;
1065        skb = skb_dequeue(&ventry->queue);
1066        if(!skb) break;
1067        VarpEntry_send(ventry, skb);
1068    }
1069    skb_queue_purge(&ventry->queue);
1070}
1071
1072/** Multicast an skb on a vnet.
1073 *
1074 * @param vnet vnet id
1075 * @param skb skb to send
1076 * @return 0 on success, error code otherwise
1077 */
1078static int varp_multicast(VnetId *vnet, struct sk_buff *skb){
1079    VarpAddr addr = { .family = AF_INET };
1080    addr.u.ip4.s_addr = varp_mcast_addr;
1081    return vnet_tunnel_send(vnet, &addr, skb);
1082}
1083
1084/** Handle output for a ventry. Resolves the ventry
1085 * if necessary.
1086 *
1087 * @param ventry varp entry
1088 * @param skb skb to send
1089 * @return 0 on success, error code otherwise
1090 */
1091int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
1092    int err = 0;
1093    unsigned long flags;
1094
1095    VarpEntry_lock(ventry, flags);
1096    switch(ventry->state){
1097    case VARP_STATE_REACHABLE:
1098        if(skb_queue_len(&ventry->queue) > 0){
1099            VarpEntry_process_queue(ventry);
1100        }
1101        err = VarpEntry_send(ventry, skb);
1102        break;
1103    default: 
1104        if(0){
1105            err = VarpEntry_resolve(ventry, skb);
1106        } else {     
1107            // Multicast the skb if the entry is not reachable.
1108            VnetId vnet = ventry->key.vnet;
1109            VarpEntry_unlock(ventry, flags);
1110            err = varp_multicast(&vnet, skb);
1111            VarpEntry_lock(ventry, flags);
1112        }
1113        break;
1114    }
1115    VarpEntry_unlock(ventry, flags);
1116    return err;
1117}
1118
1119/** Update a ventry. Sets the address and state to those given
1120 * and sets the timestamp to 'now'.
1121 *
1122 * @param ventry varp entry
1123 * @param addr care-of address
1124 * @param state state
1125 * @return 0 on success, error code otherwise
1126 */
1127int VarpEntry_update(VarpEntry *ventry, VarpAddr *addr, int state, int vflags){
1128    int err = 0;
1129    unsigned long now = jiffies;
1130    unsigned long flags;
1131
1132    VarpEntry_lock(ventry, flags);
1133    //if(atomic_read(&ventry->deleted)) goto exit;
1134    if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
1135    ventry->addr = *addr;
1136    ventry->timestamp = now;
1137    ventry->state = state;
1138    // Can't process the queue while atomic as it calls schedule(),
1139    // and that's bad.
1140    //if(0 && (vflags & VARP_UPDATE_QUEUE) && !in_atomic()){
1141    //    VarpEntry_process_queue(ventry);
1142    //}
1143  exit:
1144    VarpEntry_unlock(ventry, flags);
1145    dprintf("< err=%d\n", err);
1146    return err;
1147}
1148   
1149/** Update the entry for a vnet.
1150 *
1151 * @param vtable varp table
1152 * @param vnet vnet id
1153 * @param vmac mac address
1154 * @param addr care-of-address
1155 * @param state state
1156 * @param flags update flags
1157 * @return 0 on success, error code otherwise
1158 */
1159int VarpTable_update(VarpTable *vtable, VnetId *vnet, Vmac *vmac, VarpAddr *addr,
1160                     int state, int flags){
1161    int err = 0;
1162    VarpEntry *ventry;
1163#ifdef DEBUG
1164    char vnetbuf[VNET_ID_BUF];
1165    char addrbuf[VARP_ADDR_BUF];
1166   
1167    dprintf("> vnet=%s mac=" MACFMT " addr=%s state=%d flags=%x\n",
1168            VnetId_ntoa(vnet, vnetbuf),
1169            MAC6TUPLE(vmac->mac),
1170            VarpAddr_ntoa(addr, addrbuf),
1171            state,
1172            flags);
1173#endif
1174    ventry = VarpTable_lookup(vtable, vnet, vmac, (flags & VARP_UPDATE_CREATE));
1175    if(!ventry){
1176        err = -ENOENT;
1177        goto exit;
1178    }
1179    err = VarpEntry_update(ventry, addr, state, flags);
1180    VarpEntry_decref(ventry);
1181  exit:
1182    dprintf("< err=%d\n", err);
1183    return err;
1184}
1185
1186/** Update the entry for a vnet: make it reachable and create an entry
1187 * if needed.
1188 *
1189 * @param vnet vnet id
1190 * @param vmac mac address
1191 * @param addr care-of-address
1192 * @return 0 on success, error code otherwise
1193 */
1194int varp_update(VnetId *vnet, unsigned char *vmac, VarpAddr *addr){
1195    int err = 0;
1196    if(!varp_table){
1197        err = -ENOSYS;
1198    } else {
1199        err = VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
1200                               VARP_STATE_REACHABLE, VARP_UPDATE_CREATE);
1201    }
1202    return err;
1203}
1204
1205static inline int VarpEntry_sweepable(VarpEntry *ventry){
1206    return !VarpEntry_get_flags(ventry, (VARP_FLAG_PERMANENT | VARP_FLAG_PROBING));
1207}
1208
1209static inline int VarpTable_old(VarpTable *vtable, VarpEntry *ventry, unsigned long now){
1210    return now - ventry->timestamp > vtable->entry_ttl;
1211}
1212
1213/** Sweep old varp entries.
1214 * Doesn't affect entries that are probing or permanent.
1215 *
1216 * @param vtable table
1217 */
1218void VarpTable_sweep(VarpTable *vtable){
1219    HashTable_for_decl(entry);
1220    VarpEntry *ventry;
1221    unsigned long now = jiffies;
1222    unsigned long vtflags, flags;
1223    int sweep, swept = 0;
1224
1225    if(!vtable) return;
1226    VarpTable_write_lock(vtable, vtflags);
1227    HashTable_for_each(entry, vtable->table){
1228        ventry = entry->value;
1229        VarpEntry_lock(ventry, flags);
1230        sweep = VarpEntry_sweepable(ventry) && VarpTable_old(vtable, ventry, now);
1231        if(sweep){
1232            swept++;
1233            iprintf("> Sweeping:\n");
1234            VarpEntry_print(ventry, iostdout);
1235            //VarpEntry_process_queue(ventry);
1236            ventry->state = VARP_STATE_INCOMPLETE;
1237        }
1238        VarpEntry_unlock(ventry, flags);
1239        if(sweep){
1240            VarpTable_remove(vtable, ventry);
1241        }
1242    }
1243    VarpTable_write_unlock(vtable, vtflags);
1244    if(swept){
1245        iprintf(">\n");
1246        varp_print(iostdout);
1247    }
1248}
1249
1250/** Flush the varp table.
1251 *
1252 * @param vtable table
1253 */
1254void VarpTable_flush(VarpTable *vtable){
1255    HashTable_for_decl(entry);
1256    VarpEntry *ventry;
1257    unsigned long vtflags, flags;
1258    int flush;
1259
1260    VarpTable_write_lock(vtable, vtflags);
1261    HashTable_for_each(entry, vtable->table){
1262        ventry = entry->value;
1263        VarpEntry_lock(ventry, flags);
1264        flush = (!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
1265                 !VarpEntry_get_flags(ventry, VARP_FLAG_PROBING));               
1266        if(flush){
1267            iprintf("> Flushing:\n");
1268            VarpEntry_print(ventry, iostdout);
1269        }
1270        VarpEntry_unlock(ventry, flags);
1271        if(flush){
1272            VarpTable_remove(vtable, ventry);
1273        }
1274    }
1275    VarpTable_write_unlock(vtable, vtflags);
1276}
1277
1278/** Handle a varp request. Look for a vif with the requested
1279 * vnet and vmac. If find one, reply with the vnet, vmac and our
1280 * address. Otherwise do nothing.
1281 *
1282 * @param skb incoming message
1283 * @param varph varp message
1284 * @return 0 if ok, -ENOENT if no matching vif, or error code
1285 */
1286int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){
1287    int err = -ENOENT;
1288    VnetId *vnet;
1289    Vmac *vmac;
1290    Vif *vif = NULL;
1291
1292    dprintf(">\n");
1293    vnet = &varph->vnet;
1294    vmac = &varph->vmac;
1295    if(vif_lookup(vnet, vmac, &vif)) goto exit;
1296    varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac);
1297    vif_decref(vif);
1298  exit:
1299    dprintf("< err=%d\n", err);
1300    return err;
1301}
1302
1303/** Announce the vnet and vmac of a vif (gratuitous varp).
1304 *
1305 * @param dev device to send on (may be null)
1306 * @param vif vif
1307 * @return 0 on success, error code otherwise
1308 */
1309int varp_announce_vif(struct net_device *dev, Vif *vif){
1310    int err = 0;
1311    dprintf(">\n");
1312    if(!varp_table){
1313        err = -ENOSYS;
1314        goto exit;
1315    }
1316    err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, &vif->vnet, &vif->vmac);
1317  exit:
1318    dprintf("< err=%d\n", err);
1319    return err;
1320}
1321
1322/** Handle a varp announce message.
1323 * Update the matching ventry if we have one.
1324 *
1325 * @param skb incoming message
1326 * @param varp message
1327 * @return 0 if OK, -ENOENT if no matching entry
1328 */
1329int varp_handle_announce(struct sk_buff *skb, VarpHdr *varph){
1330    int err = 0;
1331
1332    dprintf(">\n");
1333    err = VarpTable_update(varp_table,
1334                           &varph->vnet, &varph->vmac, &varph->addr,
1335                           VARP_STATE_REACHABLE, 
1336                           (VARP_UPDATE_CREATE | VARP_UPDATE_QUEUE));
1337    dprintf("< err=%d\n", err);
1338    return err;
1339}
1340
1341/** Handle an incoming varp message.
1342 *
1343 * @param skb incoming message
1344 * @return 0 if OK, error code otherwise
1345 */
1346int varp_handle_message(struct sk_buff *skb){
1347    // Assume nh, h set, skb->data points at udp hdr (h).
1348    int err = -EINVAL;
1349    VarpHdr *varph; // = (void*)(skb->h.uh + 1);
1350
1351    dprintf("> skb=%p saddr=" IPFMT " daddr=" IPFMT "\n",
1352            skb,
1353            NIPQUAD(skb->nh.iph->saddr),
1354            NIPQUAD(skb->nh.iph->daddr));
1355    if(!varp_table){
1356        err = -ENOSYS;
1357        return err;
1358    }
1359    if(MULTICAST(skb->nh.iph->daddr)){
1360        if(skb->nh.iph->daddr != varp_mcast_addr){
1361            // Ignore multicast packets not addressed to us.
1362            err = 0;
1363            dprintf("> Ignoring daddr=" IPFMT " mcaddr=" IPFMT "\n",
1364                    NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
1365            goto exit;
1366        }
1367    }
1368    varph = (void*)skb_pull(skb, sizeof(struct udphdr));
1369    if(skb->len < sizeof(struct VnetMsgHdr)){
1370        wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct VnetMsgHdr));
1371        goto exit;
1372    }
1373    switch(ntohs(varph->hdr.id)){
1374    case VARP_ID: // Varp message. Handled below.
1375        if(skb->len < sizeof(*varph)){
1376            wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(*varph));
1377            goto exit;
1378        }
1379        break;
1380    case VUDP_ID: // Etherip-in-udp packet.
1381        skb_pull(skb, sizeof(struct VnetMsgHdr));
1382        err = etherip_protocol_recv(skb);
1383        goto exit;
1384    case VFWD_ID: // Forwarded.
1385        skb_pull(skb, sizeof(struct VnetMsgHdr));
1386        err = vnet_forward_recv(skb);
1387        goto exit;
1388    default:
1389        // It's not varp at all - ignore it.
1390        wprintf("> Invalid varp id: %d\n", ntohs(varph->hdr.id));
1391        print_skb("INVALID", 0, skb);
1392        goto exit;
1393    }
1394#ifdef DEBUG
1395    {
1396        char vnetbuf[VNET_ID_BUF];
1397        char addrbuf[VARP_ADDR_BUF];
1398        dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n",
1399                NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
1400        dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest));
1401        dprintf("> opcode=%d vnet=%s vmac=" MACFMT " addr=%s\n",
1402                ntohs(varph->hdr.opcode),
1403                VnetId_ntoa(&varph->vnet, vnetbuf),
1404                MAC6TUPLE(varph->vmac.mac),
1405                VarpAddr_ntoa(&varph->addr, addrbuf));
1406        varp_dprint();
1407    }
1408#endif
1409    switch(ntohs(varph->hdr.opcode)){
1410    case VARP_OP_REQUEST:
1411        err = varp_handle_request(skb, varph);
1412        break;
1413    case VARP_OP_ANNOUNCE:
1414        err = varp_handle_announce(skb, varph);
1415        break;
1416    default:
1417        wprintf("> Unknown opcode: %d \n", ntohs(varph->hdr.opcode));
1418        break;
1419    }
1420  exit:
1421    dprintf("< err=%d\n", err);
1422    return err;
1423}
1424
1425/** Send an outgoing packet on the appropriate vnet tunnel.
1426 *
1427 * @param skb outgoing message
1428 * @param vnet vnet (network order)
1429 * @return 0 on success, error code otherwise
1430 */
1431int varp_output(struct sk_buff *skb, VnetId *vnet){
1432    int err = 0;
1433    unsigned char *mac = NULL;
1434    Vmac *vmac = NULL;
1435    VarpEntry *ventry = NULL;
1436#if defined(DEBUG)
1437    char vnetbuf[VNET_ID_BUF];
1438#endif
1439
1440    dprintf("> vnet=%s\n", VnetId_ntoa(vnet, vnetbuf));
1441    if(!varp_table){
1442        err = -ENOSYS;
1443        goto exit;
1444    }
1445    if(!skb->mac.raw){
1446        wprintf("> No ethhdr in skb!\n");
1447        err = -EINVAL;
1448        goto exit;
1449    }
1450    mac = eth_hdr(skb)->h_dest;
1451    vmac = (Vmac*)mac;
1452    if(mac_is_multicast(mac)){
1453        err = varp_multicast(vnet, skb);
1454    } else {
1455        ventry = VarpTable_lookup(varp_table, vnet, vmac, 1);
1456        if(ventry){
1457            err = VarpEntry_output(ventry, skb);
1458            VarpEntry_decref(ventry);
1459        } else {
1460            err = -ENOMEM;
1461        }
1462    }
1463  exit:
1464    dprintf("< err=%d\n", err);
1465    return err;
1466}
1467
1468/** Set the varp multicast address (after initialization).
1469 *
1470 * @param addr address (network order)
1471 * @return 0 on success, error code otherwise
1472 */
1473int varp_set_mcast_addr(uint32_t addr){
1474    int err = 0;
1475    varp_close();
1476    varp_mcast_addr = addr;
1477    err = varp_open(varp_mcast_addr, varp_port);
1478    return err;
1479}
1480
1481/** Initialize the varp multicast address from a module parameter.
1482 *
1483 * @param s address in IPv4 notation
1484 * @return 0 on success, error code otherwise
1485 */
1486static void varp_init_mcast_addr(char *s){
1487    unsigned long v = 0;
1488
1489    dprintf("> %s\n", s);
1490    if(s && (get_inet_addr(s, &v) >= 0)){
1491        varp_mcast_addr = (u32)v;
1492    } else {
1493        varp_mcast_addr = htonl(VARP_MCAST_ADDR);
1494    }
1495}
1496
1497/** Initialize the varp cache.
1498 *
1499 * @return 0 on success, error code otherwise
1500 */
1501int varp_init(void){
1502    int err = 0;
1503   
1504    dprintf(">\n");
1505    varp_table = VarpTable_new();
1506    if(!varp_table){
1507        err = -ENOMEM;
1508        goto exit;
1509    }
1510    VarpTable_schedule(varp_table);
1511    varp_init_mcast_addr(varp_mcaddr);
1512    varp_port = htons(VARP_PORT);
1513
1514    err = varp_open(varp_mcast_addr, varp_port);
1515  exit:
1516    dprintf("< err=%d\n", err);
1517    return err;
1518}
1519
1520/** Close the varp cache.
1521 */
1522void varp_exit(void){
1523    dprintf(">\n");
1524    varp_close();
1525    if(varp_table){
1526        VarpTable *vtable = varp_table;
1527        varp_table = NULL;
1528        VarpTable_free(vtable);
1529    }
1530    dprintf("<\n");
1531}
1532
1533module_param(varp_mcaddr, charp, 0644);
1534module_param(varp_device, charp, 0644);
1535MODULE_PARM_DESC(varp_mcaddr, "VARP multicast address");
1536MODULE_PARM_DESC(varp_device, "VARP network device");
Note: See TracBrowser for help on using the repository browser.