source: trunk/packages/xen-3.1/xen-3.1/tools/vnet/vnet-module/vnet_forward.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 18 years ago

Add xen and xen-common

File size: 10.7 KB
Line 
1/*
2 * Copyright (C) 2005, 2006 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19#ifdef __KERNEL__
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/init.h>
26
27#include <linux/version.h>
28#include <linux/spinlock.h>
29
30#include <linux/skbuff.h>
31#include <linux/net.h>
32#include <linux/netdevice.h>
33#include <linux/in.h>
34#include <linux/inet.h>
35#include <linux/netfilter_bridge.h>
36#include <linux/netfilter_ipv4.h>
37#include <linux/udp.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/checksum.h>
43
44#else
45
46#include <netinet/in.h>
47#include <arpa/inet.h>
48
49#include "sys_kernel.h"
50#include "spinlock.h"
51#include "skbuff.h"
52#include <linux/ip.h>
53#include <linux/udp.h>
54
55#endif
56
57#include <varp.h>
58#include <if_varp.h>
59#include <varp.h>
60#include <skb_util.h>
61#include <skb_context.h>
62
63#include "allocate.h"
64#include "iostream.h"
65#include "hash_table.h"
66#include "vnet_forward.h"
67
68#define MODULE_NAME "VNET"
69#define DEBUG 1
70#undef DEBUG
71#include "debug.h"
72
/* Transmit helper defined outside this file. It is called below with the
 * skb to send and an IPv4 source address; callers in this file treat a
 * negative return value as failure.
 */
extern int _skb_xmit(struct sk_buff *skb, uint32_t saddr);
74
75typedef struct VnetPeer {
76    struct VarpAddr addr;
77    uint16_t port;
78    atomic_t refcount;
79    int tx_packets;
80    int rx_packets;
81} VnetPeer;
82
83static HashTable *vnet_peer_table = NULL;
84static rwlock_t vnet_peer_table_lock = RW_LOCK_UNLOCKED;
85
86#define vnet_peer_read_lock(flags)    read_lock_irqsave(&vnet_peer_table_lock, (flags))
87#define vnet_peer_read_unlock(flags)  read_unlock_irqrestore(&vnet_peer_table_lock, (flags))
88#define vnet_peer_write_lock(flags)   write_lock_irqsave(&vnet_peer_table_lock, (flags))
89#define vnet_peer_write_unlock(flags) write_unlock_irqrestore(&vnet_peer_table_lock, (flags))
90
91static void VnetPeer_decref(VnetPeer *peer){
92    if(!peer) return;
93    if(atomic_dec_and_test(&peer->refcount)){
94        kfree(peer);
95    }
96}
97
98static void VnetPeer_incref(VnetPeer *peer){
99    if(!peer) return;
100    atomic_inc(&peer->refcount);
101}
102
103static void VnetPeer_print(VnetPeer *peer, IOStream *io){
104    char addrbuf[VARP_ADDR_BUF];
105   
106    IOStream_print(io, "(vnet_peer\n");
107    IOStream_print(io, "  (addr %s)\n", VarpAddr_ntoa(&peer->addr, addrbuf));
108    IOStream_print(io, "  (port %d)\n", htons(peer->port));
109    IOStream_print(io, "  (tx_packets %d)\n", peer->tx_packets);
110    IOStream_print(io, "  (rx_packets %d)\n", peer->tx_packets);
111    IOStream_print(io, ")\n");
112}
113
114static int VnetPeer_forward(VnetPeer *peer, struct sk_buff *fwdskb){
115    int err = 0;
116    const int ip_n = sizeof(struct iphdr);
117    const int udp_n = sizeof(struct udphdr);
118    const int vnet_n = sizeof(struct VnetMsgHdr);
119    int head_n = 16 + ip_n + udp_n + vnet_n;
120    int push_n = 0;
121    struct sk_buff *skb = NULL;
122    struct VnetMsgHdr *vhdr;
123    uint32_t saddr = 0;
124    uint16_t sport = varp_port;
125    uint32_t daddr = peer->addr.u.ip4.s_addr;
126    uint16_t dport = varp_port;
127
128    if(!fwdskb) goto exit;
129    if(daddr == fwdskb->nh.iph->saddr){
130        // Don't forward if the skb src addr is the peer addr.
131        dprintf("> Forward loop on " IPFMT "\n", NIPQUAD(daddr));
132        goto exit;
133    }
134    // On entry fwdskb->data should be at fwdskb->nh.raw (adjust if not).
135    // Also fwdskb->h.raw and fwdskb->nh.raw are set.
136    if(fwdskb->data > fwdskb->nh.raw){
137        push_n = fwdskb->data - fwdskb->nh.raw;
138        head_n += push_n;
139    }
140    // If has headroom, copies header (which incs ref on dst),
141    // otherwise only clones header, which does not inc ref on dst.
142    skb = skb_realloc_headroom(fwdskb, head_n);
143    //skb = skb_copy_expand(fwdskb, head_n, 0, GFP_ATOMIC);
144    if(!skb){
145        err = -ENOMEM;
146        goto exit;
147    }
148
149    if(push_n){
150        skb_push(skb, push_n);
151    }
152
153#ifdef DEBUG
154    printk("\nOriginal packet:\n");
155    print_iphdr(__FUNCTION__, skb);
156    skb_print_bits(__FUNCTION__, skb, 0, skb->len);
157#endif
158
159    skb->mac.raw = NULL;
160    vhdr = (void*)skb_push(skb, vnet_n);
161    vhdr->id       = htons(VFWD_ID);
162    vhdr->opcode   = 0;
163
164    // Setup the UDP header.
165    skb->h.raw = skb_push(skb, udp_n);
166    skb->h.uh->source = sport;                  // Source port.
167    skb->h.uh->dest   = dport;                  // Destination port.
168    skb->h.uh->len    = htons(skb->len);        // Total packet length (bytes).
169    skb->h.uh->check  = 0;
170
171    // Setup the IP header.
172    skb->nh.raw = skb_push(skb, ip_n); 
173    skb->nh.iph->version  = 4;                  // Standard version.
174    skb->nh.iph->ihl      = ip_n / 4;           // IP header length (32-bit words).
175    skb->nh.iph->tos      = 0;                  // No special type-of-service.
176    skb->nh.iph->tot_len  = htons(skb->len);    // Total packet length (bytes).
177    skb->nh.iph->id       = 0;                  // No flow id.
178    skb->nh.iph->protocol = IPPROTO_UDP;        // IP protocol number.
179    skb->nh.iph->frag_off = 0;
180    skb->nh.iph->ttl      = 64;                 // Linux default time-to-live.
181    skb->nh.iph->saddr    = saddr;              // Source address.
182    skb->nh.iph->daddr    = daddr;              // Destination address.
183    skb->nh.iph->check    = 0;
184
185#ifdef DEBUG
186    printk("\nWrapped packet:\n");
187    print_iphdr(__FUNCTION__, skb);
188    print_udphdr(__FUNCTION__, skb);
189    skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
190#endif
191
192    err = _skb_xmit(skb, saddr);
193    peer->tx_packets++;
194
195  exit:
196    if(err < 0) kfree_skb(skb);
197    return err;
198}
199
200int vnet_peer_get(VarpAddr *addr, VnetPeer **peer){
201    unsigned long flags;
202
203    vnet_peer_read_lock(flags);
204    *peer = HashTable_get(vnet_peer_table, addr);
205    VnetPeer_incref(*peer);
206    vnet_peer_read_unlock(flags);
207    return (*peer ? 0 : -ENOENT);
208}
209
210int vnet_peer_add(VarpAddr *addr, uint16_t port){
211    int err = 0;
212    unsigned long flags;
213    VnetPeer *peer;
214   
215    vnet_peer_write_lock(flags);
216    peer = HashTable_get(vnet_peer_table, addr);
217    if(peer){
218        VnetPeer_incref(peer);
219        goto exit;
220    }
221    peer = ALLOCATE(VnetPeer);
222    if(!peer){
223        err = -ENOMEM;
224        goto exit;
225    }
226    peer->addr = *addr;
227    peer->port = port;
228    VnetPeer_incref(peer);
229    if(!HashTable_add(vnet_peer_table, &peer->addr, peer)){
230        VnetPeer_decref(peer);
231        err = -ENOMEM;
232    }
233  exit:
234    vnet_peer_write_unlock(flags);
235    return err;
236}
237
238int vnet_peer_del(VarpAddr *addr){
239    int ret = 0;
240    unsigned long flags;
241
242    vnet_peer_write_lock(flags);
243    ret = HashTable_remove(vnet_peer_table, addr);
244    vnet_peer_write_unlock(flags);
245    return ret;
246}
247
248void vnet_peer_print(IOStream *io){
249    HashTable_for_decl(entry);
250    unsigned long flags;
251
252    if(!vnet_peer_table) return;
253    vnet_peer_read_lock(flags);
254    HashTable_for_each(entry, vnet_peer_table){
255        VnetPeer *peer = entry->value;
256        VnetPeer_print(peer, io);
257    }
258    vnet_peer_read_unlock(flags);
259}
260
261int vnet_forward_send(struct sk_buff *skb){
262    int err = 0;
263    unsigned long flags;
264    HashTable_for_decl(entry);
265    int count = 0;
266
267    if(!vnet_peer_table){
268        goto exit;
269    }
270    vnet_peer_read_lock(flags);
271    HashTable_for_each(entry, vnet_peer_table){
272        VnetPeer *peer = entry->value;
273        VnetPeer_forward(peer, skb);
274        count++;
275    }
276    vnet_peer_read_unlock(flags);
277  exit:
278    return err;
279}
280
281int vnet_forward_recv(struct sk_buff *skb){
282    int err = 0;
283    VarpAddr addr = { .family = AF_INET };
284    VnetPeer *peer = NULL;
285    unsigned char eth[ETH_HLEN] = {};
286    struct sk_buff *recvskb;
287
288    if(!vnet_peer_table){
289        dprintf("> no table\n");
290        return -ENOSYS;
291    }
292    // On entry mac.raw, h.raw, nh.raw are set.
293    // skb->data points after the fwd vnet header, at the complete
294    // forwarded packet (which has IP hdr, no eth hdr).
295
296    // Save the eth hdr and source addr (peer).
297    memcpy(eth, skb->mac.raw, ETH_HLEN);
298    addr.u.ip4.s_addr = skb->nh.iph->saddr;
299    err = vnet_peer_get(&addr, &peer);
300    if(err){
301        wprintf("> no peer for " IPFMT "\n", NIPQUAD(skb->nh.iph->saddr));
302        goto exit;
303    }
304    peer->rx_packets++;
305    skb->mac.raw = NULL;
306    skb->nh.raw = skb->data;
307    skb->h.raw = (void*)(skb->nh.iph + 1);
308    if(!skb->nh.iph->saddr){
309        skb->nh.iph->saddr = addr.u.ip4.s_addr;
310    }
311#ifdef __KERNEL__
312    // Fix IP options, checksum, skb dst, netfilter state.
313    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
314    skb->dev = NULL;
315    dst_release(skb->dst);
316    skb->dst = NULL;
317    nf_reset(skb);
318#endif // __KERNEL__
319
320    skb->mac.raw = skb->nh.raw - ETH_HLEN;
321    memcpy(skb->mac.raw, eth, ETH_HLEN);
322
323    // Map destination mcast addresses to our mcast address.
324    if(MULTICAST(skb->nh.iph->daddr)){
325        skb->nh.iph->daddr = varp_mcast_addr;
326        //xmit does this: ip_eth_mc_map(varp_mcast_addr, eth_hdr(skb)->h_dest);
327    }
328
329    // Handle (a copy of) it ourselves, because
330    // if it is looped-back by xmit it will be ignored.
331    //recvskb = skb_clone(skb, GFP_ATOMIC);
332    recvskb = pskb_copy(skb, GFP_ATOMIC);
333    if(recvskb){
334        // Data points at the unwrapped iphdr, but varp_handle_message()
335        // expects it to point at the udphdr, so pull.
336        skb_pull(recvskb, sizeof(struct iphdr));
337        if(varp_handle_message(recvskb) <= 0){
338            kfree_skb(recvskb);
339        }
340    }
341    err = _skb_xmit(skb, skb->nh.iph->saddr);
342    if(err >= 0) err = 1;
343  exit:
344    return err;
345}
346
347/** Hash function for keys in the peer table.
348 */
349static Hashcode peer_key_hash_fn(void *k){
350    return hash_hvoid(0, k, sizeof(struct VarpAddr));
351}
352
353/** Equality function for keys in the peer table.
354 */
355static int peer_key_equal_fn(void *k1, void *k2){
356    return memcmp(k1, k2, sizeof(struct VarpAddr)) == 0;
357}
358
359static void peer_entry_free_fn(HashTable *table, HTEntry *entry){
360    if(!entry) return;
361    VnetPeer_decref((VnetPeer*)entry->value);
362    HTEntry_free(entry);
363}
364
365int vnet_forward_init(void){
366    int err = 0;
367    if(vnet_peer_table) goto exit;
368    vnet_peer_table = HashTable_new(0);
369    if(!vnet_peer_table){
370        err = -ENOMEM;
371        goto exit;
372    }
373    vnet_peer_table->key_size = sizeof(struct VarpAddr);
374    vnet_peer_table->key_equal_fn = peer_key_equal_fn;
375    vnet_peer_table->key_hash_fn = peer_key_hash_fn;
376    vnet_peer_table->entry_free_fn = peer_entry_free_fn;
377  exit:
378    return err;
379}
380
381void vnet_forward_exit(void){
382    HashTable_free(vnet_peer_table);
383    vnet_peer_table = NULL;
384}
Note: See TracBrowser for help on using the repository browser.