source: trunk/packages/xen-3.1/xen-3.1/tools/vnet/vnet-module/etherip.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 18 years ago

Add xen and xen-common

File size: 13.6 KB
Line 
1/*
2 * Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; either version 2 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free software Foundation, Inc.,
16 * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19#ifdef __KERNEL__
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/init.h>
26
27#include <linux/version.h>
28
29#include <linux/skbuff.h>
30#include <linux/net.h>
31#include <linux/netdevice.h>
32#include <linux/in.h>
33#include <linux/inet.h>
34#include <linux/netfilter_bridge.h>
35#include <linux/netfilter_ipv4.h>
36#include <linux/icmp.h>
37#include <linux/udp.h>
38
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/checksum.h>
43
44#else
45
46#include <netinet/in.h>
47#include <arpa/inet.h>
48
49#include "sys_kernel.h"
50#include "spinlock.h"
51#include "skbuff.h"
52#include <linux/ip.h>
53#include <linux/udp.h>
54
55#define IP_DF           0x4000          /* Flag: "Don't Fragment"       */
56
57#endif
58
59#include <etherip.h>
60#include <tunnel.h>
61#include <vnet.h>
62#include <varp.h>
63#include <if_varp.h>
64#include <varp.h>
65#include <skb_util.h>
66#include <skb_context.h>
67
68#define MODULE_NAME "VNET"
69#define DEBUG 1
70#undef DEBUG
71#include "debug.h"
72
73/** @file Etherip implementation.
74 * The etherip protocol is used to transport Ethernet frames in IP packets.
75 */
76
/** Selects the encapsulation used when sending etherip traffic.
 * Zero: the etherip protocol is carried directly in IP packets.
 * Non-zero: the etherip protocol is carried in UDP packets, behind a vnet header.
 */
int etherip_in_udp = 1;
82
83/** Get the vnet label from an etherip header.
84 *
85 * @param hdr header
86 * @@param vnet (in net order)
87 */
88void etheriphdr_get_vnet(struct etheriphdr *hdr, VnetId *vnet){
89#ifdef CONFIG_ETHERIP_EXT
90    *vnet = *(VnetId*)hdr->vnet;
91#else
92    *vnet = (VnetId){};
93    vnet->u.vnet16[VNET_SIZE16 - 1] = (unsigned short)hdr->reserved;
94   
95#endif
96}
97
98/** Set the vnet label in an etherip header.
99 * Also sets the etherip version.
100 *
101 * @param hdr header
102 * @param vnet vnet label (in net order)
103 */
104void etheriphdr_set_vnet(struct etheriphdr *hdr, VnetId *vnet){
105#ifdef CONFIG_ETHERIP_EXT
106    hdr->version = ETHERIP_VERSION;
107    *(VnetId*)hdr->vnet = *vnet;
108#else
109    hdr->version = ETHERIP_VERSION;
110    hdr->reserved = (vnet->u.vnet16[VNET_SIZE16 - 1] & 0x0fff);
111#endif
112}
113
114/** Open an etherip tunnel.
115 *
116 * @param tunnel to open
117 * @return 0 on success, error code otherwise
118 */
119static int etherip_tunnel_open(Tunnel *tunnel){
120    return 0;
121}
122
123/** Close an etherip tunnel.
124 *
125 * @param tunnel to close
126 */
127static void etherip_tunnel_close(Tunnel *tunnel){
128}
129
130
/** Make sure a packet has room for new headers in front of its data.
 *
 * If the packet already has at least head_n bytes of headroom and is
 * neither cloned nor shared it is returned as-is through pskb.
 * Otherwise a copy with head_n + 16 bytes of headroom is made, the
 * original is released with kfree_skb() and the copy is returned
 * through pskb.
 *
 * If the copy cannot be made, *pskb is left unchanged and the original
 * skb is not freed.
 *
 * @param pskb receives the packet to use from now on
 * @param skb packet needing headroom
 * @param head_n number of bytes of headroom required
 * @return 0 on success, -ENOMEM if the copy could not be allocated
 */
static inline int skb_make_headroom(struct sk_buff **pskb, struct sk_buff *skb, int head_n){
    struct sk_buff *bigger;
    int must_copy;

    dprintf("> skb=%p headroom=%d head_n=%d\n", skb, skb_headroom(skb), head_n);
    must_copy = (head_n > skb_headroom(skb)) || skb_cloned(skb) || skb_shared(skb);
    if(!must_copy){
        *pskb = skb;
        return 0;
    }
    // Expand header the way GRE does.
    bigger = skb_realloc_headroom(skb, head_n + 16);
    if(!bigger){
        return -ENOMEM;
    }
    kfree_skb(skb);
    *pskb = bigger;
    return 0;
}
149   
150/** Send a packet via an etherip tunnel.
151 * Adds etherip header and new ip header around ethernet frame.
152 *
153 * @param tunnel tunnel
154 * @param skb packet
155 * @return 0 on success, error code otherwise
156 */
157static int etherip_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
158    int err = 0;
159    const int ip_n = sizeof(struct iphdr);
160    const int etherip_n = sizeof(struct etheriphdr);
161    const int udp_n = sizeof(struct udphdr);
162    const int vnet_n = sizeof(struct VnetMsgHdr);
163    int head_n = etherip_n + ip_n /* +  ETH_HLEN */;
164    VnetId *vnet = &tunnel->key.vnet;
165    struct etheriphdr *etheriph;
166    u32 saddr = 0;
167
168    if(etherip_in_udp){
169        head_n += vnet_n + udp_n;
170    }
171    err = skb_make_headroom(&skb, skb, head_n);
172    if(err) goto exit;
173
174    // Null the pointer as we are pushing a new IP header.
175    skb->mac.raw = NULL;
176
177    // Setup the etherip header.
178    etheriph = (void*)skb_push(skb, etherip_n);
179    etheriphdr_set_vnet(etheriph, vnet);
180
181    if(etherip_in_udp){
182        // Vnet header.
183        struct VnetMsgHdr *vhdr = (void*)skb_push(skb, vnet_n);
184        vhdr->id     = htons(VUDP_ID);
185        vhdr->opcode = 0;
186
187        // Setup the UDP header.
188        skb->h.raw = skb_push(skb, udp_n);
189        skb->h.uh->source = varp_port;          // Source port.
190        skb->h.uh->dest   = varp_port;          // Destination port.
191        skb->h.uh->len    = htons(skb->len);    // Total packet length (bytes).
192        skb->h.uh->check  = 0;
193    }
194
195    // Setup the IP header.
196    skb->nh.raw = skb_push(skb, ip_n); 
197    skb->nh.iph->version  = 4;                  // Standard version.
198    skb->nh.iph->ihl      = ip_n / 4;           // IP header length (32-bit words).
199    skb->nh.iph->tos      = 0;                  // No special type-of-service.
200    skb->nh.iph->tot_len  = htons(skb->len);    // Total packet length (bytes).
201    skb->nh.iph->id       = 0;                  // No flow id (since no frags).
202    if(etherip_in_udp){
203        skb->nh.iph->protocol = IPPROTO_UDP;    // IP protocol number.
204        skb->nh.iph->frag_off = 0;
205    } else {
206        skb->nh.iph->protocol = IPPROTO_ETHERIP;// IP protocol number.
207        skb->nh.iph->frag_off = htons(IP_DF);   // Don't fragment - can't handle frags.
208    }
209    skb->nh.iph->ttl      = 64;                 // Linux default time-to-live.
210    skb->nh.iph->saddr    = saddr;              // Source address.
211    skb->nh.iph->daddr    = tunnel->key.addr.u.ip4.s_addr; // Destination address.
212    skb->nh.iph->check    = 0;                  // Zero the checksum.
213
214    // Ethernet header will be filled-in by device.
215    err = Tunnel_send(tunnel->base, skb);
216    skb = NULL;
217  exit:
218    if(err && skb){
219        wprintf("< err=%d\n", err);
220        kfree_skb(skb);
221    }
222    return err;
223}
224
225/** Tunnel type for etherip.
226 */
227static TunnelType _etherip_tunnel_type = {
228    .name       = "ETHERIP",
229    .open       = etherip_tunnel_open,
230    .close      = etherip_tunnel_close,
231    .send       = etherip_tunnel_send
232};
233
234TunnelType *etherip_tunnel_type = &_etherip_tunnel_type;
235
236int etherip_tunnel_create(VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **tunnel){
237    return Tunnel_create(etherip_tunnel_type, vnet, addr, base, tunnel);
238}
239
#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER)
/** Save the bytes that sit just in front of skb->data into skb->nf_bridge->data.
 * 18 bytes are saved when skb->protocol is ETH_P_8021Q, 16 bytes otherwise.
 *
 * We need our own copy of this as it is no longer exported from the bridge module.
 *
 * NOTE(review): an earlier comment here talked about using h_proto instead
 * of skb->protocol, but the test below is on skb->protocol.
 *
 * @param skb packet; skb->nf_bridge is dereferenced, so it must be set
 */
static inline void _nf_bridge_save_header(struct sk_buff *skb){
    const int header_size = (skb->protocol == htons(ETH_P_8021Q)) ? 18 : 16;

    memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
}
#endif
253
254/** Do etherip receive processing.
255 * Strips the etherip header to extract the ethernet frame, sets
256 * the vnet from the header and re-receives the frame.
257 *
258 * Return code 1 means we now own the packet - the caller must not free it.
259 * Return code < 0 means an error - caller still owns the packet.
260 *
261 * @param skb packet
262 * @return 1 on success, error code otherwise
263 */
264int etherip_protocol_recv(struct sk_buff *skb){
265    int err = 0;
266    const int etherip_n = sizeof(struct etheriphdr);
267    struct etheriphdr *etheriph;
268    Vnet *vinfo = NULL;
269    VnetId vnet = {};
270    u32 saddr, daddr;
271    char vnetbuf[VNET_ID_BUF];
272    struct ethhdr *eth;
273
274    dprintf(">\n");
275    saddr = skb->nh.iph->saddr;
276    daddr = skb->nh.iph->daddr;
277    if(MULTICAST(daddr) && (daddr != varp_mcast_addr)){
278        // Ignore multicast packets not addressed to us.
279        wprintf("> Ignoring mcast skb: src=%u.%u.%u.%u dst=%u.%u.%u.%u"
280                " varp_mcast_addr=%u.%u.%u.%u\n",
281                NIPQUAD(saddr), NIPQUAD(daddr), NIPQUAD(varp_mcast_addr));
282        goto exit;
283    }
284    if(skb->data == skb->mac.raw){
285        // skb->data points at ethernet header.
286        //FIXME: Does this ever happen?
287        //dprintf("> len=%d\n", skb->len);
288        int ip_n = (skb->nh.iph->ihl << 2);
289        int pull_n = ETH_HLEN + ip_n;
290        if (!pskb_may_pull(skb, pull_n)){
291            wprintf("> Malformed skb (eth+ip) src=%u.%u.%u.%u\n",
292                    NIPQUAD(saddr));
293            err = -EINVAL;
294            goto exit;
295        }
296        skb_pull(skb, pull_n);
297    }
298    // Assume skb->data points at etherip header.
299    etheriph = (void*)skb->data;
300    if(etheriph->version != ETHERIP_VERSION){
301        wprintf("> Bad etherip version=%d src=%u.%u.%u.%u\n",
302                etheriph->version, NIPQUAD(saddr));
303        err = -EINVAL;
304        goto exit;
305    }
306    if(!pskb_may_pull(skb, etherip_n)){
307        wprintf("> Malformed skb (etherip) src=%u.%u.%u.%u\n",
308                NIPQUAD(saddr));
309        err = -EINVAL;
310        goto exit;
311    }
312    etheriphdr_get_vnet(etheriph, &vnet);
313    // If vnet is secure, context must include IPSEC ESP.
314    err = vnet_check_context(&vnet, SKB_CONTEXT(skb), &vinfo);
315    if(err){
316        wprintf("> Failed security check vnet=%s src=%u.%u.%u.%u\n",
317                VnetId_ntoa(&vnet, vnetbuf), NIPQUAD(saddr));
318        goto exit;
319    }
320    // Point at the headers in the contained ethernet frame.
321    skb->mac.raw = skb_pull(skb, etherip_n);
322    eth = eth_hdr(skb);
323
324    // Simulate the logic from eth_type_trans()
325    // to set skb->pkt_type and skb->protocol.
326    if(mac_is_multicast(eth->h_dest)){
327        if(mac_is_broadcast(eth->h_dest)){
328            skb->pkt_type = PACKET_BROADCAST;
329        } else {
330            skb->pkt_type = PACKET_MULTICAST;
331        }
332    } else {
333        skb->pkt_type = PACKET_HOST;
334    }
335    if(ntohs(eth->h_proto) >= 1536){
336        skb->protocol = eth->h_proto;
337    } else {
338        skb->protocol = htons(ETH_P_802_2);
339    }
340   
341    // Assuming a standard Ethernet frame.
342    // Should check for protocol? Support ETH_P_8021Q too.
343    skb->nh.raw = skb_pull(skb, ETH_HLEN);
344
345#ifdef __KERNEL__
346    // Fix IP options, checksum, skb dst, netfilter state.
347    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
348    if (skb->ip_summed == CHECKSUM_HW){
349        skb->ip_summed = CHECKSUM_NONE;
350    }
351    dst_release(skb->dst);
352    skb->dst = NULL;
353    nf_reset(skb);
354#ifdef CONFIG_BRIDGE_NETFILTER
355    if(skb->nf_bridge){
356        // Stop the eth header being clobbered by nf_bridge_maybe_copy_header().
357        _nf_bridge_save_header(skb);
358    }
359#endif
360#endif // __KERNEL__
361
362    dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
363            NIPQUAD(skb->nh.iph->saddr),
364            VnetId_ntoa(&vnet, vnetbuf),
365            MAC6TUPLE(eth->h_source),
366            MAC6TUPLE(eth->h_dest));
367    //print_skb(__FUNCTION__, 0, skb);
368
369    {
370        // Know source ip, vnet, vmac, so update the varp cache.
371        // For this to work forwarded vnet packets must have the
372        // original source address.
373        VarpAddr addr = { .family = AF_INET };
374        addr.u.ip4.s_addr = saddr;
375        varp_update(&vnet, eth->h_source, &addr);
376    }
377
378    err = vnet_skb_recv(skb, vinfo);
379  exit:
380    if(vinfo) Vnet_decref(vinfo);
381    dprintf("< skb=%p err=%d\n", skb, err);
382    return err;
383}
384
385
386#ifdef __KERNEL__
387
388/** Handle an ICMP error related to etherip.
389 *
390 * @param skb ICMP error packet
391 * @param info
392 */
393static void etherip_protocol_icmp_err(struct sk_buff *skb, u32 info){
394    struct iphdr *iph = (struct iphdr*)skb->data;
395   
396    wprintf("> ICMP error type=%d code=%d addr=" IPFMT "\n",
397            skb->h.icmph->type, skb->h.icmph->code, NIPQUAD(iph->daddr));
398
399    if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
400        skb->h.icmph->code != ICMP_FRAG_NEEDED){
401        return;
402    }
403    wprintf("> MTU too big addr= " IPFMT "\n", NIPQUAD(iph->daddr)); 
404}
405
406//============================================================================
407#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
408// Code for 2.6 kernel.
409
410/** Etherip protocol. */
411static struct net_protocol etherip_protocol = {
412    .handler     = etherip_protocol_recv,
413    .err_handler = etherip_protocol_icmp_err,
414};
415
416static int etherip_protocol_add(void){
417    return inet_add_protocol(&etherip_protocol, IPPROTO_ETHERIP);
418}
419
420static int etherip_protocol_del(void){
421    return inet_del_protocol(&etherip_protocol, IPPROTO_ETHERIP);
422}
423
424//============================================================================
425#else
426//============================================================================
427// Code for 2.4 kernel.
428
429/** Etherip protocol. */
430static struct inet_protocol etherip_protocol = {
431    .name        = "ETHERIP",
432    .protocol    = IPPROTO_ETHERIP,
433    .handler     = etherip_protocol_recv,
434    .err_handler = etherip_protocol_icmp_err,
435};
436
437static int etherip_protocol_add(void){
438    inet_add_protocol(&etherip_protocol);
439    return 0;
440}
441
442static int etherip_protocol_del(void){
443    return inet_del_protocol(&etherip_protocol);
444}
445
446#endif
447//============================================================================
448
449
450/** Initialize the etherip module.
451 * Registers the etherip protocol.
452 *
453 * @return 0 on success, error code otherwise
454 */
455int __init etherip_module_init(void) {
456    int err = 0;
457    etherip_protocol_add();
458    return err;
459}
460
461/** Finalize the etherip module.
462 * Deregisters the etherip protocol.
463 */
464void __exit etherip_module_exit(void) {
465    if(etherip_protocol_del() < 0){
466        printk(KERN_INFO "%s: can't remove etherip protocol\n", __FUNCTION__);
467    }
468}
469
470#endif // __KERNEL__
Note: See TracBrowser for help on using the repository browser.