1 | /* |
---|
2 | * Copyright (C) 2005, 2006 Mike Wray <mike.wray@hp.com> |
---|
3 | * |
---|
4 | * This program is free software; you can redistribute it and/or modify |
---|
5 | * it under the terms of the GNU General Public License as published by the |
---|
6 | * Free Software Foundation; either version 2 of the License, or (at your |
---|
7 | * option) any later version. |
---|
8 | * |
---|
9 | * This program is distributed in the hope that it will be useful, but |
---|
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
11 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
12 | * for more details. |
---|
13 | * |
---|
14 | * You should have received a copy of the GNU General Public License along |
---|
15 | * with this program; if not, write to the Free software Foundation, Inc., |
---|
16 | * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA |
---|
17 | * |
---|
18 | */ |
---|
19 | #ifdef __KERNEL__ |
---|
20 | |
---|
21 | #include <linux/config.h> |
---|
22 | #include <linux/module.h> |
---|
23 | #include <linux/types.h> |
---|
24 | #include <linux/kernel.h> |
---|
25 | #include <linux/init.h> |
---|
26 | |
---|
27 | #include <linux/version.h> |
---|
28 | #include <linux/spinlock.h> |
---|
29 | |
---|
30 | #include <linux/skbuff.h> |
---|
31 | #include <linux/net.h> |
---|
32 | #include <linux/netdevice.h> |
---|
33 | #include <linux/in.h> |
---|
34 | #include <linux/inet.h> |
---|
35 | #include <linux/netfilter_bridge.h> |
---|
36 | #include <linux/netfilter_ipv4.h> |
---|
37 | #include <linux/udp.h> |
---|
38 | |
---|
39 | #include <net/ip.h> |
---|
40 | #include <net/protocol.h> |
---|
41 | #include <net/route.h> |
---|
42 | #include <net/checksum.h> |
---|
43 | |
---|
44 | #else |
---|
45 | |
---|
46 | #include <netinet/in.h> |
---|
47 | #include <arpa/inet.h> |
---|
48 | |
---|
49 | #include "sys_kernel.h" |
---|
50 | #include "spinlock.h" |
---|
51 | #include "skbuff.h" |
---|
52 | #include <linux/ip.h> |
---|
53 | #include <linux/udp.h> |
---|
54 | |
---|
55 | #endif |
---|
56 | |
---|
57 | #include <varp.h> |
---|
58 | #include <if_varp.h> |
---|
59 | #include <varp.h> |
---|
60 | #include <skb_util.h> |
---|
61 | #include <skb_context.h> |
---|
62 | |
---|
63 | #include "allocate.h" |
---|
64 | #include "iostream.h" |
---|
65 | #include "hash_table.h" |
---|
66 | #include "vnet_forward.h" |
---|
67 | |
---|
68 | #define MODULE_NAME "VNET" |
---|
69 | #define DEBUG 1 |
---|
70 | #undef DEBUG |
---|
71 | #include "debug.h" |
---|
72 | |
---|
/* Packet transmit helper; it is only declared here, not defined in this
 * file. Callers below pass the skb to send and an IPv4 source address.
 */
extern int _skb_xmit(struct sk_buff *skb, uint32_t saddr);
---|
74 | |
---|
75 | typedef struct VnetPeer { |
---|
76 | struct VarpAddr addr; |
---|
77 | uint16_t port; |
---|
78 | atomic_t refcount; |
---|
79 | int tx_packets; |
---|
80 | int rx_packets; |
---|
81 | } VnetPeer; |
---|
82 | |
---|
83 | static HashTable *vnet_peer_table = NULL; |
---|
84 | static rwlock_t vnet_peer_table_lock = RW_LOCK_UNLOCKED; |
---|
85 | |
---|
86 | #define vnet_peer_read_lock(flags) read_lock_irqsave(&vnet_peer_table_lock, (flags)) |
---|
87 | #define vnet_peer_read_unlock(flags) read_unlock_irqrestore(&vnet_peer_table_lock, (flags)) |
---|
88 | #define vnet_peer_write_lock(flags) write_lock_irqsave(&vnet_peer_table_lock, (flags)) |
---|
89 | #define vnet_peer_write_unlock(flags) write_unlock_irqrestore(&vnet_peer_table_lock, (flags)) |
---|
90 | |
---|
91 | static void VnetPeer_decref(VnetPeer *peer){ |
---|
92 | if(!peer) return; |
---|
93 | if(atomic_dec_and_test(&peer->refcount)){ |
---|
94 | kfree(peer); |
---|
95 | } |
---|
96 | } |
---|
97 | |
---|
98 | static void VnetPeer_incref(VnetPeer *peer){ |
---|
99 | if(!peer) return; |
---|
100 | atomic_inc(&peer->refcount); |
---|
101 | } |
---|
102 | |
---|
103 | static void VnetPeer_print(VnetPeer *peer, IOStream *io){ |
---|
104 | char addrbuf[VARP_ADDR_BUF]; |
---|
105 | |
---|
106 | IOStream_print(io, "(vnet_peer\n"); |
---|
107 | IOStream_print(io, " (addr %s)\n", VarpAddr_ntoa(&peer->addr, addrbuf)); |
---|
108 | IOStream_print(io, " (port %d)\n", htons(peer->port)); |
---|
109 | IOStream_print(io, " (tx_packets %d)\n", peer->tx_packets); |
---|
110 | IOStream_print(io, " (rx_packets %d)\n", peer->tx_packets); |
---|
111 | IOStream_print(io, ")\n"); |
---|
112 | } |
---|
113 | |
---|
114 | static int VnetPeer_forward(VnetPeer *peer, struct sk_buff *fwdskb){ |
---|
115 | int err = 0; |
---|
116 | const int ip_n = sizeof(struct iphdr); |
---|
117 | const int udp_n = sizeof(struct udphdr); |
---|
118 | const int vnet_n = sizeof(struct VnetMsgHdr); |
---|
119 | int head_n = 16 + ip_n + udp_n + vnet_n; |
---|
120 | int push_n = 0; |
---|
121 | struct sk_buff *skb = NULL; |
---|
122 | struct VnetMsgHdr *vhdr; |
---|
123 | uint32_t saddr = 0; |
---|
124 | uint16_t sport = varp_port; |
---|
125 | uint32_t daddr = peer->addr.u.ip4.s_addr; |
---|
126 | uint16_t dport = varp_port; |
---|
127 | |
---|
128 | if(!fwdskb) goto exit; |
---|
129 | if(daddr == fwdskb->nh.iph->saddr){ |
---|
130 | // Don't forward if the skb src addr is the peer addr. |
---|
131 | dprintf("> Forward loop on " IPFMT "\n", NIPQUAD(daddr)); |
---|
132 | goto exit; |
---|
133 | } |
---|
134 | // On entry fwdskb->data should be at fwdskb->nh.raw (adjust if not). |
---|
135 | // Also fwdskb->h.raw and fwdskb->nh.raw are set. |
---|
136 | if(fwdskb->data > fwdskb->nh.raw){ |
---|
137 | push_n = fwdskb->data - fwdskb->nh.raw; |
---|
138 | head_n += push_n; |
---|
139 | } |
---|
140 | // If has headroom, copies header (which incs ref on dst), |
---|
141 | // otherwise only clones header, which does not inc ref on dst. |
---|
142 | skb = skb_realloc_headroom(fwdskb, head_n); |
---|
143 | //skb = skb_copy_expand(fwdskb, head_n, 0, GFP_ATOMIC); |
---|
144 | if(!skb){ |
---|
145 | err = -ENOMEM; |
---|
146 | goto exit; |
---|
147 | } |
---|
148 | |
---|
149 | if(push_n){ |
---|
150 | skb_push(skb, push_n); |
---|
151 | } |
---|
152 | |
---|
153 | #ifdef DEBUG |
---|
154 | printk("\nOriginal packet:\n"); |
---|
155 | print_iphdr(__FUNCTION__, skb); |
---|
156 | skb_print_bits(__FUNCTION__, skb, 0, skb->len); |
---|
157 | #endif |
---|
158 | |
---|
159 | skb->mac.raw = NULL; |
---|
160 | vhdr = (void*)skb_push(skb, vnet_n); |
---|
161 | vhdr->id = htons(VFWD_ID); |
---|
162 | vhdr->opcode = 0; |
---|
163 | |
---|
164 | // Setup the UDP header. |
---|
165 | skb->h.raw = skb_push(skb, udp_n); |
---|
166 | skb->h.uh->source = sport; // Source port. |
---|
167 | skb->h.uh->dest = dport; // Destination port. |
---|
168 | skb->h.uh->len = htons(skb->len); // Total packet length (bytes). |
---|
169 | skb->h.uh->check = 0; |
---|
170 | |
---|
171 | // Setup the IP header. |
---|
172 | skb->nh.raw = skb_push(skb, ip_n); |
---|
173 | skb->nh.iph->version = 4; // Standard version. |
---|
174 | skb->nh.iph->ihl = ip_n / 4; // IP header length (32-bit words). |
---|
175 | skb->nh.iph->tos = 0; // No special type-of-service. |
---|
176 | skb->nh.iph->tot_len = htons(skb->len); // Total packet length (bytes). |
---|
177 | skb->nh.iph->id = 0; // No flow id. |
---|
178 | skb->nh.iph->protocol = IPPROTO_UDP; // IP protocol number. |
---|
179 | skb->nh.iph->frag_off = 0; |
---|
180 | skb->nh.iph->ttl = 64; // Linux default time-to-live. |
---|
181 | skb->nh.iph->saddr = saddr; // Source address. |
---|
182 | skb->nh.iph->daddr = daddr; // Destination address. |
---|
183 | skb->nh.iph->check = 0; |
---|
184 | |
---|
185 | #ifdef DEBUG |
---|
186 | printk("\nWrapped packet:\n"); |
---|
187 | print_iphdr(__FUNCTION__, skb); |
---|
188 | print_udphdr(__FUNCTION__, skb); |
---|
189 | skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len); |
---|
190 | #endif |
---|
191 | |
---|
192 | err = _skb_xmit(skb, saddr); |
---|
193 | peer->tx_packets++; |
---|
194 | |
---|
195 | exit: |
---|
196 | if(err < 0) kfree_skb(skb); |
---|
197 | return err; |
---|
198 | } |
---|
199 | |
---|
200 | int vnet_peer_get(VarpAddr *addr, VnetPeer **peer){ |
---|
201 | unsigned long flags; |
---|
202 | |
---|
203 | vnet_peer_read_lock(flags); |
---|
204 | *peer = HashTable_get(vnet_peer_table, addr); |
---|
205 | VnetPeer_incref(*peer); |
---|
206 | vnet_peer_read_unlock(flags); |
---|
207 | return (*peer ? 0 : -ENOENT); |
---|
208 | } |
---|
209 | |
---|
210 | int vnet_peer_add(VarpAddr *addr, uint16_t port){ |
---|
211 | int err = 0; |
---|
212 | unsigned long flags; |
---|
213 | VnetPeer *peer; |
---|
214 | |
---|
215 | vnet_peer_write_lock(flags); |
---|
216 | peer = HashTable_get(vnet_peer_table, addr); |
---|
217 | if(peer){ |
---|
218 | VnetPeer_incref(peer); |
---|
219 | goto exit; |
---|
220 | } |
---|
221 | peer = ALLOCATE(VnetPeer); |
---|
222 | if(!peer){ |
---|
223 | err = -ENOMEM; |
---|
224 | goto exit; |
---|
225 | } |
---|
226 | peer->addr = *addr; |
---|
227 | peer->port = port; |
---|
228 | VnetPeer_incref(peer); |
---|
229 | if(!HashTable_add(vnet_peer_table, &peer->addr, peer)){ |
---|
230 | VnetPeer_decref(peer); |
---|
231 | err = -ENOMEM; |
---|
232 | } |
---|
233 | exit: |
---|
234 | vnet_peer_write_unlock(flags); |
---|
235 | return err; |
---|
236 | } |
---|
237 | |
---|
238 | int vnet_peer_del(VarpAddr *addr){ |
---|
239 | int ret = 0; |
---|
240 | unsigned long flags; |
---|
241 | |
---|
242 | vnet_peer_write_lock(flags); |
---|
243 | ret = HashTable_remove(vnet_peer_table, addr); |
---|
244 | vnet_peer_write_unlock(flags); |
---|
245 | return ret; |
---|
246 | } |
---|
247 | |
---|
248 | void vnet_peer_print(IOStream *io){ |
---|
249 | HashTable_for_decl(entry); |
---|
250 | unsigned long flags; |
---|
251 | |
---|
252 | if(!vnet_peer_table) return; |
---|
253 | vnet_peer_read_lock(flags); |
---|
254 | HashTable_for_each(entry, vnet_peer_table){ |
---|
255 | VnetPeer *peer = entry->value; |
---|
256 | VnetPeer_print(peer, io); |
---|
257 | } |
---|
258 | vnet_peer_read_unlock(flags); |
---|
259 | } |
---|
260 | |
---|
261 | int vnet_forward_send(struct sk_buff *skb){ |
---|
262 | int err = 0; |
---|
263 | unsigned long flags; |
---|
264 | HashTable_for_decl(entry); |
---|
265 | int count = 0; |
---|
266 | |
---|
267 | if(!vnet_peer_table){ |
---|
268 | goto exit; |
---|
269 | } |
---|
270 | vnet_peer_read_lock(flags); |
---|
271 | HashTable_for_each(entry, vnet_peer_table){ |
---|
272 | VnetPeer *peer = entry->value; |
---|
273 | VnetPeer_forward(peer, skb); |
---|
274 | count++; |
---|
275 | } |
---|
276 | vnet_peer_read_unlock(flags); |
---|
277 | exit: |
---|
278 | return err; |
---|
279 | } |
---|
280 | |
---|
281 | int vnet_forward_recv(struct sk_buff *skb){ |
---|
282 | int err = 0; |
---|
283 | VarpAddr addr = { .family = AF_INET }; |
---|
284 | VnetPeer *peer = NULL; |
---|
285 | unsigned char eth[ETH_HLEN] = {}; |
---|
286 | struct sk_buff *recvskb; |
---|
287 | |
---|
288 | if(!vnet_peer_table){ |
---|
289 | dprintf("> no table\n"); |
---|
290 | return -ENOSYS; |
---|
291 | } |
---|
292 | // On entry mac.raw, h.raw, nh.raw are set. |
---|
293 | // skb->data points after the fwd vnet header, at the complete |
---|
294 | // forwarded packet (which has IP hdr, no eth hdr). |
---|
295 | |
---|
296 | // Save the eth hdr and source addr (peer). |
---|
297 | memcpy(eth, skb->mac.raw, ETH_HLEN); |
---|
298 | addr.u.ip4.s_addr = skb->nh.iph->saddr; |
---|
299 | err = vnet_peer_get(&addr, &peer); |
---|
300 | if(err){ |
---|
301 | wprintf("> no peer for " IPFMT "\n", NIPQUAD(skb->nh.iph->saddr)); |
---|
302 | goto exit; |
---|
303 | } |
---|
304 | peer->rx_packets++; |
---|
305 | skb->mac.raw = NULL; |
---|
306 | skb->nh.raw = skb->data; |
---|
307 | skb->h.raw = (void*)(skb->nh.iph + 1); |
---|
308 | if(!skb->nh.iph->saddr){ |
---|
309 | skb->nh.iph->saddr = addr.u.ip4.s_addr; |
---|
310 | } |
---|
311 | #ifdef __KERNEL__ |
---|
312 | // Fix IP options, checksum, skb dst, netfilter state. |
---|
313 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); |
---|
314 | skb->dev = NULL; |
---|
315 | dst_release(skb->dst); |
---|
316 | skb->dst = NULL; |
---|
317 | nf_reset(skb); |
---|
318 | #endif // __KERNEL__ |
---|
319 | |
---|
320 | skb->mac.raw = skb->nh.raw - ETH_HLEN; |
---|
321 | memcpy(skb->mac.raw, eth, ETH_HLEN); |
---|
322 | |
---|
323 | // Map destination mcast addresses to our mcast address. |
---|
324 | if(MULTICAST(skb->nh.iph->daddr)){ |
---|
325 | skb->nh.iph->daddr = varp_mcast_addr; |
---|
326 | //xmit does this: ip_eth_mc_map(varp_mcast_addr, eth_hdr(skb)->h_dest); |
---|
327 | } |
---|
328 | |
---|
329 | // Handle (a copy of) it ourselves, because |
---|
330 | // if it is looped-back by xmit it will be ignored. |
---|
331 | //recvskb = skb_clone(skb, GFP_ATOMIC); |
---|
332 | recvskb = pskb_copy(skb, GFP_ATOMIC); |
---|
333 | if(recvskb){ |
---|
334 | // Data points at the unwrapped iphdr, but varp_handle_message() |
---|
335 | // expects it to point at the udphdr, so pull. |
---|
336 | skb_pull(recvskb, sizeof(struct iphdr)); |
---|
337 | if(varp_handle_message(recvskb) <= 0){ |
---|
338 | kfree_skb(recvskb); |
---|
339 | } |
---|
340 | } |
---|
341 | err = _skb_xmit(skb, skb->nh.iph->saddr); |
---|
342 | if(err >= 0) err = 1; |
---|
343 | exit: |
---|
344 | return err; |
---|
345 | } |
---|
346 | |
---|
347 | /** Hash function for keys in the peer table. |
---|
348 | */ |
---|
349 | static Hashcode peer_key_hash_fn(void *k){ |
---|
350 | return hash_hvoid(0, k, sizeof(struct VarpAddr)); |
---|
351 | } |
---|
352 | |
---|
353 | /** Equality function for keys in the peer table. |
---|
354 | */ |
---|
355 | static int peer_key_equal_fn(void *k1, void *k2){ |
---|
356 | return memcmp(k1, k2, sizeof(struct VarpAddr)) == 0; |
---|
357 | } |
---|
358 | |
---|
359 | static void peer_entry_free_fn(HashTable *table, HTEntry *entry){ |
---|
360 | if(!entry) return; |
---|
361 | VnetPeer_decref((VnetPeer*)entry->value); |
---|
362 | HTEntry_free(entry); |
---|
363 | } |
---|
364 | |
---|
365 | int vnet_forward_init(void){ |
---|
366 | int err = 0; |
---|
367 | if(vnet_peer_table) goto exit; |
---|
368 | vnet_peer_table = HashTable_new(0); |
---|
369 | if(!vnet_peer_table){ |
---|
370 | err = -ENOMEM; |
---|
371 | goto exit; |
---|
372 | } |
---|
373 | vnet_peer_table->key_size = sizeof(struct VarpAddr); |
---|
374 | vnet_peer_table->key_equal_fn = peer_key_equal_fn; |
---|
375 | vnet_peer_table->key_hash_fn = peer_key_hash_fn; |
---|
376 | vnet_peer_table->entry_free_fn = peer_entry_free_fn; |
---|
377 | exit: |
---|
378 | return err; |
---|
379 | } |
---|
380 | |
---|
381 | void vnet_forward_exit(void){ |
---|
382 | HashTable_free(vnet_peer_table); |
---|
383 | vnet_peer_table = NULL; |
---|
384 | } |
---|