source: trunk/packages/xen-3.1/xen-3.1/tools/ioemu/patches/xen-mapcache @ 34

Last change on this file since 34 was 34, checked in by hartmans, 17 years ago

Add xen and xen-common

File size: 13.7 KB
# HG changeset patch
# User kfraser@localhost.localdomain
# Node ID 67a06a9b7b1dca707e1cd3b08ae0a341d6e97b3d
# Parent  3f0ca90351e268084fbdb733d70fc596cb46537d
[HVM] qemu: Add guest address-space mapping cache.

On an IA32 or IA32 PAE host, we currently cannot create an HVM guest
with more than 2G of memory, because Qemu can rarely find a large
enough contiguous virtual address range to map the guest's whole
physical address space. The attached patch fixes this issue by
dynamically mapping small blocks of guest memory on demand.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

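A minimal standalone sketch of the address-splitting scheme described
above (not part of the patch; the constants mirror the i386 values
defined below, and main() is illustration only):

#include <stdio.h>

#define MCACHE_BUCKET_SHIFT 16                    /* 64KB buckets (i386) */
#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)
#define MAX_MCACHE_SIZE     0x40000000UL          /* 1GB of mappings max */
#define NR_BUCKETS          (MAX_MCACHE_SIZE >> MCACHE_BUCKET_SHIFT)

int main(void)
{
    unsigned long phys_addr = 0x12345678UL;       /* example guest address */

    /* High bits select a bucket; low bits are the offset inside it. */
    unsigned long index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    unsigned long offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    /* Buckets hash into a fixed-size table, so distinct guest regions
     * can evict one another instead of exhausting host virtual memory. */
    printf("bucket index %#lx, slot %lu, offset %#lx\n",
           index, index % NR_BUCKETS, offset);
    return 0;
}

Here address 0x12345678 lands in bucket 0x1234 at offset 0x5678, so only
that one 64KB bucket needs a live mapping, not the whole guest.
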
Index: ioemu/vl.c
===================================================================
--- ioemu.orig/vl.c     2007-05-03 15:12:21.000000000 +0100
+++ ioemu/vl.c  2007-05-03 15:12:41.000000000 +0100
@@ -286,7 +286,7 @@
     for(i = start; i < start + length; i += size) {
         ioport_write_table[bsize][i] = func;
         if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
-            hw_error("register_ioport_read: invalid opaque");
+            hw_error("register_ioport_write: invalid opaque");
         ioport_opaque[i] = opaque;
     }
     return 0;
@@ -5894,6 +5894,157 @@
     suspend_requested = 1;
 }
 
+#if defined(MAPCACHE)
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>PAGE_SHIFT);
+};
+
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+/* For most cases (>99.9%), the page address is the same. */
+static unsigned long last_address_index = ~0UL;
+static uint8_t      *last_address_vaddr;
+
+static int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    nr_buckets = (((MAX_MCACHE_SIZE >> PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - PAGE_SHIFT));
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = nr_buckets * sizeof(struct map_cache);
+    size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+    mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANONYMOUS, 0, 0);
+    if (mapcache_entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(struct map_cache *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+    vaddr_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE,
+                                      pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base  = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0)
+            word = (word << 1) | !(pfns[i + --j] & 0xF0000000UL);
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index ||
+        !test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        qemu_remap_bucket(entry, address_index);
+
+    if (!test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        return NULL;
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+
+    mapcache_lock();
+
+    for (i = 0; i < nr_buckets; i++) {
+        struct map_cache *entry = &mapcache_entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base  = NULL;
+    }
+
+    last_address_index =  ~0UL;
+    last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+
+#endif /* defined(MAPCACHE) */
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -5930,8 +6081,11 @@
     unsigned long ioreq_pfn;
     extern void *shared_page;
     extern void *buffered_io_page;
-    extern void *buffered_pio_page;
+#ifdef __ia64__
     unsigned long nr_pages;
+    xen_pfn_t *page_array;
+    extern void *buffered_pio_page;
+#endif
 
     char qemu_dm_logfilename[64];
 
@@ -6221,6 +6375,7 @@
                 break;
             case QEMU_OPTION_m:
                 ram_size = atol(optarg) * 1024 * 1024;
+                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
                 if (ram_size <= 0)
                     help();
 #ifndef CONFIG_DM
@@ -6482,30 +6637,15 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-    nr_pages = ram_size/PAGE_SIZE;
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
-    if (page_array == NULL) {
-        fprintf(logfile, "malloc returned error %d\n", errno);
-        exit(-1);
-    }
-
-    for ( i = 0; i < nr_pages; i++)
-        page_array[i] = i;
-
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if (qemu_map_cache_init()) {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "shared page at pfn %lx\n", ioreq_pfn);
     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[ioreq_pfn]);
+                                       PROT_READ|PROT_WRITE, ioreq_pfn);
     if (shared_page == NULL) {
         fprintf(logfile, "map shared IO page returned error %d\n", errno);
         exit(-1);
@@ -6514,15 +6654,12 @@
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "buffered io page at pfn %lx\n", ioreq_pfn);
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            page_array[ioreq_pfn]);
+                                            PROT_READ|PROT_WRITE, ioreq_pfn);
     if (buffered_io_page == NULL) {
         fprintf(logfile, "map buffered IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    free(page_array);
-
 #elif defined(__ia64__)
 
     nr_pages = ram_size/PAGE_SIZE;
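
A note on the validity bitmap built in qemu_remap_bucket() above:
xc_map_foreign_batch() flags frames it could not map by setting high bits
in the corresponding pfns[] slot, and the packing loop condenses that into
one bit per page (LSB = first page of each word-sized group). A small
self-contained sketch of that loop — the 0xF0000000 error mask comes from
the code above, while pack_valid_bits() and the driver in main() are
hypothetical:

#include <stdio.h>

#define BITS_PER_LONG (sizeof(long)*8)

static unsigned long pack_valid_bits(const unsigned long *pfns, unsigned int n)
{
    unsigned long word = 0;
    unsigned int j = n;              /* n is at most BITS_PER_LONG pages */

    /* Walk backwards so that bit 0 ends up describing pfns[0]. */
    while (j > 0)
        word = (word << 1) | !(pfns[--j] & 0xF0000000UL);
    return word;
}

int main(void)
{
    /* Example: four pages, where the third one failed to map. */
    unsigned long pfns[4] = { 0x100, 0x101, 0x102 | 0xF0000000UL, 0x103 };

    printf("valid bits: 0x%lx\n", pack_valid_bits(pfns, 4)); /* 0xb = 1011 */
    return 0;
}

qemu_map_cache() then consults this bitmap via test_bit(), returning NULL
for pages that are not RAM-backed rather than a dangling pointer.
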
Index: ioemu/target-i386-dm/exec-dm.c
===================================================================
--- ioemu.orig/target-i386-dm/exec-dm.c 2007-05-03 15:10:22.000000000 +0100
+++ ioemu/target-i386-dm/exec-dm.c      2007-05-03 15:12:34.000000000 +0100
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -127,10 +128,17 @@
 FILE *logfile;
 int loglevel;
 
+#ifdef MAPCACHE
+pthread_mutex_t mapcache_mutex;
+#endif
+
 void cpu_exec_init(CPUState *env)
 {
     CPUState **penv;
     int cpu_index;
+#ifdef MAPCACHE
+    pthread_mutexattr_t mxattr;
+#endif
 
     env->next_cpu = NULL;
     penv = &first_cpu;
@@ -144,6 +152,14 @@
 
     /* alloc dirty bits array */
     phys_ram_dirty = qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS);
+
+#ifdef MAPCACHE
+    /* setup memory access mutex to protect mapcache */
+    pthread_mutexattr_init(&mxattr);
+    pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE);
+    pthread_mutex_init(&mapcache_mutex, &mxattr);
+    pthread_mutexattr_destroy(&mxattr);
+#endif
 }
 
 /* enable or disable low levels log */
@@ -409,16 +425,11 @@
         return 0;
 }
 
-static inline int paddr_is_ram(target_phys_addr_t addr)
-{
-    /* Is this guest physical address RAM-backed? */
-#if defined(CONFIG_DM) && (defined(__i386__) || defined(__x86_64__))
-    return ((addr < HVM_BELOW_4G_MMIO_START) ||
-            (addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH));
-#else
-    return (addr < ram_size);
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) ((addr < ram_size) ? (phys_ram_base + (x)) : NULL)
 #endif
-}
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
@@ -426,13 +437,15 @@
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-   
+
+    mapcache_lock();
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);
        if (l > len)
             l = len;
-       
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -452,11 +465,11 @@
                     io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif
             }
         } else {
@@ -477,9 +490,9 @@
                     stb_raw(buf, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len);
@@ -489,6 +502,8 @@
         buf += l;
         addr += l;
     }
+
+    mapcache_unlock();
 }
 #endif
 
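One design note on the section above: the mapcache mutex is created with
PTHREAD_MUTEX_RECURSIVE, presumably so that a thread already holding
mapcache_lock() inside cpu_physical_memory_rw() can re-enter a code path
that locks again without deadlocking. A minimal standalone sketch of that
behaviour (function names here are hypothetical, not from the patch):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mapcache_mutex;

static void inner_access(void)
{
    pthread_mutex_lock(&mapcache_mutex);   /* same thread, second lock: OK */
    puts("nested access under recursive lock");
    pthread_mutex_unlock(&mapcache_mutex);
}

int main(void)
{
    pthread_mutexattr_t attr;

    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&mapcache_mutex, &attr);
    pthread_mutexattr_destroy(&attr);

    pthread_mutex_lock(&mapcache_mutex);   /* outer critical section */
    inner_access();                        /* deadlocks if non-recursive */
    pthread_mutex_unlock(&mapcache_mutex);
    return 0;
}
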
Index: ioemu/vl.h
===================================================================
--- ioemu.orig/vl.h     2007-05-03 15:12:20.000000000 +0100
+++ ioemu/vl.h  2007-05-03 15:12:34.000000000 +0100
@@ -156,6 +156,28 @@
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#define MAPCACHE
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+void     qemu_invalidate_map_cache(void);
+
+#include <pthread.h>
+extern  pthread_mutex_t mapcache_mutex;
+#define mapcache_lock() pthread_mutex_lock(&mapcache_mutex)
+#define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex)
+
+#else
+
+#define qemu_invalidate_map_cache() ((void)0)
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif
+
 extern int xc_handle;
 extern int domid;
 
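A hedged usage sketch for the interface declared above, assuming vl.h is
included and MAPCACHE is defined — guest_read_byte() is hypothetical, not
part of the patch:

/* Copy one byte out of guest RAM through the mapcache. */
uint8_t guest_read_byte(target_phys_addr_t addr)
{
    uint8_t *p, v = 0xff;

    mapcache_lock();
    p = qemu_map_cache(addr);       /* NULL if the page is not RAM-backed */
    if (p != NULL)
        v = *p;
    mapcache_unlock();              /* pointer only trusted under the lock */
    return v;
}

The lock matters: qemu_invalidate_map_cache() may munmap() every bucket,
so a cached pointer must not be used once the lock is dropped.
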
Index: ioemu/target-i386-dm/cpu.h
===================================================================
--- ioemu.orig/target-i386-dm/cpu.h     2007-05-03 15:10:22.000000000 +0100
+++ ioemu/target-i386-dm/cpu.h  2007-05-03 15:12:21.000000000 +0100
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
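
The TARGET_LONG_BITS change above apparently widens physical-address
arithmetic for 32-bit targets: a guest with physical memory above 4GB
would otherwise see addresses truncated before they reach
qemu_map_cache(). A small standalone illustration (nothing here is from
the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t guest_addr = 0x100001000ULL;   /* guest physical address >4GB */
    uint32_t truncated  = (uint32_t)guest_addr;

    /* The truncated value selects a completely different bucket. */
    printf("64-bit %#llx -> 32-bit %#x\n",
           (unsigned long long)guest_addr, (unsigned)truncated);
    return 0;
}
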
Index: ioemu/target-i386-dm/helper2.c
===================================================================
--- ioemu.orig/target-i386-dm/helper2.c 2007-05-03 15:12:19.000000000 +0100
+++ ioemu/target-i386-dm/helper2.c      2007-05-03 15:12:21.000000000 +0100
@@ -526,6 +526,9 @@
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
         break;
+    case IOREQ_TYPE_INVALIDATE:
+        qemu_invalidate_map_cache();
+        break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }