Index: ioemu/hw/vga.c =================================================================== --- ioemu.orig/hw/vga.c 2007-05-02 10:32:35.000000000 +0100 +++ ioemu/hw/vga.c 2007-05-02 10:35:05.000000000 +0100 @@ -1359,6 +1359,105 @@ } } +static inline int cmp_vram(VGAState *s, int offset, int n) +{ + long *vp, *sp; + + if (s->vram_shadow == NULL) + return 1; + vp = (long *)(s->vram_ptr + offset); + sp = (long *)(s->vram_shadow + offset); + while ((n -= sizeof(*vp)) >= 0) { + if (*vp++ != *sp++) { + memcpy(sp - 1, vp - 1, n + sizeof(*vp)); + return 1; + } + } + return 0; +} + +#ifdef USE_SSE2 + +#include +#include +#include + +int sse2_ok = 1; + +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + +#ifdef __x86_64__ +#define __bx "rbx" +#else +#define __bx "ebx" +#endif + __asm__("push %%"__bx"; cpuid; pop %%"__bx + : "=a" (eax), "=d" (edx) + : "0" (op) + : "cx"); +#undef __bx + + return edx; +} + +jmp_buf sse_jbuf; + +void intr(int sig) +{ + sse2_ok = 0; + longjmp(sse_jbuf, 1); +} + +void check_sse2(void) +{ + /* Check 1: What does CPUID say? */ + if ((cpuid_edx(1) & 0x4000000) == 0) { + sse2_ok = 0; + return; + } + + /* Check 2: Can we use SSE2 in anger? */ + signal(SIGILL, intr); + if (setjmp(sse_jbuf) == 0) + __asm__("xorps %xmm0,%xmm0\n"); +} + +int vram_dirty(VGAState *s, int offset, int n) +{ + __m128i *sp, *vp; + + if (s->vram_shadow == NULL) + return 1; + if (sse2_ok == 0) + return cmp_vram(s, offset, n); + vp = (__m128i *)(s->vram_ptr + offset); + sp = (__m128i *)(s->vram_shadow + offset); + while ((n -= sizeof(*vp)) >= 0) { + if (_mm_movemask_epi8(_mm_cmpeq_epi8(*sp, *vp)) != 0xffff) { + while (n >= 0) { + _mm_store_si128(sp++, _mm_load_si128(vp++)); + n -= sizeof(*vp); + } + return 1; + } + sp++; + vp++; + } + return 0; +} +#else /* !USE_SSE2 */ +int vram_dirty(VGAState *s, int offset, int n) +{ + return cmp_vram(s, offset, n); +} + +void check_sse2(void) +{ +} +#endif /* !USE_SSE2 */ + /* * graphic modes */ @@ -1454,6 +1553,11 @@ printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n", width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]); #endif + + for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE) + if (vram_dirty(s, y, TARGET_PAGE_SIZE)) + cpu_physical_memory_set_dirty(s->vram_offset + y); + addr1 = (s->start_addr * 4); bwidth = width * 4; y_start = -1; @@ -1889,7 +1993,18 @@ vga_reset(s); - s->vram_ptr = qemu_malloc(vga_ram_size); + check_sse2(); + s->vram_shadow = qemu_malloc(vga_ram_size+TARGET_PAGE_SIZE+1); + if (s->vram_shadow == NULL) + fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, " + "mouse will be slow\n", vga_ram_size); + s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + TARGET_PAGE_SIZE - 1) + & ~(TARGET_PAGE_SIZE - 1)); + + /* Video RAM must be 128-bit aligned for SSE optimizations later */ + s->vram_alloc = qemu_malloc(vga_ram_size + 15); + s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L); + s->vram_offset = vga_ram_offset; s->vram_size = vga_ram_size; s->ds = ds; @@ -2013,7 +2128,7 @@ } if (!vga_ram_base) { - vga_ram_base = qemu_malloc(vga_ram_size); + vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1); if (!vga_ram_base) { fprintf(stderr, "reallocate error\n"); return NULL; @@ -2021,8 +2136,10 @@ } /* XXX lock needed? */ + old_pointer = s->vram_alloc; + s->vram_alloc = vga_ram_base; + vga_ram_base = (uint8_t *)((long)(vga_ram_base + 15) & ~15L); memcpy(vga_ram_base, s->vram_ptr, vga_ram_size); - old_pointer = s->vram_ptr; s->vram_ptr = vga_ram_base; return old_pointer; Index: ioemu/hw/vga_int.h =================================================================== --- ioemu.orig/hw/vga_int.h 2007-05-02 10:32:35.000000000 +0100 +++ ioemu/hw/vga_int.h 2007-05-02 10:35:10.000000000 +0100 @@ -78,7 +78,9 @@ #define VGA_MAX_HEIGHT 2048 #define VGA_STATE_COMMON \ + uint8_t *vram_alloc; \ uint8_t *vram_ptr; \ + uint8_t *vram_shadow; \ unsigned long vram_offset; \ unsigned int vram_size; \ unsigned long bios_offset; \