| 1 | #ifndef __X86_STRING_H__ |
|---|
| 2 | #define __X86_STRING_H__ |
|---|
| 3 | |
|---|
| 4 | #include <xen/config.h> |
|---|
| 5 | |
|---|
/*
 * Copy 'n' bytes from 'from' to 'to' when 'n' is only known at run time.
 *
 * Copies n/BYTES_PER_LONG machine words with "rep movs<word>", then
 * reloads the count register with the byte remainder (operand %4) and
 * finishes with "rep movsb".  Operands %0/%1/%2 (d0/d1/d2) only exist to
 * tell the compiler that CX/DI/SI are clobbered; %3 is the word count
 * tied to CX.
 *
 * NOTE(review): this is a forward copy — source and destination must not
 * overlap; use memmove() for potentially overlapping regions.
 */
static inline void *__variable_memcpy(void *to, const void *from, size_t n)
{
    long d0, d1, d2;
    __asm__ __volatile__ (
        " rep ; movs"__OS"\n"
        " mov %4,%3 \n"
        " rep ; movsb \n"
        : "=&c" (d0), "=&D" (d1), "=&S" (d2)
        : "0" (n/BYTES_PER_LONG), "r" (n%BYTES_PER_LONG), "1" (to), "2" (from)
        : "memory" );
    return to;
}
|---|
| 18 | |
|---|
/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */
/*
 * memcpy() for a compile-time-constant length 'n'.  Small and common
 * sizes (0-8, 12, 16, 20) are expanded into direct scalar loads/stores;
 * every other size falls through to a word-wise "rep movs" followed by a
 * tail of at most one movsl/movsw/movsb each.  Because 'n' is constant,
 * the second switch is folded away and exactly one COMMON() expansion
 * survives in the generated code.
 *
 * NOTE(review): the u16/u32/u64 casts type-pun the buffers; presumably
 * the build uses flags that tolerate this (e.g. -fno-strict-aliasing) —
 * confirm against the build configuration.
 */
static always_inline void * __constant_memcpy(
    void * to, const void * from, size_t n)
{
    switch ( n )
    {
    case 0:
        return to;
    case 1:
        *(u8 *)to = *(const u8 *)from;
        return to;
    case 2:
        *(u16 *)to = *(const u16 *)from;
        return to;
    case 3:
        *(u16 *)to = *(const u16 *)from;
        *(2+(u8 *)to) = *(2+(const u8 *)from);
        return to;
    case 4:
        *(u32 *)to = *(const u32 *)from;
        return to;
    case 5:
        *(u32 *)to = *(const u32 *)from;
        *(4+(u8 *)to) = *(4+(const u8 *)from);
        return to;
    case 6:
        *(u32 *)to = *(const u32 *)from;
        *(2+(u16 *)to) = *(2+(const u16 *)from);
        return to;
    case 7:
        *(u32 *)to = *(const u32 *)from;
        *(2+(u16 *)to) = *(2+(const u16 *)from);
        *(6+(u8 *)to) = *(6+(const u8 *)from);
        return to;
    case 8:
        *(u64 *)to = *(const u64 *)from;
        return to;
    case 12:
        *(u64 *)to = *(const u64 *)from;
        *(2+(u32 *)to) = *(2+(const u32 *)from);
        return to;
    case 16:
        *(u64 *)to = *(const u64 *)from;
        *(1+(u64 *)to) = *(1+(const u64 *)from);
        return to;
    case 20:
        *(u64 *)to = *(const u64 *)from;
        *(1+(u64 *)to) = *(1+(const u64 *)from);
        *(4+(u32 *)to) = *(4+(const u32 *)from);
        return to;
    }
/*
 * Word-wise copy plus tail 'x' (a sequence of movsl/movsw/movsb picked
 * by the remainder switch below).  %0/%1/%2 mark CX/DI/SI as clobbered.
 */
#define COMMON(x)                                        \
    __asm__ __volatile__ (                               \
        "rep ; movs"__OS                                 \
        x                                                \
        : "=&c" (d0), "=&D" (d1), "=&S" (d2)             \
        : "0" (n/BYTES_PER_LONG), "1" (to), "2" (from)   \
        : "memory" );
    {
        long d0, d1, d2;
        switch ( n % BYTES_PER_LONG )
        {
        case 0: COMMON(""); return to;
        case 1: COMMON("\n\tmovsb"); return to;
        case 2: COMMON("\n\tmovsw"); return to;
        case 3: COMMON("\n\tmovsw\n\tmovsb"); return to;
        case 4: COMMON("\n\tmovsl"); return to;
        case 5: COMMON("\n\tmovsl\n\tmovsb"); return to;
        case 6: COMMON("\n\tmovsl\n\tmovsw"); return to;
        case 7: COMMON("\n\tmovsl\n\tmovsw\n\tmovsb"); return to;
        }
    }
#undef COMMON
    return to;
}
|---|
| 97 | |
|---|
| 98 | #define __HAVE_ARCH_MEMCPY |
|---|
| 99 | #define memcpy(t,f,n) (__memcpy((t),(f),(n))) |
|---|
| 100 | static always_inline |
|---|
| 101 | void *__memcpy(void *t, const void *f, size_t n) |
|---|
| 102 | { |
|---|
| 103 | return (__builtin_constant_p(n) ? |
|---|
| 104 | __constant_memcpy((t),(f),(n)) : |
|---|
| 105 | __variable_memcpy((t),(f),(n))); |
|---|
| 106 | } |
|---|
| 107 | |
|---|
/* Some version of gcc don't have this builtin. It's non-critical anyway. */
#define __HAVE_ARCH_MEMMOVE
/* Out-of-line implementation; handles overlapping src/dest regions. */
extern void *memmove(void *dest, const void *src, size_t n);

#define __HAVE_ARCH_MEMCMP
/* Let the compiler builtin expand small fixed-size comparisons inline. */
#define memcmp __builtin_memcmp
|---|
| 114 | |
|---|
/*
 * Byte-at-a-time fill: "rep stosb" with the fill byte in AL.  Used when
 * the fill byte is not a compile-time constant.  Operands %0/%1 (d0/d1)
 * only mark CX/DI as clobbered; count and destination are tied to them.
 */
static inline void *__memset_generic(void *s, char c, size_t count)
{
    long d0, d1;
    __asm__ __volatile__ (
        "rep ; stosb"
        : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (count) : "memory" );
    return s;
}

/* we might want to write optimized versions of these later */
#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
|---|
| 126 | |
|---|
/*
 * memset(x,0,y) is a reasonably common thing to do, so we want to fill
 * things 32 bits at a time even when we don't know the size of the
 * area at compile-time..
 */
/*
 * Fill 'count' bytes with the full-word pattern 'c' (the fill byte has
 * already been replicated into every byte of 'c' by the memset() macro).
 * Word-wise "rep stos" first, then "mov %3,%4" reloads the count
 * register (%4 is tied to output %0, i.e. CX) with the byte remainder
 * (%3), and "rep stosb" finishes the tail.
 */
static inline void *__constant_c_memset(void *s, unsigned long c, size_t count)
{
    long d0, d1;
    __asm__ __volatile__(
        " rep ; stos"__OS"\n"
        " mov %3,%4 \n"
        " rep ; stosb \n"
        : "=&c" (d0), "=&D" (d1)
        : "a" (c), "r" (count%BYTES_PER_LONG),
          "0" (count/BYTES_PER_LONG), "1" (s)
        : "memory" );
    return s;
}
|---|
| 145 | |
|---|
| 146 | /* |
|---|
| 147 | * This looks horribly ugly, but the compiler can optimize it totally, |
|---|
| 148 | * as we by now know that both pattern and count is constant.. |
|---|
| 149 | */ |
|---|
| 150 | static always_inline void *__constant_c_and_count_memset( |
|---|
| 151 | void *s, unsigned long pattern, size_t count) |
|---|
| 152 | { |
|---|
| 153 | switch ( count ) |
|---|
| 154 | { |
|---|
| 155 | case 0: |
|---|
| 156 | return s; |
|---|
| 157 | case 1: |
|---|
| 158 | *(u8 *)s = pattern; |
|---|
| 159 | return s; |
|---|
| 160 | case 2: |
|---|
| 161 | *(u16 *)s = pattern; |
|---|
| 162 | return s; |
|---|
| 163 | case 3: |
|---|
| 164 | *(u16 *)s = pattern; |
|---|
| 165 | *(2+(u8 *)s) = pattern; |
|---|
| 166 | return s; |
|---|
| 167 | case 4: |
|---|
| 168 | *(u32 *)s = pattern; |
|---|
| 169 | return s; |
|---|
| 170 | case 5: |
|---|
| 171 | *(u32 *)s = pattern; |
|---|
| 172 | *(4+(u8 *)s) = pattern; |
|---|
| 173 | return s; |
|---|
| 174 | case 6: |
|---|
| 175 | *(u32 *)s = pattern; |
|---|
| 176 | *(2+(u16 *)s) = pattern; |
|---|
| 177 | return s; |
|---|
| 178 | case 7: |
|---|
| 179 | *(u32 *)s = pattern; |
|---|
| 180 | *(2+(u16 *)s) = pattern; |
|---|
| 181 | *(6+(u8 *)s) = pattern; |
|---|
| 182 | return s; |
|---|
| 183 | case 8: |
|---|
| 184 | *(u64 *)s = pattern; |
|---|
| 185 | return s; |
|---|
| 186 | } |
|---|
| 187 | #define COMMON(x) \ |
|---|
| 188 | __asm__ __volatile__ ( \ |
|---|
| 189 | "rep ; stos"__OS \ |
|---|
| 190 | x \ |
|---|
| 191 | : "=&c" (d0), "=&D" (d1) \ |
|---|
| 192 | : "a" (pattern), "0" (count/BYTES_PER_LONG), "1" (s) \ |
|---|
| 193 | : "memory" ) |
|---|
| 194 | { |
|---|
| 195 | long d0, d1; |
|---|
| 196 | switch ( count % BYTES_PER_LONG ) |
|---|
| 197 | { |
|---|
| 198 | case 0: COMMON(""); return s; |
|---|
| 199 | case 1: COMMON("\n\tstosb"); return s; |
|---|
| 200 | case 2: COMMON("\n\tstosw"); return s; |
|---|
| 201 | case 3: COMMON("\n\tstosw\n\tstosb"); return s; |
|---|
| 202 | case 4: COMMON("\n\tstosl"); return s; |
|---|
| 203 | case 5: COMMON("\n\tstosl\n\tstosb"); return s; |
|---|
| 204 | case 6: COMMON("\n\tstosl\n\tstosw"); return s; |
|---|
| 205 | case 7: COMMON("\n\tstosl\n\tstosw\n\tstosb"); return s; |
|---|
| 206 | } |
|---|
| 207 | } |
|---|
| 208 | #undef COMMON |
|---|
| 209 | return s; |
|---|
| 210 | } |
|---|
| 211 | |
|---|
/* Constant fill byte: 'c' is already the full-word replicated pattern
 * (see __memset below).  Pick the fully unrolled version when the count
 * is also constant. */
#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

/* Variable fill byte: no pattern can be precomputed, so both paths end
 * up in the byte-wise filler for now. */
#define __var_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

/* Multiplying a byte value by this replicates it into every byte of an
 * unsigned long (e.g. 0xab -> 0xab...ab). */
#ifdef CONFIG_X86_64
#define MEMSET_PATTERN_MUL 0x0101010101010101UL
#else
#define MEMSET_PATTERN_MUL 0x01010101UL
#endif

#define __HAVE_ARCH_MEMSET
#define memset(s, c, count) (__memset((s),(c),(count)))
/* Top-level dispatch: a constant fill byte gets its full-word pattern
 * computed at compile time; a variable byte takes the generic paths. */
#define __memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(MEMSET_PATTERN_MUL*(unsigned char)(c)),(count)) : \
 __var_x_memset((s),(c),(count)))
|---|
| 234 | |
|---|
| 235 | #endif /* __X86_STRING_H__ */ |
|---|