| 1 | #include <xen/config.h> |
|---|
| 2 | #include <xen/init.h> |
|---|
| 3 | #include <xen/bitops.h> |
|---|
| 4 | #include <xen/mm.h> |
|---|
| 5 | #include <xen/smp.h> |
|---|
| 6 | #include <asm/io.h> |
|---|
| 7 | #include <asm/msr.h> |
|---|
| 8 | #include <asm/processor.h> |
|---|
| 9 | #include <asm/hvm/support.h> |
|---|
| 10 | |
|---|
| 11 | #include "cpu.h" |
|---|
| 12 | |
|---|
| 13 | /* |
|---|
| 14 | * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush |
|---|
| 15 | * filter on AMD 64-bit processors. |
|---|
| 16 | */ |
|---|
| 17 | static int flush_filter_force; |
|---|
| 18 | static void flush_filter(char *s) |
|---|
| 19 | { |
|---|
| 20 | if (!strcmp(s, "off")) |
|---|
| 21 | flush_filter_force = -1; |
|---|
| 22 | if (!strcmp(s, "on")) |
|---|
| 23 | flush_filter_force = 1; |
|---|
| 24 | } |
|---|
| 25 | custom_param("amd_flush_filter", flush_filter); |
|---|
| 26 | |
|---|
/*
 * Stubbed to 0: Xen does not track num_physpages, so the K6
 * write-allocation sizing in init_amd() always computes mbytes == 0.
 */
#define num_physpages 0
|---|
| 28 | |
|---|
| 29 | /* |
|---|
| 30 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause |
|---|
| 31 | * misexecution of code under Linux. Owners of such processors should |
|---|
| 32 | * contact AMD for precise details and a CPU swap. |
|---|
| 33 | * |
|---|
| 34 | * See http://www.multimania.com/poulot/k6bug.html |
|---|
| 35 | * http://www.amd.com/K6/k6docs/revgd.html |
|---|
| 36 | * |
|---|
| 37 | * The following test is erm.. interesting. AMD neglected to up |
|---|
| 38 | * the chip setting when fixing the bug but they also tweaked some |
|---|
| 39 | * performance at the same time.. |
|---|
| 40 | */ |
|---|
| 41 | |
|---|
/*
 * vide: a minimal, aligned indirect-call target (a single "ret"),
 * used by the K6 stepping-B timing check in init_amd() below.
 */
extern void vide(void);
__asm__(".text\n.align 4\nvide: ret");
|---|
| 44 | |
|---|
| 45 | /* Can this system suffer from TSC drift due to C1 clock ramping? */ |
|---|
| 46 | static int c1_ramping_may_cause_clock_drift(struct cpuinfo_x86 *c) |
|---|
| 47 | { |
|---|
| 48 | if (c->x86 < 0xf) { |
|---|
| 49 | /* |
|---|
| 50 | * TSC drift doesn't exist on 7th Gen or less |
|---|
| 51 | * However, OS still needs to consider effects |
|---|
| 52 | * of P-state changes on TSC |
|---|
| 53 | */ |
|---|
| 54 | return 0; |
|---|
| 55 | } else if (cpuid_edx(0x80000007) & (1<<8)) { |
|---|
| 56 | /* |
|---|
| 57 | * CPUID.AdvPowerMgmtInfo.TscInvariant |
|---|
| 58 | * EDX bit 8, 8000_0007 |
|---|
| 59 | * Invariant TSC on 8th Gen or newer, use it |
|---|
| 60 | * (assume all cores have invariant TSC) |
|---|
| 61 | */ |
|---|
| 62 | return 0; |
|---|
| 63 | } |
|---|
| 64 | return 1; |
|---|
| 65 | } |
|---|
| 66 | |
|---|
| 67 | /* PCI access functions. Should be safe to use 0xcf8/0xcfc port accesses here. */ |
|---|
| 68 | static u8 pci_read_byte(u32 bus, u32 dev, u32 fn, u32 reg) |
|---|
| 69 | { |
|---|
| 70 | outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8); |
|---|
| 71 | return inb(0xcfc + (reg & 3)); |
|---|
| 72 | } |
|---|
| 73 | |
|---|
| 74 | static void pci_write_byte(u32 bus, u32 dev, u32 fn, u32 reg, u8 val) |
|---|
| 75 | { |
|---|
| 76 | outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8); |
|---|
| 77 | outb(val, 0xcfc + (reg & 3)); |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | /* |
|---|
| 81 | * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation |
|---|
| 82 | * cores only. Assume BIOS has setup all Northbridges equivalently. |
|---|
| 83 | */ |
|---|
| 84 | static void disable_c1_ramping(void) |
|---|
| 85 | { |
|---|
| 86 | u8 pmm7; |
|---|
| 87 | int node; |
|---|
| 88 | |
|---|
| 89 | for (node=0; node < NR_CPUS; node++) { |
|---|
| 90 | /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */ |
|---|
| 91 | pmm7 = pci_read_byte(0, 0x18+node, 0x3, 0x87); |
|---|
| 92 | /* Invalid read means we've updated every Northbridge. */ |
|---|
| 93 | if (pmm7 == 0xFF) |
|---|
| 94 | break; |
|---|
| 95 | pmm7 &= 0xFC; /* clear pmm7[1:0] */ |
|---|
| 96 | pci_write_byte(0, 0x18+node, 0x3, 0x87, pmm7); |
|---|
| 97 | printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node); |
|---|
| 98 | } |
|---|
| 99 | } |
|---|
| 100 | |
|---|
/*
 * Family-dependent setup for AMD CPUs, run once per CPU via the
 * cpu_dev.c_init hook.  Applies per-family errata workarounds, fixes
 * up feature bits, programs K6 write allocation, honours the
 * amd_flush_filter= override on K8, derives dual-core topology,
 * disables C1 clock ramping where it could cause TSC drift, and
 * finally starts SVM support.
 */
static void __init init_amd(struct cpuinfo_x86 *c)
{
	u32 l, h;
	/* num_physpages is #defined to 0 in this file, so mbytes == 0. */
	int mbytes = num_physpages >> (20-PAGE_SHIFT);
	int r;

#ifdef CONFIG_SMP
	unsigned long long value;

	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}
#endif

	/*
	 * FIXME: We should handle the K5 here. Set up the write
	 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
	 * no bus pipeline)
	 */

	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
	clear_bit(0*32+31, c->x86_capability);

	/* NOTE(review): r is never read afterwards; get_model_name() is
	 * presumably called for its side effects on *c — confirm. */
	r = get_model_name(c);

	switch(c->x86)
	{
	case 4:
		/*
		 * General Systems BIOSen alias the cpu frequency registers
		 * of the Elan at 0x000df000. Unfortunately, one of the Linux
		 * drivers subsequently pokes it, and changes the CPU speed.
		 * Workaround : Remove the unneeded alias.
		 */
#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
#define CBAR_ENB (0x80000000)
#define CBAR_KEY (0X000000CB)
		if (c->x86_model==9 || c->x86_model == 10) {
			/* Writing the key with the enable bit clear
			 * removes the alias. */
			if (inl (CBAR) & CBAR_ENB)
				outl (0 | CBAR_KEY, CBAR);
		}
		break;
	case 5:
		if( c->x86_model < 6 )
		{
			/* Based on AMD doc 20734R - June 2000 */
			if ( c->x86_model == 0 ) {
				clear_bit(X86_FEATURE_APIC, c->x86_capability);
				set_bit(X86_FEATURE_PGE, c->x86_capability);
			}
			break;
		}

		/* K6 stepping B erratum: time a loop of indirect calls to
		 * vide() to detect pre-B9730xxxx parts. */
		if ( c->x86_model == 6 && c->x86_mask == 1 ) {
			const int K6_BUG_LOOP = 1000000;
			int n;
			void (*f_vide)(void);
			unsigned long d, d2;

			printk(KERN_INFO "AMD K6 stepping B detected - ");

			/*
			 * It looks like AMD fixed the 2.6.2 bug and improved indirect
			 * calls at the same time.
			 */

			n = K6_BUG_LOOP;
			f_vide = vide;
			rdtscl(d);
			while (n--)
				f_vide();
			rdtscl(d2);
			d = d2-d;

			/* Knock these two lines out if it debugs out ok */
			printk(KERN_INFO "AMD K6 stepping B detected - ");
			/* -- cut here -- */
			/* Fixed parts run the loop faster (fewer than 20
			 * cycles per call on average). */
			if (d > 20*K6_BUG_LOOP)
				printk("system stability may be impaired when more than 32 MB are used.\n");
			else
				printk("probably OK (after B9730xxxx).\n");
			printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
		}

		/* K6 with old style WHCR */
		if (c->x86_model < 8 ||
		   (c->x86_model== 8 && c->x86_mask < 8)) {
			/* We can only write allocate on the low 508Mb */
			if(mbytes>508)
				mbytes=508;

			rdmsr(MSR_K6_WHCR, l, h);
			/* Only program WHCR if the BIOS left it clear. */
			if ((l&0x0000FFFF)==0) {
				unsigned long flags;
				l=(1<<0)|((mbytes/4)<<1);
				local_irq_save(flags);
				/* Flush caches before changing write
				 * allocation; keep IRQs off across it. */
				wbinvd();
				wrmsr(MSR_K6_WHCR, l, h);
				local_irq_restore(flags);
				printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
					mbytes);
			}
			break;
		}

		if ((c->x86_model == 8 && c->x86_mask >7) ||
		     c->x86_model == 9 || c->x86_model == 13) {
			/* The more serious chips .. */

			if(mbytes>4092)
				mbytes=4092;

			rdmsr(MSR_K6_WHCR, l, h);
			/* New-style WHCR lives in the upper 16 bits. */
			if ((l&0xFFFF0000)==0) {
				unsigned long flags;
				l=((mbytes>>2)<<22)|(1<<16);
				local_irq_save(flags);
				wbinvd();
				wrmsr(MSR_K6_WHCR, l, h);
				local_irq_restore(flags);
				printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
					mbytes);
			}

			/* Set MTRR capability flag if appropriate */
			if (c->x86_model == 13 || c->x86_model == 9 ||
			   (c->x86_model == 8 && c->x86_mask >= 8))
				set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
			break;
		}

		if (c->x86_model == 10) {
			/* AMD Geode LX is model 10 */
			/* placeholder for any needed mods */
			break;
		}
		break;
	case 6: /* An Athlon/Duron */

		/* Bit 15 of Athlon specific MSR 15, needs to be 0
		 * to enable SSE on Palomino/Morgan/Barton CPU's.
		 * If the BIOS didn't enable it already, enable it here.
		 */
		if (c->x86_model >= 6 && c->x86_model <= 10) {
			if (!cpu_has(c, X86_FEATURE_XMM)) {
				printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
				rdmsr(MSR_K7_HWCR, l, h);
				l &= ~0x00008000;
				wrmsr(MSR_K7_HWCR, l, h);
				set_bit(X86_FEATURE_XMM, c->x86_capability);
			}
		}

		/* It's been determined by AMD that Athlons since model 8 stepping 1
		 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
		 * As per AMD technical note 27212 0.2
		 */
		if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
			rdmsr(MSR_K7_CLK_CTL, l, h);
			if ((l & 0xfff00000) != 0x20000000) {
				printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
					((l & 0x000fffff)|0x20000000));
				wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
			}
		}
		break;
	}

	/* Tag the family-level synthetic feature bits. */
	switch (c->x86) {
	case 15:
		set_bit(X86_FEATURE_K8, c->x86_capability);
		break;
	case 6:
		set_bit(X86_FEATURE_K7, c->x86_capability);
		break;
	}

	/* Report the current K8 TLB flush filter state (HWCR bit 6 set =
	 * filter disabled) and apply any amd_flush_filter= override. */
	if (c->x86 == 15) {
		rdmsr(MSR_K7_HWCR, l, h);
		printk(KERN_INFO "CPU%d: AMD Flush Filter %sabled",
		       smp_processor_id(), (l & (1<<6)) ? "dis" : "en");
		if ((flush_filter_force > 0) && (l & (1<<6))) {
			l &= ~(1<<6);
			printk(" -> Forcibly enabled");
		} else if ((flush_filter_force < 0) && !(l & (1<<6))) {
			l |= 1<<6;
			printk(" -> Forcibly disabled");
		}
		wrmsr(MSR_K7_HWCR, l, h);
		printk("\n");
	}

	display_cacheinfo(c);

	/* CPUID 8000_0008 ECX[7:0] = core count - 1; fall back to a
	 * single core unless the count is a power of two. */
	if (cpuid_eax(0x80000000) >= 0x80000008) {
		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
		if (c->x86_max_cores & (c->x86_max_cores - 1))
			c->x86_max_cores = 1;
	}

	/* Advanced power management info; EDX bit 8 = invariant TSC. */
	if (cpuid_eax(0x80000000) >= 0x80000007) {
		c->x86_power = cpuid_edx(0x80000007);
		if (c->x86_power & (1<<8))
			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
	}

#ifdef CONFIG_X86_HT
	/*
	 * On a AMD dual core setup the lower bits of the APIC id
	 * distingush the cores. Assumes number of cores is a power
	 * of two.
	 */
	if (c->x86_max_cores > 1) {
		int cpu = smp_processor_id();
		unsigned bits = 0;
		while ((1 << bits) < c->x86_max_cores)
			bits++;
		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
		phys_proc_id[cpu] >>= bits;
		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
		       cpu, c->x86_max_cores, cpu_core_id[cpu]);
	}
#endif

	/* Prevent TSC drift in non single-processor, single-core platforms. */
	/* NOTE(review): triggered only on CPU 1, i.e. once, when a second
	 * processor is brought up — confirm that is the intent. */
	if ((smp_processor_id() == 1) && c1_ramping_may_cause_clock_drift(c))
		disable_c1_ramping();

	start_svm();
}
|---|
| 340 | |
|---|
| 341 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
|---|
| 342 | { |
|---|
| 343 | /* AMD errata T13 (order #21922) */ |
|---|
| 344 | if ((c->x86 == 6)) { |
|---|
| 345 | if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ |
|---|
| 346 | size = 64; |
|---|
| 347 | if (c->x86_model == 4 && |
|---|
| 348 | (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ |
|---|
| 349 | size = 256; |
|---|
| 350 | } |
|---|
| 351 | return size; |
|---|
| 352 | } |
|---|
| 353 | |
|---|
/*
 * AMD vendor driver descriptor, installed into cpu_devs[] by
 * amd_init_cpu().  c_models holds model-name strings for family-4
 * (486-class) parts, indexed by model number.
 */
static struct cpu_dev amd_cpu_dev __initdata = {
	.c_vendor = "AMD",
	.c_ident = { "AuthenticAMD" },
	.c_models = {
		{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
		  {
			  [3] = "486 DX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB",
			  [14] = "Am5x86-WT",
			  [15] = "Am5x86-WB"
		  }
		},
	},
	.c_init = init_amd,
	.c_identify = generic_identify,
	.c_size_cache = amd_size_cache,
};
|---|
| 373 | |
|---|
/*
 * Install the AMD hooks into the global cpu_devs table so CPU
 * identification can dispatch to them.  Always returns 0.
 */
int __init amd_init_cpu(void)
{
	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
	return 0;
}
|---|
| 379 | |
|---|
| 380 | //early_arch_initcall(amd_init_cpu); |
|---|