1 | /* |
---|
2 | * Copyright (C) 1999-2002 Hewlett-Packard Co |
---|
3 | * Stephane Eranian <eranian@hpl.hp.com> |
---|
4 | * David Mosberger-Tang <davidm@hpl.hp.com> |
---|
5 | * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> |
---|
6 | * |
---|
7 | * 1/06/01 davidm Tuned for Itanium. |
---|
8 | * 2/12/02 kchen Tuned for both Itanium and McKinley |
---|
9 | * 3/08/02 davidm Some more tweaking |
---|
10 | */ |
---|
11 | #include <linux/config.h> |
---|
12 | |
---|
13 | #include <asm/asmmacro.h> |
---|
14 | #include <asm/page.h> |
---|
15 | |
---|
// CPU-specific tuning constants, chosen at build time by CONFIG_ITANIUM.
//   L3_LINE_SIZE   - L3 cache line size in bytes. In clear_page it is the
//                    stride of dst_fetch and the number of bytes cleared by
//                    one iteration of the main loop.
//   PREFETCH_LINES - number of iterations of the .fetch loop in clear_page,
//                    i.e. how many lines dst_fetch runs ahead of the other
//                    store pointers when the main loop starts. The values
//                    are empirical tuning results ("magic number").
16 | #ifdef CONFIG_ITANIUM |
---|
17 | # define L3_LINE_SIZE 64 // Itanium L3 line size |
---|
18 | # define PREFETCH_LINES 9 // magic number |
---|
19 | #else |
---|
20 | # define L3_LINE_SIZE 128 // McKinley L3 line size |
---|
21 | # define PREFETCH_LINES 12 // magic number |
---|
22 | #endif |
---|
23 | |
---|
// Register aliases used by clear_page:
//   saved_lc  - holds the caller's ar.lc; written on entry and copied back
//               to ar.lc just before the return.
//   dst_fetch - run-ahead store pointer; starts at in0 and is
//               post-incremented by L3_LINE_SIZE on every store through it.
//   dst1-dst4 - store pointers initialised to in0+16, in0+32, in0+48 and
//               in0+64. dst4 is stored through only in the McKinley
//               (non-CONFIG_ITANIUM) variant of the main loop.
//   dst_last  - upper bound for dst_fetch; it is computed so that it equals
//               in0 + PAGE_SIZE.
24 | #define saved_lc r2 |
---|
25 | #define dst_fetch r3 |
---|
26 | #define dst1 r8 |
---|
27 | #define dst2 r9 |
---|
28 | #define dst3 r10 |
---|
29 | #define dst4 r11 |
---|
30 | |
---|
31 | #define dst_last r31 |
---|
32 | |
---|
// clear_page: fill one page (PAGE_SIZE bytes) with zeros.
//
// In:    in0 = start address of the page (the only input, see .regstk).
//        Presumably PAGE_SIZE-aligned -- TODO confirm against callers.
// Out:   no value is returned.
// Uses:  r16, saved_lc, dst_fetch, dst1-dst4, dst_last, p8 and ar.lc
//        (ar.lc is saved on entry and restored before returning).
//
// Every store is "stf.spill.nta [ptr] = f0, inc": it writes f0 in 16-byte
// spill format (f0 always reads as 0.0 on ia64, so 16 zero bytes), carries
// the non-temporal ".nta" hint, and post-increments ptr by inc.
//
// Structure:
//   1. .fetch loop - PREFETCH_LINES stores through dst_fetch, one per L3
//      line, zeroing bytes 0-15 of the first PREFETCH_LINES lines.
//      Presumably this is done to get those cache lines allocated ahead of
//      the main loop.
//   2. main loop (label 1) - PAGE_SIZE/L3_LINE_SIZE iterations. Each one
//      zeroes bytes 16..L3_LINE_SIZE-1 of the current line through
//      dst1-dst4 and, while dst_fetch < dst_last, also zeroes bytes 0-15
//      of the line PREFETCH_LINES ahead through dst_fetch.
//
// NOTE(review): the bound check uses cmp.lt, a signed compare on addresses.
// That orders correctly as long as the page does not end exactly at the
// positive/negative address boundary -- confirm this cannot happen.
33 | GLOBAL_ENTRY(clear_page) |
---|
34 | .prologue |
---|
// One input register (in0); no locals, outputs or rotating registers.
35 | .regstk 1,0,0,0 |
---|
// br.cloop repeats while ar.lc != 0, so a count of N-1 gives N iterations.
36 | mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until |
---|
// Unwind annotation: the next instruction saves ar.lc in saved_lc.
37 | .save ar.lc, saved_lc |
---|
38 | mov saved_lc = ar.lc |
---|
39 | |
---|
40 | .body |
---|
// Set up the .fetch loop: PREFETCH_LINES iterations starting at in0.
41 | mov ar.lc = (PREFETCH_LINES - 1) |
---|
42 | mov dst_fetch = in0 |
---|
43 | adds dst1 = 16, in0 |
---|
44 | adds dst2 = 32, in0 |
---|
45 | ;; |
---|
// Zero the first 16 bytes of each of the first PREFETCH_LINES lines.
46 | .fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE |
---|
47 | adds dst3 = 48, in0 // executing this multiple times is harmless |
---|
48 | br.cloop.sptk.few .fetch |
---|
49 | ;; |
---|
// Here dst_fetch = in0 + PREFETCH_LINES*L3_LINE_SIZE, so dst_last becomes
// in0 + PAGE_SIZE: the first address dst_fetch must not store to.
50 | addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch |
---|
51 | mov ar.lc = r16 // one L3 line per iteration |
---|
// dst4 is only stored through in the McKinley variant below.
52 | adds dst4 = 64, in0 |
---|
53 | ;; |
---|
54 | #ifdef CONFIG_ITANIUM |
---|
55 | // Optimized for Itanium |
---|
// 64-byte line: dst1 and dst2 cover line offsets 16 and 32 here; the shared
// dst3 store after #endif covers offset 48. Each pointer advances by 64.
56 | 1: stf.spill.nta [dst1] = f0, 64 |
---|
57 | stf.spill.nta [dst2] = f0, 64 |
---|
// p8 = "dst_fetch is still inside the page"; it guards the run-ahead store.
58 | cmp.lt p8,p0=dst_fetch, dst_last |
---|
59 | ;; |
---|
60 | #else |
---|
61 | // Optimized for McKinley |
---|
// 128-byte line, first group: dst1-dst4 cover line offsets 16, 32, 48, 64.
// dst1-dst3 step by 64 to reach the second half of the line; dst4 steps by
// 128 straight to the next line.
62 | 1: stf.spill.nta [dst1] = f0, 64 |
---|
63 | stf.spill.nta [dst2] = f0, 64 |
---|
64 | stf.spill.nta [dst3] = f0, 64 |
---|
65 | stf.spill.nta [dst4] = f0, 128 |
---|
// p8 = "dst_fetch is still inside the page"; it guards the run-ahead store.
66 | cmp.lt p8,p0=dst_fetch, dst_last |
---|
67 | ;; |
---|
// Second group: dst1 and dst2 cover line offsets 80 and 96; the shared dst3
// store after #endif covers offset 112. After these, dst1-dst3 point at
// offsets 16/32/48 of the next line.
68 | stf.spill.nta [dst1] = f0, 64 |
---|
69 | stf.spill.nta [dst2] = f0, 64 |
---|
70 | #endif |
---|
// Common tail of both variants: the last dst3 store for this line, then the
// predicated store that zeroes bytes 0-15 of the line PREFETCH_LINES ahead.
71 | stf.spill.nta [dst3] = f0, 64 |
---|
72 | (p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE |
---|
73 | br.cloop.sptk.few 1b |
---|
74 | ;; |
---|
75 | mov ar.lc = saved_lc // restore lc |
---|
76 | br.ret.sptk.many rp |
---|
77 | END(clear_page) |
---|