1 | /* |
---|
2 | * Here is where the ball gets rolling as far as the kernel is concerned. |
---|
3 | * When control is transferred to _start, the bootloader has already |
---|
4 | * loaded us to the correct address. All that's left to do here is |
---|
5 | * to set up the kernel's global pointer and jump to the kernel |
---|
6 | * entry point. |
---|
7 | * |
---|
8 | * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co |
---|
9 | * David Mosberger-Tang <davidm@hpl.hp.com> |
---|
10 | * Stephane Eranian <eranian@hpl.hp.com> |
---|
11 | * Copyright (C) 1999 VA Linux Systems |
---|
12 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> |
---|
13 | * Copyright (C) 1999 Intel Corp. |
---|
14 | * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> |
---|
15 | * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> |
---|
16 | * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com> |
---|
17 | * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. |
---|
18 | * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com> |
---|
19 | * Support for CPU Hotplug |
---|
20 | */ |
---|
21 | |
---|
22 | |
---|
23 | #include <asm/asmmacro.h> |
---|
24 | #include <asm/fpu.h> |
---|
25 | #include <asm/kregs.h> |
---|
26 | #include <asm/mmu_context.h> |
---|
27 | #include <asm/asm-offsets.h> |
---|
28 | #include <asm/pal.h> |
---|
29 | #include <asm/pgtable.h> |
---|
30 | #include <asm/processor.h> |
---|
31 | #include <asm/ptrace.h> |
---|
32 | #include <asm/system.h> |
---|
33 | #include <asm/mca_asm.h> |
---|
34 | |
---|
35 | #ifdef CONFIG_HOTPLUG_CPU |
---|
36 | #define SAL_PSR_BITS_TO_SET \ |
---|
37 | (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) |
---|
38 | |
---|
39 | #define SAVE_FROM_REG(src, ptr, dest) \ |
---|
40 | mov dest=src;; \ |
---|
41 | st8 [ptr]=dest,0x08 |
---|
42 | |
---|
43 | #define RESTORE_REG(reg, ptr, _tmp) \ |
---|
44 | ld8 _tmp=[ptr],0x08;; \ |
---|
45 | mov reg=_tmp |
---|
46 | |
---|
47 | #define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ |
---|
48 | mov ar.lc=IA64_NUM_DBG_REGS-1;; \ |
---|
49 | mov _idx=0;; \ |
---|
50 | 1: \ |
---|
51 | SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ |
---|
52 | add _idx=1,_idx;; \ |
---|
53 | br.cloop.sptk.many 1b |
---|
54 | |
---|
55 | #define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ |
---|
56 | mov ar.lc=IA64_NUM_DBG_REGS-1;; \ |
---|
57 | mov _idx=0;; \ |
---|
58 | _lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ |
---|
59 | add _idx=1, _idx;; \ |
---|
60 | br.cloop.sptk.many _lbl |
---|
61 | |
---|
62 | #define SAVE_ONE_RR(num, _reg, _tmp) \ |
---|
63 | movl _tmp=(num<<61);; \ |
---|
64 | mov _reg=rr[_tmp] |
---|
65 | |
---|
66 | #define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ |
---|
67 | SAVE_ONE_RR(0,_r0, _tmp);; \ |
---|
68 | SAVE_ONE_RR(1,_r1, _tmp);; \ |
---|
69 | SAVE_ONE_RR(2,_r2, _tmp);; \ |
---|
70 | SAVE_ONE_RR(3,_r3, _tmp);; \ |
---|
71 | SAVE_ONE_RR(4,_r4, _tmp);; \ |
---|
72 | SAVE_ONE_RR(5,_r5, _tmp);; \ |
---|
73 | SAVE_ONE_RR(6,_r6, _tmp);; \ |
---|
74 | SAVE_ONE_RR(7,_r7, _tmp);; |
---|
75 | |
---|
76 | #define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ |
---|
77 | st8 [ptr]=_r0, 8;; \ |
---|
78 | st8 [ptr]=_r1, 8;; \ |
---|
79 | st8 [ptr]=_r2, 8;; \ |
---|
80 | st8 [ptr]=_r3, 8;; \ |
---|
81 | st8 [ptr]=_r4, 8;; \ |
---|
82 | st8 [ptr]=_r5, 8;; \ |
---|
83 | st8 [ptr]=_r6, 8;; \ |
---|
84 | st8 [ptr]=_r7, 8;; |
---|
85 | |
---|
86 | #define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ |
---|
87 | mov ar.lc=0x08-1;; \ |
---|
88 | movl _idx1=0x00;; \ |
---|
89 | RestRR: \ |
---|
90 | dep.z _idx2=_idx1,61,3;; \ |
---|
91 | ld8 _tmp=[ptr],8;; \ |
---|
92 | mov rr[_idx2]=_tmp;; \ |
---|
93 | srlz.d;; \ |
---|
94 | add _idx1=1,_idx1;; \ |
---|
95 | br.cloop.sptk.few RestRR |
---|
96 | |
---|
97 | #define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ |
---|
98 | movl reg1=sal_state_for_booting_cpu;; \ |
---|
99 | ld8 reg2=[reg1];; |
---|
100 | |
---|
101 | /* |
---|
102 | * Adjust region registers saved before starting to save |
---|
103 | * break regs and rest of the states that need to be preserved. |
---|
104 | */ |
---|
105 | #define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ |
---|
106 | SAVE_FROM_REG(b0,_reg1,_reg2);; \ |
---|
107 | SAVE_FROM_REG(b1,_reg1,_reg2);; \ |
---|
108 | SAVE_FROM_REG(b2,_reg1,_reg2);; \ |
---|
109 | SAVE_FROM_REG(b3,_reg1,_reg2);; \ |
---|
110 | SAVE_FROM_REG(b4,_reg1,_reg2);; \ |
---|
111 | SAVE_FROM_REG(b5,_reg1,_reg2);; \ |
---|
112 | st8 [_reg1]=r1,0x08;; \ |
---|
113 | st8 [_reg1]=r12,0x08;; \ |
---|
114 | st8 [_reg1]=r13,0x08;; \ |
---|
115 | SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ |
---|
116 | SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ |
---|
117 | SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ |
---|
118 | SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ |
---|
119 | SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ |
---|
120 | SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ |
---|
121 | SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ |
---|
122 | SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ |
---|
123 | SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ |
---|
124 | SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ |
---|
125 | SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ |
---|
126 | SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ |
---|
127 | SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ |
---|
128 | st8 [_reg1]=r4,0x08;; \ |
---|
129 | st8 [_reg1]=r5,0x08;; \ |
---|
130 | st8 [_reg1]=r6,0x08;; \ |
---|
131 | st8 [_reg1]=r7,0x08;; \ |
---|
132 | st8 [_reg1]=_pred,0x08;; \ |
---|
133 | SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ |
---|
134 | stf.spill.nta [_reg1]=f2,16;; \ |
---|
135 | stf.spill.nta [_reg1]=f3,16;; \ |
---|
136 | stf.spill.nta [_reg1]=f4,16;; \ |
---|
137 | stf.spill.nta [_reg1]=f5,16;; \ |
---|
138 | stf.spill.nta [_reg1]=f16,16;; \ |
---|
139 | stf.spill.nta [_reg1]=f17,16;; \ |
---|
140 | stf.spill.nta [_reg1]=f18,16;; \ |
---|
141 | stf.spill.nta [_reg1]=f19,16;; \ |
---|
142 | stf.spill.nta [_reg1]=f20,16;; \ |
---|
143 | stf.spill.nta [_reg1]=f21,16;; \ |
---|
144 | stf.spill.nta [_reg1]=f22,16;; \ |
---|
145 | stf.spill.nta [_reg1]=f23,16;; \ |
---|
146 | stf.spill.nta [_reg1]=f24,16;; \ |
---|
147 | stf.spill.nta [_reg1]=f25,16;; \ |
---|
148 | stf.spill.nta [_reg1]=f26,16;; \ |
---|
149 | stf.spill.nta [_reg1]=f27,16;; \ |
---|
150 | stf.spill.nta [_reg1]=f28,16;; \ |
---|
151 | stf.spill.nta [_reg1]=f29,16;; \ |
---|
152 | stf.spill.nta [_reg1]=f30,16;; \ |
---|
153 | stf.spill.nta [_reg1]=f31,16;; |
---|
154 | |
---|
155 | #else |
---|
156 | #define SET_AREA_FOR_BOOTING_CPU(a1, a2) |
---|
157 | #define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) |
---|
158 | #define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) |
---|
159 | #define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) |
---|
160 | #endif |
---|
161 | |
---|
162 | #define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ |
---|
163 | movl _tmp1=(num << 61);; \ |
---|
164 | mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ |
---|
165 | mov rr[_tmp1]=_tmp2 |
---|
166 | |
---|
167 | .section __special_page_section,"ax" |
---|
168 | |
---|
169 | .global empty_zero_page |
---|
170 | empty_zero_page: |
---|
171 | .skip PAGE_SIZE |
---|
172 | |
---|
173 | .global swapper_pg_dir |
---|
174 | swapper_pg_dir: |
---|
175 | .skip PAGE_SIZE |
---|
176 | |
---|
177 | .rodata |
---|
178 | halt_msg: |
---|
179 | stringz "Halting kernel\n" |
---|
180 | |
---|
181 | .text |
---|
182 | |
---|
183 | .global start_ap |
---|
184 | |
---|
185 | /* |
---|
186 | * Start the kernel. When the bootloader passes control to _start(), r28 |
---|
187 | * points to the address of the boot parameter area. Execution reaches |
---|
188 | * here in physical mode. |
---|
189 | */ |
---|
190 | GLOBAL_ENTRY(_start) |
---|
191 | start_ap: |
---|
192 | .prologue |
---|
193 | .save rp, r0 // terminate unwind chain with a NULL rp |
---|
194 | .body |
---|
195 | |
---|
196 | rsm psr.i | psr.ic |
---|
197 | ;; |
---|
198 | srlz.i |
---|
199 | ;; |
---|
200 | { |
---|
201 | flushrs // must be first insn in group |
---|
202 | srlz.i |
---|
203 | } |
---|
204 | ;; |
---|
205 | /* |
---|
206 | * Save the region registers, predicate before they get clobbered |
---|
207 | */ |
---|
208 | SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); |
---|
209 | mov r25=pr;; |
---|
210 | |
---|
211 | /* |
---|
212 | * Initialize kernel region registers: |
---|
213 | * rr[0]: VHPT enabled, page size = PAGE_SHIFT |
---|
214 | * rr[1]: VHPT enabled, page size = PAGE_SHIFT |
---|
215 | * rr[2]: VHPT enabled, page size = PAGE_SHIFT |
---|
216 | * rr[3]: VHPT enabled, page size = PAGE_SHIFT |
---|
217 | * rr[4]: VHPT enabled, page size = PAGE_SHIFT |
---|
218 | * rr[5]: VHPT enabled, page size = PAGE_SHIFT |
---|
219 | * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT |
---|
220 | * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT |
---|
221 | * We initialize all of them to prevent inadvertently assuming |
---|
222 | * something about the state of address translation early in boot. |
---|
223 | */ |
---|
224 | SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; |
---|
225 | SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; |
---|
226 | SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; |
---|
227 | SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; |
---|
228 | SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; |
---|
229 | SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; |
---|
230 | SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; |
---|
231 | SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; |
---|
232 | /* |
---|
233 | * Now pin mappings into the TLB for kernel text and data |
---|
234 | */ |
---|
235 | mov r18=KERNEL_TR_PAGE_SHIFT<<2 |
---|
236 | movl r17=KERNEL_START |
---|
237 | ;; |
---|
238 | mov cr.itir=r18 |
---|
239 | mov cr.ifa=r17 |
---|
240 | mov r16=IA64_TR_KERNEL |
---|
241 | mov r3=ip |
---|
242 | movl r18=PAGE_KERNEL |
---|
243 | ;; |
---|
244 | dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT |
---|
245 | ;; |
---|
246 | or r18=r2,r18 |
---|
247 | ;; |
---|
248 | srlz.i |
---|
249 | ;; |
---|
250 | itr.i itr[r16]=r18 |
---|
251 | ;; |
---|
252 | itr.d dtr[r16]=r18 |
---|
253 | ;; |
---|
254 | srlz.i |
---|
255 | |
---|
256 | /* |
---|
257 | * Switch into virtual mode: |
---|
258 | */ |
---|
259 | movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ |
---|
260 | |IA64_PSR_DI) |
---|
261 | ;; |
---|
262 | mov cr.ipsr=r16 |
---|
263 | movl r17=1f |
---|
264 | ;; |
---|
265 | mov cr.iip=r17 |
---|
266 | mov cr.ifs=r0 |
---|
267 | ;; |
---|
268 | rfi |
---|
269 | ;; |
---|
270 | 1: // now we are in virtual mode |
---|
271 | |
---|
272 | SET_AREA_FOR_BOOTING_CPU(r2, r16); |
---|
273 | |
---|
274 | STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); |
---|
275 | SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) |
---|
276 | ;; |
---|
277 | |
---|
278 | // set IVT entry point---can't access I/O ports without it |
---|
279 | movl r3=ia64_ivt |
---|
280 | ;; |
---|
281 | mov cr.iva=r3 |
---|
282 | movl r2=FPSR_DEFAULT |
---|
283 | ;; |
---|
284 | srlz.i |
---|
285 | movl gp=__gp |
---|
286 | |
---|
287 | mov ar.fpsr=r2 |
---|
288 | ;; |
---|
289 | |
---|
290 | #define isAP p2 // are we an Application Processor? |
---|
291 | #define isBP p3 // are we the Bootstrap Processor? |
---|
292 | |
---|
293 | #ifdef CONFIG_SMP |
---|
294 | /* |
---|
295 | * Find the init_task for the currently booting CPU. At poweron, and in |
---|
296 | * UP mode, task_for_booting_cpu is NULL. |
---|
297 | */ |
---|
298 | movl r3=task_for_booting_cpu |
---|
299 | ;; |
---|
300 | ld8 r3=[r3] |
---|
301 | movl r2=init_task |
---|
302 | ;; |
---|
303 | cmp.eq isBP,isAP=r3,r0 |
---|
304 | ;; |
---|
305 | (isAP) mov r2=r3 |
---|
306 | #else |
---|
307 | movl r2=init_task |
---|
308 | cmp.eq isBP,isAP=r0,r0 |
---|
309 | #endif |
---|
310 | ;; |
---|
311 | tpa r3=r2 // r3 == phys addr of task struct |
---|
312 | mov r16=-1 |
---|
313 | (isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it |
---|
314 | |
---|
315 | // load mapping for stack (virtaddr in r2, physaddr in r3) |
---|
316 | rsm psr.ic |
---|
317 | movl r17=PAGE_KERNEL |
---|
318 | ;; |
---|
319 | srlz.d |
---|
320 | dep r18=0,r3,0,12 |
---|
321 | ;; |
---|
322 | or r18=r17,r18 |
---|
323 | dep r2=-1,r3,61,3 // IMVA of task |
---|
324 | ;; |
---|
325 | mov r17=rr[r2] |
---|
326 | shr.u r16=r3,IA64_GRANULE_SHIFT |
---|
327 | ;; |
---|
328 | dep r17=0,r17,8,24 |
---|
329 | ;; |
---|
330 | mov cr.itir=r17 |
---|
331 | mov cr.ifa=r2 |
---|
332 | |
---|
333 | mov r19=IA64_TR_CURRENT_STACK |
---|
334 | ;; |
---|
335 | itr.d dtr[r19]=r18 |
---|
336 | ;; |
---|
337 | ssm psr.ic |
---|
338 | srlz.d |
---|
339 | ;; |
---|
340 | |
---|
341 | .load_current: |
---|
342 | // load the "current" pointer (r13) and ar.k6 with the current task |
---|
343 | mov IA64_KR(CURRENT)=r2 // virtual address |
---|
344 | mov IA64_KR(CURRENT_STACK)=r16 |
---|
345 | mov r13=r2 |
---|
346 | /* |
---|
347 | * Reserve space at the top of the stack for "struct pt_regs". Kernel |
---|
348 | * threads don't store interesting values in that structure, but the space |
---|
349 | * still needs to be there because time-critical stuff such as the context |
---|
350 | * switching can be implemented more efficiently (for example, __switch_to() |
---|
351 | * always sets the psr.dfh bit of the task it is switching to). |
---|
352 | */ |
---|
353 | |
---|
354 | addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 |
---|
355 | addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE |
---|
356 | mov ar.rsc=0 // place RSE in enforced lazy mode |
---|
357 | ;; |
---|
358 | loadrs // clear the dirty partition |
---|
359 | mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base |
---|
360 | ;; |
---|
361 | mov ar.bspstore=r2 // establish the new RSE stack |
---|
362 | ;; |
---|
363 | mov ar.rsc=0x3 // place RSE in eager mode |
---|
364 | |
---|
365 | (isBP) dep r28=-1,r28,61,3 // make address virtual |
---|
366 | (isBP) movl r2=ia64_boot_param |
---|
367 | ;; |
---|
368 | (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader |
---|
369 | |
---|
370 | #ifdef CONFIG_XEN |
---|
371 | // Note: isBP is used by the subprogram. |
---|
372 | br.call.sptk.many rp=early_xen_setup |
---|
373 | ;; |
---|
374 | #endif |
---|
375 | |
---|
376 | #ifdef CONFIG_SMP |
---|
377 | (isAP) br.call.sptk.many rp=start_secondary |
---|
378 | .ret0: |
---|
379 | (isAP) br.cond.sptk self |
---|
380 | #endif |
---|
381 | |
---|
382 | // This is executed by the bootstrap processor (bsp) only: |
---|
383 | |
---|
384 | #ifdef CONFIG_IA64_FW_EMU |
---|
385 | // initialize PAL & SAL emulator: |
---|
386 | br.call.sptk.many rp=sys_fw_init |
---|
387 | .ret1: |
---|
388 | #endif |
---|
389 | br.call.sptk.many rp=start_kernel |
---|
390 | .ret2: addl r3=@ltoff(halt_msg),gp |
---|
391 | ;; |
---|
392 | alloc r2=ar.pfs,8,0,2,0 |
---|
393 | ;; |
---|
394 | ld8 out0=[r3] |
---|
395 | br.call.sptk.many b0=console_print |
---|
396 | |
---|
397 | self: hint @pause |
---|
398 | br.sptk.many self // endless loop |
---|
399 | END(_start) |
---|
400 | |
---|
401 | GLOBAL_ENTRY(ia64_save_debug_regs) |
---|
402 | alloc r16=ar.pfs,1,0,0,0 |
---|
403 | mov r20=ar.lc // preserve ar.lc |
---|
404 | mov ar.lc=IA64_NUM_DBG_REGS-1 |
---|
405 | mov r18=0 |
---|
406 | add r19=IA64_NUM_DBG_REGS*8,in0 |
---|
407 | ;; |
---|
408 | 1: mov r16=dbr[r18] |
---|
409 | #ifdef CONFIG_ITANIUM |
---|
410 | ;; |
---|
411 | srlz.d |
---|
412 | #endif |
---|
413 | mov r17=ibr[r18] |
---|
414 | add r18=1,r18 |
---|
415 | ;; |
---|
416 | st8.nta [in0]=r16,8 |
---|
417 | st8.nta [r19]=r17,8 |
---|
418 | br.cloop.sptk.many 1b |
---|
419 | ;; |
---|
420 | mov ar.lc=r20 // restore ar.lc |
---|
421 | br.ret.sptk.many rp |
---|
422 | END(ia64_save_debug_regs) |
---|
423 | |
---|
424 | GLOBAL_ENTRY(ia64_load_debug_regs) |
---|
425 | alloc r16=ar.pfs,1,0,0,0 |
---|
426 | lfetch.nta [in0] |
---|
427 | mov r20=ar.lc // preserve ar.lc |
---|
428 | add r19=IA64_NUM_DBG_REGS*8,in0 |
---|
429 | mov ar.lc=IA64_NUM_DBG_REGS-1 |
---|
430 | mov r18=-1 |
---|
431 | ;; |
---|
432 | 1: ld8.nta r16=[in0],8 |
---|
433 | ld8.nta r17=[r19],8 |
---|
434 | add r18=1,r18 |
---|
435 | ;; |
---|
436 | mov dbr[r18]=r16 |
---|
437 | #ifdef CONFIG_ITANIUM |
---|
438 | ;; |
---|
439 | srlz.d // Errata 132 (NoFix status) |
---|
440 | #endif |
---|
441 | mov ibr[r18]=r17 |
---|
442 | br.cloop.sptk.many 1b |
---|
443 | ;; |
---|
444 | mov ar.lc=r20 // restore ar.lc |
---|
445 | br.ret.sptk.many rp |
---|
446 | END(ia64_load_debug_regs) |
---|
447 | |
---|
448 | GLOBAL_ENTRY(__ia64_save_fpu) |
---|
449 | alloc r2=ar.pfs,1,4,0,0 |
---|
450 | adds loc0=96*16-16,in0 |
---|
451 | adds loc1=96*16-16-128,in0 |
---|
452 | ;; |
---|
453 | stf.spill.nta [loc0]=f127,-256 |
---|
454 | stf.spill.nta [loc1]=f119,-256 |
---|
455 | ;; |
---|
456 | stf.spill.nta [loc0]=f111,-256 |
---|
457 | stf.spill.nta [loc1]=f103,-256 |
---|
458 | ;; |
---|
459 | stf.spill.nta [loc0]=f95,-256 |
---|
460 | stf.spill.nta [loc1]=f87,-256 |
---|
461 | ;; |
---|
462 | stf.spill.nta [loc0]=f79,-256 |
---|
463 | stf.spill.nta [loc1]=f71,-256 |
---|
464 | ;; |
---|
465 | stf.spill.nta [loc0]=f63,-256 |
---|
466 | stf.spill.nta [loc1]=f55,-256 |
---|
467 | adds loc2=96*16-32,in0 |
---|
468 | ;; |
---|
469 | stf.spill.nta [loc0]=f47,-256 |
---|
470 | stf.spill.nta [loc1]=f39,-256 |
---|
471 | adds loc3=96*16-32-128,in0 |
---|
472 | ;; |
---|
473 | stf.spill.nta [loc2]=f126,-256 |
---|
474 | stf.spill.nta [loc3]=f118,-256 |
---|
475 | ;; |
---|
476 | stf.spill.nta [loc2]=f110,-256 |
---|
477 | stf.spill.nta [loc3]=f102,-256 |
---|
478 | ;; |
---|
479 | stf.spill.nta [loc2]=f94,-256 |
---|
480 | stf.spill.nta [loc3]=f86,-256 |
---|
481 | ;; |
---|
482 | stf.spill.nta [loc2]=f78,-256 |
---|
483 | stf.spill.nta [loc3]=f70,-256 |
---|
484 | ;; |
---|
485 | stf.spill.nta [loc2]=f62,-256 |
---|
486 | stf.spill.nta [loc3]=f54,-256 |
---|
487 | adds loc0=96*16-48,in0 |
---|
488 | ;; |
---|
489 | stf.spill.nta [loc2]=f46,-256 |
---|
490 | stf.spill.nta [loc3]=f38,-256 |
---|
491 | adds loc1=96*16-48-128,in0 |
---|
492 | ;; |
---|
493 | stf.spill.nta [loc0]=f125,-256 |
---|
494 | stf.spill.nta [loc1]=f117,-256 |
---|
495 | ;; |
---|
496 | stf.spill.nta [loc0]=f109,-256 |
---|
497 | stf.spill.nta [loc1]=f101,-256 |
---|
498 | ;; |
---|
499 | stf.spill.nta [loc0]=f93,-256 |
---|
500 | stf.spill.nta [loc1]=f85,-256 |
---|
501 | ;; |
---|
502 | stf.spill.nta [loc0]=f77,-256 |
---|
503 | stf.spill.nta [loc1]=f69,-256 |
---|
504 | ;; |
---|
505 | stf.spill.nta [loc0]=f61,-256 |
---|
506 | stf.spill.nta [loc1]=f53,-256 |
---|
507 | adds loc2=96*16-64,in0 |
---|
508 | ;; |
---|
509 | stf.spill.nta [loc0]=f45,-256 |
---|
510 | stf.spill.nta [loc1]=f37,-256 |
---|
511 | adds loc3=96*16-64-128,in0 |
---|
512 | ;; |
---|
513 | stf.spill.nta [loc2]=f124,-256 |
---|
514 | stf.spill.nta [loc3]=f116,-256 |
---|
515 | ;; |
---|
516 | stf.spill.nta [loc2]=f108,-256 |
---|
517 | stf.spill.nta [loc3]=f100,-256 |
---|
518 | ;; |
---|
519 | stf.spill.nta [loc2]=f92,-256 |
---|
520 | stf.spill.nta [loc3]=f84,-256 |
---|
521 | ;; |
---|
522 | stf.spill.nta [loc2]=f76,-256 |
---|
523 | stf.spill.nta [loc3]=f68,-256 |
---|
524 | ;; |
---|
525 | stf.spill.nta [loc2]=f60,-256 |
---|
526 | stf.spill.nta [loc3]=f52,-256 |
---|
527 | adds loc0=96*16-80,in0 |
---|
528 | ;; |
---|
529 | stf.spill.nta [loc2]=f44,-256 |
---|
530 | stf.spill.nta [loc3]=f36,-256 |
---|
531 | adds loc1=96*16-80-128,in0 |
---|
532 | ;; |
---|
533 | stf.spill.nta [loc0]=f123,-256 |
---|
534 | stf.spill.nta [loc1]=f115,-256 |
---|
535 | ;; |
---|
536 | stf.spill.nta [loc0]=f107,-256 |
---|
537 | stf.spill.nta [loc1]=f99,-256 |
---|
538 | ;; |
---|
539 | stf.spill.nta [loc0]=f91,-256 |
---|
540 | stf.spill.nta [loc1]=f83,-256 |
---|
541 | ;; |
---|
542 | stf.spill.nta [loc0]=f75,-256 |
---|
543 | stf.spill.nta [loc1]=f67,-256 |
---|
544 | ;; |
---|
545 | stf.spill.nta [loc0]=f59,-256 |
---|
546 | stf.spill.nta [loc1]=f51,-256 |
---|
547 | adds loc2=96*16-96,in0 |
---|
548 | ;; |
---|
549 | stf.spill.nta [loc0]=f43,-256 |
---|
550 | stf.spill.nta [loc1]=f35,-256 |
---|
551 | adds loc3=96*16-96-128,in0 |
---|
552 | ;; |
---|
553 | stf.spill.nta [loc2]=f122,-256 |
---|
554 | stf.spill.nta [loc3]=f114,-256 |
---|
555 | ;; |
---|
556 | stf.spill.nta [loc2]=f106,-256 |
---|
557 | stf.spill.nta [loc3]=f98,-256 |
---|
558 | ;; |
---|
559 | stf.spill.nta [loc2]=f90,-256 |
---|
560 | stf.spill.nta [loc3]=f82,-256 |
---|
561 | ;; |
---|
562 | stf.spill.nta [loc2]=f74,-256 |
---|
563 | stf.spill.nta [loc3]=f66,-256 |
---|
564 | ;; |
---|
565 | stf.spill.nta [loc2]=f58,-256 |
---|
566 | stf.spill.nta [loc3]=f50,-256 |
---|
567 | adds loc0=96*16-112,in0 |
---|
568 | ;; |
---|
569 | stf.spill.nta [loc2]=f42,-256 |
---|
570 | stf.spill.nta [loc3]=f34,-256 |
---|
571 | adds loc1=96*16-112-128,in0 |
---|
572 | ;; |
---|
573 | stf.spill.nta [loc0]=f121,-256 |
---|
574 | stf.spill.nta [loc1]=f113,-256 |
---|
575 | ;; |
---|
576 | stf.spill.nta [loc0]=f105,-256 |
---|
577 | stf.spill.nta [loc1]=f97,-256 |
---|
578 | ;; |
---|
579 | stf.spill.nta [loc0]=f89,-256 |
---|
580 | stf.spill.nta [loc1]=f81,-256 |
---|
581 | ;; |
---|
582 | stf.spill.nta [loc0]=f73,-256 |
---|
583 | stf.spill.nta [loc1]=f65,-256 |
---|
584 | ;; |
---|
585 | stf.spill.nta [loc0]=f57,-256 |
---|
586 | stf.spill.nta [loc1]=f49,-256 |
---|
587 | adds loc2=96*16-128,in0 |
---|
588 | ;; |
---|
589 | stf.spill.nta [loc0]=f41,-256 |
---|
590 | stf.spill.nta [loc1]=f33,-256 |
---|
591 | adds loc3=96*16-128-128,in0 |
---|
592 | ;; |
---|
593 | stf.spill.nta [loc2]=f120,-256 |
---|
594 | stf.spill.nta [loc3]=f112,-256 |
---|
595 | ;; |
---|
596 | stf.spill.nta [loc2]=f104,-256 |
---|
597 | stf.spill.nta [loc3]=f96,-256 |
---|
598 | ;; |
---|
599 | stf.spill.nta [loc2]=f88,-256 |
---|
600 | stf.spill.nta [loc3]=f80,-256 |
---|
601 | ;; |
---|
602 | stf.spill.nta [loc2]=f72,-256 |
---|
603 | stf.spill.nta [loc3]=f64,-256 |
---|
604 | ;; |
---|
605 | stf.spill.nta [loc2]=f56,-256 |
---|
606 | stf.spill.nta [loc3]=f48,-256 |
---|
607 | ;; |
---|
608 | stf.spill.nta [loc2]=f40 |
---|
609 | stf.spill.nta [loc3]=f32 |
---|
610 | br.ret.sptk.many rp |
---|
611 | END(__ia64_save_fpu) |
---|
612 | |
---|
613 | GLOBAL_ENTRY(__ia64_load_fpu) |
---|
614 | alloc r2=ar.pfs,1,2,0,0 |
---|
615 | adds r3=128,in0 |
---|
616 | adds r14=256,in0 |
---|
617 | adds r15=384,in0 |
---|
618 | mov loc0=512 |
---|
619 | mov loc1=-1024+16 |
---|
620 | ;; |
---|
621 | ldf.fill.nta f32=[in0],loc0 |
---|
622 | ldf.fill.nta f40=[ r3],loc0 |
---|
623 | ldf.fill.nta f48=[r14],loc0 |
---|
624 | ldf.fill.nta f56=[r15],loc0 |
---|
625 | ;; |
---|
626 | ldf.fill.nta f64=[in0],loc0 |
---|
627 | ldf.fill.nta f72=[ r3],loc0 |
---|
628 | ldf.fill.nta f80=[r14],loc0 |
---|
629 | ldf.fill.nta f88=[r15],loc0 |
---|
630 | ;; |
---|
631 | ldf.fill.nta f96=[in0],loc1 |
---|
632 | ldf.fill.nta f104=[ r3],loc1 |
---|
633 | ldf.fill.nta f112=[r14],loc1 |
---|
634 | ldf.fill.nta f120=[r15],loc1 |
---|
635 | ;; |
---|
636 | ldf.fill.nta f33=[in0],loc0 |
---|
637 | ldf.fill.nta f41=[ r3],loc0 |
---|
638 | ldf.fill.nta f49=[r14],loc0 |
---|
639 | ldf.fill.nta f57=[r15],loc0 |
---|
640 | ;; |
---|
641 | ldf.fill.nta f65=[in0],loc0 |
---|
642 | ldf.fill.nta f73=[ r3],loc0 |
---|
643 | ldf.fill.nta f81=[r14],loc0 |
---|
644 | ldf.fill.nta f89=[r15],loc0 |
---|
645 | ;; |
---|
646 | ldf.fill.nta f97=[in0],loc1 |
---|
647 | ldf.fill.nta f105=[ r3],loc1 |
---|
648 | ldf.fill.nta f113=[r14],loc1 |
---|
649 | ldf.fill.nta f121=[r15],loc1 |
---|
650 | ;; |
---|
651 | ldf.fill.nta f34=[in0],loc0 |
---|
652 | ldf.fill.nta f42=[ r3],loc0 |
---|
653 | ldf.fill.nta f50=[r14],loc0 |
---|
654 | ldf.fill.nta f58=[r15],loc0 |
---|
655 | ;; |
---|
656 | ldf.fill.nta f66=[in0],loc0 |
---|
657 | ldf.fill.nta f74=[ r3],loc0 |
---|
658 | ldf.fill.nta f82=[r14],loc0 |
---|
659 | ldf.fill.nta f90=[r15],loc0 |
---|
660 | ;; |
---|
661 | ldf.fill.nta f98=[in0],loc1 |
---|
662 | ldf.fill.nta f106=[ r3],loc1 |
---|
663 | ldf.fill.nta f114=[r14],loc1 |
---|
664 | ldf.fill.nta f122=[r15],loc1 |
---|
665 | ;; |
---|
666 | ldf.fill.nta f35=[in0],loc0 |
---|
667 | ldf.fill.nta f43=[ r3],loc0 |
---|
668 | ldf.fill.nta f51=[r14],loc0 |
---|
669 | ldf.fill.nta f59=[r15],loc0 |
---|
670 | ;; |
---|
671 | ldf.fill.nta f67=[in0],loc0 |
---|
672 | ldf.fill.nta f75=[ r3],loc0 |
---|
673 | ldf.fill.nta f83=[r14],loc0 |
---|
674 | ldf.fill.nta f91=[r15],loc0 |
---|
675 | ;; |
---|
676 | ldf.fill.nta f99=[in0],loc1 |
---|
677 | ldf.fill.nta f107=[ r3],loc1 |
---|
678 | ldf.fill.nta f115=[r14],loc1 |
---|
679 | ldf.fill.nta f123=[r15],loc1 |
---|
680 | ;; |
---|
681 | ldf.fill.nta f36=[in0],loc0 |
---|
682 | ldf.fill.nta f44=[ r3],loc0 |
---|
683 | ldf.fill.nta f52=[r14],loc0 |
---|
684 | ldf.fill.nta f60=[r15],loc0 |
---|
685 | ;; |
---|
686 | ldf.fill.nta f68=[in0],loc0 |
---|
687 | ldf.fill.nta f76=[ r3],loc0 |
---|
688 | ldf.fill.nta f84=[r14],loc0 |
---|
689 | ldf.fill.nta f92=[r15],loc0 |
---|
690 | ;; |
---|
691 | ldf.fill.nta f100=[in0],loc1 |
---|
692 | ldf.fill.nta f108=[ r3],loc1 |
---|
693 | ldf.fill.nta f116=[r14],loc1 |
---|
694 | ldf.fill.nta f124=[r15],loc1 |
---|
695 | ;; |
---|
696 | ldf.fill.nta f37=[in0],loc0 |
---|
697 | ldf.fill.nta f45=[ r3],loc0 |
---|
698 | ldf.fill.nta f53=[r14],loc0 |
---|
699 | ldf.fill.nta f61=[r15],loc0 |
---|
700 | ;; |
---|
701 | ldf.fill.nta f69=[in0],loc0 |
---|
702 | ldf.fill.nta f77=[ r3],loc0 |
---|
703 | ldf.fill.nta f85=[r14],loc0 |
---|
704 | ldf.fill.nta f93=[r15],loc0 |
---|
705 | ;; |
---|
706 | ldf.fill.nta f101=[in0],loc1 |
---|
707 | ldf.fill.nta f109=[ r3],loc1 |
---|
708 | ldf.fill.nta f117=[r14],loc1 |
---|
709 | ldf.fill.nta f125=[r15],loc1 |
---|
710 | ;; |
---|
711 | ldf.fill.nta f38 =[in0],loc0 |
---|
712 | ldf.fill.nta f46 =[ r3],loc0 |
---|
713 | ldf.fill.nta f54 =[r14],loc0 |
---|
714 | ldf.fill.nta f62 =[r15],loc0 |
---|
715 | ;; |
---|
716 | ldf.fill.nta f70 =[in0],loc0 |
---|
717 | ldf.fill.nta f78 =[ r3],loc0 |
---|
718 | ldf.fill.nta f86 =[r14],loc0 |
---|
719 | ldf.fill.nta f94 =[r15],loc0 |
---|
720 | ;; |
---|
721 | ldf.fill.nta f102=[in0],loc1 |
---|
722 | ldf.fill.nta f110=[ r3],loc1 |
---|
723 | ldf.fill.nta f118=[r14],loc1 |
---|
724 | ldf.fill.nta f126=[r15],loc1 |
---|
725 | ;; |
---|
726 | ldf.fill.nta f39 =[in0],loc0 |
---|
727 | ldf.fill.nta f47 =[ r3],loc0 |
---|
728 | ldf.fill.nta f55 =[r14],loc0 |
---|
729 | ldf.fill.nta f63 =[r15],loc0 |
---|
730 | ;; |
---|
731 | ldf.fill.nta f71 =[in0],loc0 |
---|
732 | ldf.fill.nta f79 =[ r3],loc0 |
---|
733 | ldf.fill.nta f87 =[r14],loc0 |
---|
734 | ldf.fill.nta f95 =[r15],loc0 |
---|
735 | ;; |
---|
736 | ldf.fill.nta f103=[in0] |
---|
737 | ldf.fill.nta f111=[ r3] |
---|
738 | ldf.fill.nta f119=[r14] |
---|
739 | ldf.fill.nta f127=[r15] |
---|
740 | br.ret.sptk.many rp |
---|
741 | END(__ia64_load_fpu) |
---|
742 | |
---|
743 | GLOBAL_ENTRY(__ia64_init_fpu) |
---|
744 | stf.spill [sp]=f0 // M3 |
---|
745 | mov f32=f0 // F |
---|
746 | nop.b 0 |
---|
747 | |
---|
748 | ldfps f33,f34=[sp] // M0 |
---|
749 | ldfps f35,f36=[sp] // M1 |
---|
750 | mov f37=f0 // F |
---|
751 | ;; |
---|
752 | |
---|
753 | setf.s f38=r0 // M2 |
---|
754 | setf.s f39=r0 // M3 |
---|
755 | mov f40=f0 // F |
---|
756 | |
---|
757 | ldfps f41,f42=[sp] // M0 |
---|
758 | ldfps f43,f44=[sp] // M1 |
---|
759 | mov f45=f0 // F |
---|
760 | |
---|
761 | setf.s f46=r0 // M2 |
---|
762 | setf.s f47=r0 // M3 |
---|
763 | mov f48=f0 // F |
---|
764 | |
---|
765 | ldfps f49,f50=[sp] // M0 |
---|
766 | ldfps f51,f52=[sp] // M1 |
---|
767 | mov f53=f0 // F |
---|
768 | |
---|
769 | setf.s f54=r0 // M2 |
---|
770 | setf.s f55=r0 // M3 |
---|
771 | mov f56=f0 // F |
---|
772 | |
---|
773 | ldfps f57,f58=[sp] // M0 |
---|
774 | ldfps f59,f60=[sp] // M1 |
---|
775 | mov f61=f0 // F |
---|
776 | |
---|
777 | setf.s f62=r0 // M2 |
---|
778 | setf.s f63=r0 // M3 |
---|
779 | mov f64=f0 // F |
---|
780 | |
---|
781 | ldfps f65,f66=[sp] // M0 |
---|
782 | ldfps f67,f68=[sp] // M1 |
---|
783 | mov f69=f0 // F |
---|
784 | |
---|
785 | setf.s f70=r0 // M2 |
---|
786 | setf.s f71=r0 // M3 |
---|
787 | mov f72=f0 // F |
---|
788 | |
---|
789 | ldfps f73,f74=[sp] // M0 |
---|
790 | ldfps f75,f76=[sp] // M1 |
---|
791 | mov f77=f0 // F |
---|
792 | |
---|
793 | setf.s f78=r0 // M2 |
---|
794 | setf.s f79=r0 // M3 |
---|
795 | mov f80=f0 // F |
---|
796 | |
---|
797 | ldfps f81,f82=[sp] // M0 |
---|
798 | ldfps f83,f84=[sp] // M1 |
---|
799 | mov f85=f0 // F |
---|
800 | |
---|
801 | setf.s f86=r0 // M2 |
---|
802 | setf.s f87=r0 // M3 |
---|
803 | mov f88=f0 // F |
---|
804 | |
---|
805 | /* |
---|
806 | * When the instructions are cached, it would be faster to initialize |
---|
807 | * the remaining registers with simply mov instructions (F-unit). |
---|
808 | * This gets the time down to ~29 cycles. However, this would use up |
---|
809 | * 33 bundles, whereas continuing with the above pattern yields |
---|
810 | * 10 bundles and ~30 cycles. |
---|
811 | */ |
---|
812 | |
---|
813 | ldfps f89,f90=[sp] // M0 |
---|
814 | ldfps f91,f92=[sp] // M1 |
---|
815 | mov f93=f0 // F |
---|
816 | |
---|
817 | setf.s f94=r0 // M2 |
---|
818 | setf.s f95=r0 // M3 |
---|
819 | mov f96=f0 // F |
---|
820 | |
---|
821 | ldfps f97,f98=[sp] // M0 |
---|
822 | ldfps f99,f100=[sp] // M1 |
---|
823 | mov f101=f0 // F |
---|
824 | |
---|
825 | setf.s f102=r0 // M2 |
---|
826 | setf.s f103=r0 // M3 |
---|
827 | mov f104=f0 // F |
---|
828 | |
---|
829 | ldfps f105,f106=[sp] // M0 |
---|
830 | ldfps f107,f108=[sp] // M1 |
---|
831 | mov f109=f0 // F |
---|
832 | |
---|
833 | setf.s f110=r0 // M2 |
---|
834 | setf.s f111=r0 // M3 |
---|
835 | mov f112=f0 // F |
---|
836 | |
---|
837 | ldfps f113,f114=[sp] // M0 |
---|
838 | ldfps f115,f116=[sp] // M1 |
---|
839 | mov f117=f0 // F |
---|
840 | |
---|
841 | setf.s f118=r0 // M2 |
---|
842 | setf.s f119=r0 // M3 |
---|
843 | mov f120=f0 // F |
---|
844 | |
---|
845 | ldfps f121,f122=[sp] // M0 |
---|
846 | ldfps f123,f124=[sp] // M1 |
---|
847 | mov f125=f0 // F |
---|
848 | |
---|
849 | setf.s f126=r0 // M2 |
---|
850 | setf.s f127=r0 // M3 |
---|
851 | br.ret.sptk.many rp // F |
---|
852 | END(__ia64_init_fpu) |
---|
853 | |
---|
854 | /* |
---|
855 | * Switch execution mode from virtual to physical |
---|
856 | * |
---|
857 | * Inputs: |
---|
858 | * r16 = new psr to establish |
---|
859 | * Output: |
---|
860 | * r19 = old virtual address of ar.bsp |
---|
861 | * r20 = old virtual address of sp |
---|
862 | * |
---|
863 | * Note: RSE must already be in enforced lazy mode |
---|
864 | */ |
---|
865 | GLOBAL_ENTRY(ia64_switch_mode_phys) |
---|
866 | { |
---|
867 | rsm psr.i | psr.ic // disable interrupts and interrupt collection |
---|
868 | mov r15=ip |
---|
869 | } |
---|
870 | ;; |
---|
871 | { |
---|
872 | flushrs // must be first insn in group |
---|
873 | srlz.i |
---|
874 | } |
---|
875 | ;; |
---|
876 | mov cr.ipsr=r16 // set new PSR |
---|
877 | add r3=1f-ia64_switch_mode_phys,r15 |
---|
878 | |
---|
879 | mov r19=ar.bsp |
---|
880 | mov r20=sp |
---|
881 | mov r14=rp // get return address into a general register |
---|
882 | ;; |
---|
883 | |
---|
884 | // going to physical mode, use tpa to translate virt->phys |
---|
885 | tpa r17=r19 |
---|
886 | tpa r3=r3 |
---|
887 | tpa sp=sp |
---|
888 | tpa r14=r14 |
---|
889 | ;; |
---|
890 | |
---|
891 | mov r18=ar.rnat // save ar.rnat |
---|
892 | mov ar.bspstore=r17 // this steps on ar.rnat |
---|
893 | mov cr.iip=r3 |
---|
894 | mov cr.ifs=r0 |
---|
895 | ;; |
---|
896 | mov ar.rnat=r18 // restore ar.rnat |
---|
897 | rfi // must be last insn in group |
---|
898 | ;; |
---|
899 | 1: mov rp=r14 |
---|
900 | br.ret.sptk.many rp |
---|
901 | END(ia64_switch_mode_phys) |
---|
902 | |
---|
903 | /* |
---|
904 | * Switch execution mode from physical to virtual: the new state is staged in cr.ipsr/cr.iip/cr.ifs and entered with rfi, which resumes at local label 1 |
---|
905 | * |
---|
906 | * Inputs: |
---|
907 | * r16 = new psr to establish |
---|
908 | * r19 = new bspstore to establish |
---|
909 | * r20 = new sp to establish |
---|
910 | * Clobbers: r3, r14, r15, r18; rp is rebased onto KERNEL_START before returning |
---|
911 | * Note: RSE must already be in enforced lazy mode |
---|
912 | */ |
---|
913 | GLOBAL_ENTRY(ia64_switch_mode_virt) |
---|
914 | { |
---|
915 | rsm psr.i | psr.ic // disable interrupts and interrupt collection |
---|
916 | mov r15=ip /* r15 = address of this entry bundle, used as the base for label 1 below */ |
---|
917 | } |
---|
918 | ;; |
---|
919 | { |
---|
920 | flushrs // must be first insn in group |
---|
921 | srlz.i |
---|
922 | } |
---|
923 | ;; |
---|
924 | mov cr.ipsr=r16 // set new PSR (takes effect at the rfi below) |
---|
925 | add r3=1f-ia64_switch_mode_virt,r15 /* r3 = current (physical) address of label 1 */ |
---|
926 | |
---|
927 | mov r14=rp // get return address into a general register |
---|
928 | ;; |
---|
929 | |
---|
930 | // going to virtual |
---|
931 | // - for code addresses, set upper bits of addr to KERNEL_START |
---|
932 | // - for stack addresses, copy from input argument |
---|
933 | movl r18=KERNEL_START |
---|
934 | dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT /* keep only the low KERNEL_TR_PAGE_SHIFT bits of the label address */ |
---|
935 | dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT /* same for the return address */ |
---|
936 | mov sp=r20 /* stack pointer comes straight from the caller */ |
---|
937 | ;; |
---|
938 | or r3=r3,r18 /* r3 = KERNEL_START | offset: virtual address of label 1 */ |
---|
939 | or r14=r14,r18 /* r14 = KERNEL_START | offset: virtual return address */ |
---|
940 | ;; |
---|
941 | |
---|
942 | mov r18=ar.rnat // save ar.rnat |
---|
943 | mov ar.bspstore=r19 // this steps on ar.rnat |
---|
944 | mov cr.iip=r3 /* rfi will resume at label 1 (virtual) */ |
---|
945 | mov cr.ifs=r0 |
---|
946 | ;; |
---|
947 | mov ar.rnat=r18 // restore ar.rnat |
---|
948 | rfi // must be last insn in group |
---|
949 | ;; |
---|
950 | 1: mov rp=r14 /* return through the rebased (virtual) return address */ |
---|
951 | br.ret.sptk.many rp |
---|
952 | END(ia64_switch_mode_virt) |
---|
953 | |
---|
954 | GLOBAL_ENTRY(ia64_delay_loop) /* busy-wait: loads the first argument (r32) into ar.lc and spins on br.cloop; the caller's ar.lc is preserved in r2 */ |
---|
955 | .prologue |
---|
956 | { nop 0 // work around GAS unwind info generation bug... |
---|
957 | .save ar.lc,r2 |
---|
958 | mov r2=ar.lc /* save the caller's loop counter */ |
---|
959 | .body |
---|
960 | ;; |
---|
961 | mov ar.lc=r32 /* loop count comes from the first argument */ |
---|
962 | } |
---|
963 | ;; |
---|
964 | // force loop to be 32-byte aligned (GAS bug means we cannot use .align |
---|
965 | // inside function body without corrupting unwind info). |
---|
966 | { nop 0 } |
---|
967 | 1: br.cloop.sptk.few 1b /* the delay itself: branch-to-self driven by ar.lc */ |
---|
968 | ;; |
---|
969 | mov ar.lc=r2 /* restore the caller's loop counter */ |
---|
970 | br.ret.sptk.many rp |
---|
971 | END(ia64_delay_loop) |
---|
972 | |
---|
973 | /* |
---|
974 | * Return a CPU-local timestamp in nano-seconds. This timestamp is |
---|
975 | * NOT synchronized across CPUs; its return value must never be |
---|
976 | * compared against the values returned on another CPU. The usage in |
---|
977 | * kernel/sched.c ensures that. |
---|
978 | * |
---|
979 | * The return-value of sched_clock() is NOT supposed to wrap-around. |
---|
980 | * If it did, it would cause some scheduling hiccups (at the worst). |
---|
981 | * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even |
---|
982 | * that would happen only once every 5+ years. |
---|
983 | * |
---|
984 | * The code below basically calculates: |
---|
985 | * |
---|
986 | * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT |
---|
987 | * |
---|
988 | * except that the multiplication and the shift are done with 128-bit |
---|
989 | * intermediate precision so that we can produce a full 64-bit result. |
---|
990 | */ |
---|
991 | GLOBAL_ENTRY(sched_clock) |
---|
992 | addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 /* r8 = address of this CPU's nsec_per_cyc field */ |
---|
993 | mov.m r9=ar.itc // fetch cycle-counter (35 cyc) |
---|
994 | ;; |
---|
995 | ldf8 f8=[r8] /* f8 = nsec_per_cyc */ |
---|
996 | ;; |
---|
997 | setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... |
---|
998 | ;; |
---|
999 | xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) |
---|
1000 | xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product |
---|
1001 | ;; |
---|
1002 | getf.sig r8=f10 // r8 = low half of the product (5 cyc) |
---|
1003 | getf.sig r9=f11 /* r9 = high half of the product */ |
---|
1004 | ;; |
---|
1005 | shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT /* r8 = low 64 bits of (r9:r8) >> IA64_NSEC_PER_CYC_SHIFT, the result */ |
---|
1006 | br.ret.sptk.many rp |
---|
1007 | END(sched_clock) |
---|
1008 | |
---|
1009 | GLOBAL_ENTRY(start_kernel_thread) /* calls kernel_thread_helper(r9, r11), then sys_exit() with the r8 left by that call; never returns. NOTE(review): r9/r11 are presumably the thread function and its argument -- confirm against the code that sets them up */ |
---|
1010 | .prologue |
---|
1011 | .save rp, r0 // this is the end of the call-chain |
---|
1012 | .body |
---|
1013 | alloc r2 = ar.pfs, 0, 0, 2, 0 /* frame with two output registers and nothing else */ |
---|
1014 | mov out0 = r9 /* first argument for kernel_thread_helper */ |
---|
1015 | mov out1 = r11;; |
---|
1016 | br.call.sptk.many rp = kernel_thread_helper;; |
---|
1017 | mov out0 = r8 /* hand r8 from the helper call (presumably its return value) to sys_exit */ |
---|
1018 | br.call.sptk.many rp = sys_exit;; |
---|
1019 | 1: br.sptk.few 1b // not reached; spins here if sys_exit ever came back |
---|
1020 | END(start_kernel_thread) |
---|
1021 | |
---|
1022 | #ifdef CONFIG_IA64_BRL_EMU |
---|
1023 | |
---|
1024 | /* |
---|
1025 | * Assembly routines used by brl_emu.c to set preserved register state. SET_REG(reg) emits a global routine ia64_set_<reg> that copies its single input (r32) into <reg> and returns; it is instantiated below for b1 through b5. |
---|
1026 | */ |
---|
1027 | |
---|
1028 | #define SET_REG(reg) \ |
---|
1029 | GLOBAL_ENTRY(ia64_set_##reg); \ |
---|
1030 | alloc r16=ar.pfs,1,0,0,0; \ |
---|
1031 | mov reg=r32; \ |
---|
1032 | ;; \ |
---|
1033 | br.ret.sptk.many rp; \ |
---|
1034 | END(ia64_set_##reg) |
---|
1035 | |
---|
1036 | SET_REG(b1); |
---|
1037 | SET_REG(b2); |
---|
1038 | SET_REG(b3); |
---|
1039 | SET_REG(b4); |
---|
1040 | SET_REG(b5); |
---|
1041 | |
---|
1042 | #endif /* CONFIG_IA64_BRL_EMU */ |
---|
1043 | |
---|
1044 | #ifdef CONFIG_SMP |
---|
1045 | /* |
---|
1046 | * This routine handles spinlock contention. It uses a non-standard calling |
---|
1047 | * convention to avoid converting leaf routines into interior routines. Because |
---|
1048 | * of this special convention, there are several restrictions: |
---|
1049 | * |
---|
1050 | * - do not use gp-relative variables: this code is called from the kernel |
---|
1051 | * and from modules, so r1 is undefined. |
---|
1052 | * - do not use stacked registers, the caller owns them. |
---|
1053 | * - do not use the scratch stack space, the caller owns it. |
---|
1054 | * - do not use any registers other than the ones listed below |
---|
1055 | * |
---|
1056 | * Inputs: |
---|
1057 | * ar.pfs - saved CFM of caller |
---|
1058 | * ar.ccv - 0 (and available for use) |
---|
1059 | * r27 - flags from spin_lock_irqsave or 0. Must be preserved. |
---|
1060 | * r28 - available for use. |
---|
1061 | * r29 - available for use. |
---|
1062 | * r30 - available for use. |
---|
1063 | * r31 - address of lock, available for use. |
---|
1064 | * b6 - return address |
---|
1065 | * p14 - available for use. |
---|
1066 | * p15 - used to track flag status. |
---|
1067 | * |
---|
1068 | * If you patch this code to use more registers, do not forget to update |
---|
1069 | * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. |
---|
1070 | */ |
---|
1071 | |
---|
1072 | #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) |
---|
1073 | |
---|
1074 | GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) /* variant in which the return address arrives in r28 (copied to b6 below); it only waits for the lock word to read zero and then branches back so the caller can retry the acquire */ |
---|
1075 | .prologue |
---|
1076 | .save ar.pfs, r0 // this code effectively has a zero frame size |
---|
1077 | .save rp, r28 |
---|
1078 | .body |
---|
1079 | nop 0 |
---|
1080 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT /* p15 = the interrupt-enable bit is set in the saved flags (r27) */ |
---|
1081 | .restore sp // pop existing prologue after next insn |
---|
1082 | mov b6 = r28 /* from here on the return address lives in b6 */ |
---|
1083 | .prologue |
---|
1084 | .save ar.pfs, r0 |
---|
1085 | .altrp b6 |
---|
1086 | .body |
---|
1087 | ;; |
---|
1088 | (p15) ssm psr.i // reenable interrupts if they were on |
---|
1089 | // DavidM says that srlz.d is slow and is not required in this case |
---|
1090 | .wait: |
---|
1091 | // exponential backoff, kdb, lockmeter etc. go in here |
---|
1092 | hint @pause |
---|
1093 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word |
---|
1094 | nop 0 |
---|
1095 | ;; |
---|
1096 | cmp4.ne p14,p0=r30,r0 /* p14 = lock word is still non-zero */ |
---|
1097 | (p14) br.cond.sptk.few .wait |
---|
1098 | (p15) rsm psr.i // disable interrupts if we reenabled them |
---|
1099 | br.cond.sptk.few b6 // lock is now free, try to acquire |
---|
1100 | .global ia64_spinlock_contention_pre3_4_end // for kernprof |
---|
1101 | ia64_spinlock_contention_pre3_4_end: |
---|
1102 | END(ia64_spinlock_contention_pre3_4) |
---|
1103 | |
---|
1104 | #else |
---|
1105 | |
---|
1106 | GLOBAL_ENTRY(ia64_spinlock_contention) /* spins until the lock word at [r31] reads zero, then tries to take it with cmpxchg4.acq; returns through b6 only once the lock is held */ |
---|
1107 | .prologue |
---|
1108 | .altrp b6 |
---|
1109 | .body |
---|
1110 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT /* p15 = the interrupt-enable bit is set in the saved flags (r27) */ |
---|
1111 | ;; |
---|
1112 | .wait: |
---|
1113 | (p15) ssm psr.i // reenable interrupts if they were on |
---|
1114 | // DavidM says that srlz.d is slow and is not required in this case |
---|
1115 | .wait2: |
---|
1116 | // exponential backoff, kdb, lockmeter etc. go in here |
---|
1117 | hint @pause |
---|
1118 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word |
---|
1119 | ;; |
---|
1120 | cmp4.ne p14,p0=r30,r0 /* p14 = lock word is still non-zero */ |
---|
1121 | mov r30 = 1 /* value the cmpxchg below will try to store */ |
---|
1122 | (p14) br.cond.sptk.few .wait2 |
---|
1123 | (p15) rsm psr.i // disable interrupts if we reenabled them |
---|
1124 | ;; |
---|
1125 | cmpxchg4.acq r30=[r31], r30, ar.ccv /* store 1 if the lock word equals ar.ccv (0 on entry); r30 = previous value */ |
---|
1126 | ;; |
---|
1127 | cmp4.ne p14,p0=r0,r30 /* p14 = previous value was non-zero, i.e. the acquire failed */ |
---|
1128 | (p14) br.cond.sptk.few .wait |
---|
1129 | |
---|
1130 | br.ret.sptk.many b6 // lock is now taken |
---|
1131 | END(ia64_spinlock_contention) |
---|
1132 | |
---|
1133 | #endif |
---|
1134 | |
---|
1135 | #ifdef CONFIG_HOTPLUG_CPU |
---|
1136 | GLOBAL_ENTRY(ia64_jump_to_sal) /* in0 = virtual address of a saved-register area: it is translated with tpa (r25), an rfi installs SAL_PSR_BITS_TO_SET, the TLB purge code is run, the area is restored register by register and control leaves through the restored b0 */ |
---|
1137 | alloc r16=ar.pfs,1,0,0,0;; |
---|
1138 | rsm psr.i | psr.ic |
---|
1139 | { |
---|
1140 | flushrs |
---|
1141 | srlz.i |
---|
1142 | } |
---|
1143 | tpa r25=in0 /* r25 = translated address of the save area; walked with post-increment below */ |
---|
1144 | movl r18=tlb_purge_done;; |
---|
1145 | DATA_VA_TO_PA(r18);; |
---|
1146 | mov b1=r18 // Return location |
---|
1147 | movl r18=ia64_do_tlb_purge;; |
---|
1148 | DATA_VA_TO_PA(r18);; |
---|
1149 | mov b2=r18 // doing tlb_flush work |
---|
1150 | mov ar.rsc=0 // Put RSE in enforced lazy, LE mode |
---|
1151 | movl r17=1f;; |
---|
1152 | DATA_VA_TO_PA(r17);; |
---|
1153 | mov cr.iip=r17 /* rfi below resumes at label 1 */ |
---|
1154 | movl r16=SAL_PSR_BITS_TO_SET;; |
---|
1155 | mov cr.ipsr=r16 /* psr installed by the rfi below */ |
---|
1156 | mov cr.ifs=r0;; |
---|
1157 | rfi;; |
---|
1158 | 1: |
---|
1159 | /* |
---|
1160 | * Invalidate all TLB data/inst: the purge code is entered through b2; b1 was loaded above with tlb_purge_done as the return location |
---|
1161 | */ |
---|
1162 | br.sptk.many b2;; // jump to tlb purge code |
---|
1163 | |
---|
1164 | tlb_purge_done: |
---|
1165 | RESTORE_REGION_REGS(r25, r17,r18,r19);; |
---|
1166 | RESTORE_REG(b0, r25, r17);; |
---|
1167 | RESTORE_REG(b1, r25, r17);; |
---|
1168 | RESTORE_REG(b2, r25, r17);; |
---|
1169 | RESTORE_REG(b3, r25, r17);; |
---|
1170 | RESTORE_REG(b4, r25, r17);; |
---|
1171 | RESTORE_REG(b5, r25, r17);; |
---|
1172 | ld8 r1=[r25],0x08;; |
---|
1173 | ld8 r12=[r25],0x08;; |
---|
1174 | ld8 r13=[r25],0x08;; |
---|
1175 | RESTORE_REG(ar.fpsr, r25, r17);; |
---|
1176 | RESTORE_REG(ar.pfs, r25, r17);; |
---|
1177 | RESTORE_REG(ar.rnat, r25, r17);; |
---|
1178 | RESTORE_REG(ar.unat, r25, r17);; |
---|
1179 | RESTORE_REG(ar.bspstore, r25, r17);; |
---|
1180 | RESTORE_REG(cr.dcr, r25, r17);; |
---|
1181 | RESTORE_REG(cr.iva, r25, r17);; |
---|
1182 | RESTORE_REG(cr.pta, r25, r17);; |
---|
1183 | RESTORE_REG(cr.itv, r25, r17);; |
---|
1184 | RESTORE_REG(cr.pmv, r25, r17);; |
---|
1185 | RESTORE_REG(cr.cmcv, r25, r17);; |
---|
1186 | RESTORE_REG(cr.lrr0, r25, r17);; |
---|
1187 | RESTORE_REG(cr.lrr1, r25, r17);; |
---|
1188 | ld8 r4=[r25],0x08;; |
---|
1189 | ld8 r5=[r25],0x08;; |
---|
1190 | ld8 r6=[r25],0x08;; |
---|
1191 | ld8 r7=[r25],0x08;; |
---|
1192 | ld8 r17=[r25],0x08;; |
---|
1193 | mov pr=r17,-1;; |
---|
1194 | RESTORE_REG(ar.lc, r25, r17);; |
---|
1195 | /* |
---|
1196 | * Now Restore floating point regs f2-f5 and f16-f31, 16 bytes each, still post-incrementing r25 |
---|
1197 | */ |
---|
1198 | ldf.fill.nta f2=[r25],16;; |
---|
1199 | ldf.fill.nta f3=[r25],16;; |
---|
1200 | ldf.fill.nta f4=[r25],16;; |
---|
1201 | ldf.fill.nta f5=[r25],16;; |
---|
1202 | ldf.fill.nta f16=[r25],16;; |
---|
1203 | ldf.fill.nta f17=[r25],16;; |
---|
1204 | ldf.fill.nta f18=[r25],16;; |
---|
1205 | ldf.fill.nta f19=[r25],16;; |
---|
1206 | ldf.fill.nta f20=[r25],16;; |
---|
1207 | ldf.fill.nta f21=[r25],16;; |
---|
1208 | ldf.fill.nta f22=[r25],16;; |
---|
1209 | ldf.fill.nta f23=[r25],16;; |
---|
1210 | ldf.fill.nta f24=[r25],16;; |
---|
1211 | ldf.fill.nta f25=[r25],16;; |
---|
1212 | ldf.fill.nta f26=[r25],16;; |
---|
1213 | ldf.fill.nta f27=[r25],16;; |
---|
1214 | ldf.fill.nta f28=[r25],16;; |
---|
1215 | ldf.fill.nta f29=[r25],16;; |
---|
1216 | ldf.fill.nta f30=[r25],16;; |
---|
1217 | ldf.fill.nta f31=[r25],16;; |
---|
1218 | |
---|
1219 | /* |
---|
1220 | * Now that we have done all the register restores |
---|
1221 | * we are now ready for the big DIVE to SAL Land |
---|
1222 | */ |
---|
1223 | ssm psr.ic;; |
---|
1224 | srlz.d;; |
---|
1225 | br.ret.sptk.many b0;; |
---|
1226 | END(ia64_jump_to_sal) |
---|
1227 | #endif /* CONFIG_HOTPLUG_CPU */ |
---|
1228 | |
---|
1229 | #endif /* CONFIG_SMP */ |
---|