1 | /* |
---|
2 | * linux/arch/x86_64/entry.S |
---|
3 | * |
---|
4 | * Copyright (C) 1991, 1992 Linus Torvalds |
---|
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
---|
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
---|
7 | * |
---|
8 | * $Id$ |
---|
9 | * |
---|
10 | * Jun Nakajima <jun.nakajima@intel.com> |
---|
11 | * Asit Mallick <asit.k.mallick@intel.com> |
---|
12 | * Modified for Xen |
---|
13 | */ |
---|
14 | |
---|
15 | /* |
---|
16 | * entry.S contains the system-call and fault low-level handling routines. |
---|
17 | * |
---|
18 | * NOTE: This code handles signal-recognition, which happens every time |
---|
19 | * after an interrupt and after each system call. |
---|
20 | * |
---|
21 | * Normal syscalls and interrupts don't save a full stack frame, this is |
---|
22 | * only done for syscall tracing, signals or fork/exec et.al. |
---|
23 | * |
---|
24 | * A note on terminology: |
---|
25 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
---|
26 | * at the top of the kernel process stack. |
---|
27 | * - partial stack frame: partially saved registers up to R11. |
---|
28 | * - full stack frame: Like partial stack frame, but all registers saved. |
---|
29 | * |
---|
30 | * TODO: |
---|
31 | * - schedule it carefully for the final hardware. |
---|
32 | */ |
---|
33 | |
---|
34 | #define ASSEMBLY 1 |
---|
35 | #include <linux/linkage.h> |
---|
36 | #include <asm/segment.h> |
---|
37 | #include <asm/smp.h> |
---|
38 | #include <asm/cache.h> |
---|
39 | #include <asm/errno.h> |
---|
40 | #include <asm/dwarf2.h> |
---|
41 | #include <asm/calling.h> |
---|
42 | #include <asm/asm-offsets.h> |
---|
43 | #include <asm/msr.h> |
---|
44 | #include <asm/unistd.h> |
---|
45 | #include <asm/thread_info.h> |
---|
46 | #include <asm/hw_irq.h> |
---|
47 | #include <asm/page.h> |
---|
48 | #include <asm/irqflags.h> |
---|
49 | #include <asm/errno.h> |
---|
50 | #include <xen/interface/arch-x86_64.h> |
---|
51 | #include <xen/interface/features.h> |
---|
52 | |
---|
53 | #include "irq_vectors.h" |
---|
54 | |
---|
55 | #include "xen_entry.S" |
---|
56 | |
---|
57 | .code64 |
---|
58 | |
---|
59 | #ifndef CONFIG_PREEMPT |
---|
60 | #define retint_kernel retint_restore_args |
---|
61 | #endif |
---|
62 | |
---|
63 | |
---|
/*
 * TRACE_IRQS_IRETQ: report "interrupts on" to the irq-flags tracer when the
 * EFLAGS image saved in the frame has bit 9 set.
 *
 *   offset - amount subtracted from the EFLAGS slot offset when addressing
 *            the saved flags relative to %rsp (defaults to ARGOFFSET)
 *
 * Expands to nothing unless CONFIG_TRACE_IRQFLAGS is defined.
 */
	.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)		/* interrupts off? */
	jnc	1f				/* bit clear: nothing to report */
	TRACE_IRQS_ON
1:
#endif
	.endm
---|
72 | |
---|
/*
 * Bit 31 of the flags word saved in an iret frame.  HYPERVISOR_IRET tests
 * it and, when it is set, clears it and returns through the hypervisor.
 */
NMI_MASK = 0x80000000
---|
74 | |
---|
75 | /* |
---|
76 | * C code is not supposed to know about undefined top of stack. Every time |
---|
77 | * a C function with a pt_regs argument is called from the SYSCALL based |
---|
78 | * fast path FIXUP_TOP_OF_STACK is needed. |
---|
79 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
---|
80 | * manipulation. |
---|
81 | */ |
---|
82 | |
---|
/*
 * FIXUP_TOP_OF_STACK: give the frame slots that the SYSCALL fast path
 * leaves undefined a defined value before C code inspects pt_regs.
 *
 * Expects %rsp at FRAMEEND.  Writes __USER_CS into the CS slot and -1 into
 * the RCX slot.  The tmp argument is accepted but not used here.
 */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	.endm
---|
88 | |
---|
/*
 * RESTORE_TOP_OF_STACK: counterpart of FIXUP_TOP_OF_STACK.  In this file
 * the macro body is empty; both arguments are accepted and ignored.
 */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	.endm
---|
91 | |
---|
/*
 * FAKE_STACK_FRAME: hand-build an interrupt-style frame on the stack.
 *
 * Six quadwords are pushed, in this order: ss (0), rsp (0), eflags with
 * only bit 9 set (interrupts on), __KERNEL_CS, the rip operand supplied by
 * the caller, and orig rax (0).  UNFAKE_STACK_FRAME removes them again.
 *
 *   child_rip - operand pushed into the rip slot
 *
 * Clobbers: %rax (left zero) and the arithmetic flags.
 */
	.macro FAKE_STACK_FRAME child_rip
	xorl	%eax, %eax		/* zero for the ss, rsp and orig rax slots */
	pushq	%rax			/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax			/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)			/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS		/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip		/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax			/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm
---|
113 | |
---|
/*
 * UNFAKE_STACK_FRAME: release six quadwords of stack, the amount that
 * FAKE_STACK_FRAME pushes, and adjust the CFA annotation to match.
 */
	.macro UNFAKE_STACK_FRAME
	addq	$(6*8), %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
---|
118 | |
---|
/*
 * CFI_DEFAULT_STACK: emit the unwind annotations for a pt_regs style frame
 * addressed through %rsp.
 *
 *   start - non-zero (default): open a new "simple" CFI procedure and
 *           define the CFA; zero: only redefine the CFA offset inside an
 *           already open procedure.
 *   adj   - 0 (default): %rsp points at a full frame, so r15..rbx are
 *           annotated as well.
 *           1: %rsp points ARGOFFSET bytes further up, at a partial frame;
 *           the r15..rbx annotations are skipped.
 *
 * With adj=1 every slot sits ARGOFFSET bytes closer to %rsp, so the
 * register offsets have to shrink together with the CFA offset.  The
 * original left them unadjusted, which disagreed with the partial-frame
 * annotations used by int_ret_from_sys_call (R11-ARGOFFSET and friends).
 * The adj=0 expansion is unchanged.
 */
	.macro CFI_DEFAULT_STACK start=1,adj=0
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-(\adj*ARGOFFSET)
	.else
	CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
	.endif
	.if \adj == 0
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	.endif
	CFI_REL_OFFSET	r11,R11-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	r10,R10-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	r9,R9-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	r8,R8-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rax,RAX-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rcx,RCX-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rdx,RDX-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rsi,RSI-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rdi,RDI-(\adj*ARGOFFSET)
	CFI_REL_OFFSET	rip,RIP-(\adj*ARGOFFSET)
	/*CFI_REL_OFFSET	cs,CS-(\adj*ARGOFFSET)*/
	/*CFI_REL_OFFSET	rflags,EFLAGS-(\adj*ARGOFFSET)*/
	CFI_REL_OFFSET	rsp,RSP-(\adj*ARGOFFSET)
	/*CFI_REL_OFFSET	ss,SS-(\adj*ARGOFFSET)*/
	.endm
---|
149 | |
---|
/*
 * HYPERVISOR_IRET: leave through the iret frame at %rsp, either with a
 * plain iretq or through the hypervisor.
 *
 * Must be consistent with the definition in arch-x86/xen-x86_64.h:
 *     struct iret_context {
 *         u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
 *     };
 * with rax, r11, and rcx being taken care of in the hypercall stub.
 *
 * On entry %rsp addresses the rip slot, so cs is at 1*8, rflags at 2*8
 * and ss at 4*8.
 *
 *   flag - immediate pushed as the "flags" member on the hypercall path
 *
 * Decision:
 *   - either of the two low bits of cs set, or NMI_MASK set in the saved
 *     rflags: take the slow path (label 2).
 *   - otherwise iretq directly; unless the XENFEAT_supervisor_mode_kernel
 *     byte of xen_features is non-zero, 3 is first OR-ed into cs and ss.
 */
	.macro HYPERVISOR_IRET flag
	testb	$3,1*8(%rsp)			/* low two bits of saved cs */
	jnz	2f
	testl	$NMI_MASK,2*8(%rsp)		/* marker bit in saved rflags */
	jnz	2f

	cmpb	$0,(xen_features+XENFEAT_supervisor_mode_kernel)(%rip)
	jne	1f

	/* Direct iret to kernel space. Correct CS and SS. */
	orl	$3,1*8(%rsp)
	orl	$3,4*8(%rsp)
1:	iretq

2:	/* Slow iret via hypervisor. */
	andl	$~NMI_MASK, 2*8(%rsp)		/* strip the marker bit again */
	pushq	$\flag
	jmp	hypercall_page + (__HYPERVISOR_iret * 32)
	.endm
---|
176 | |
---|
/*
 * A newly forked process directly context switches into this.
 *
 * In:   rdi = prev
 * Exit: int_ret_from_sys_call when the saved CS has neither low bit set
 *       (the kernel_thread case) or when _TIF_IA32 is set; otherwise
 *       ret_from_sys_call.  When any of the syscall trace/audit/seccomp
 *       flags is set, syscall_trace_leave is called first with %rsp as
 *       the pt_regs pointer.
 */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)		# from kernel_thread?
	jz	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi			/* &pt_regs -> arg1 */
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)			/* reload rcx after the call */
	jmp	rff_action
	CFI_ENDPROC
END(ret_from_fork)
---|
202 | |
---|
/*
 * _frame: initial frame state for interrupts and exceptions.
 *
 * Opens a "simple" CFI procedure and describes where the CFA and the saved
 * rsp and rip are found relative to %rsp.
 *
 *   ref - pt_regs offset that %rsp currently corresponds to; it is
 *         subtracted from every slot offset used below
 */
	.macro _frame ref
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-\ref
	/*CFI_REL_OFFSET	ss,SS-\ref*/
	CFI_REL_OFFSET	rsp,RSP-\ref
	/*CFI_REL_OFFSET	rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET	cs,CS-\ref*/
	CFI_REL_OFFSET	rip,RIP-\ref
	.endm
---|
215 | |
---|
216 | /* |
---|
217 | * System call entry. Up to 6 arguments in registers are supported. |
---|
218 | * |
---|
219 | * SYSCALL does not save anything on the stack and does not change the |
---|
220 | * stack pointer. |
---|
221 | */ |
---|
222 | |
---|
223 | /* |
---|
224 | * Register setup: |
---|
225 | * rax system call number |
---|
226 | * rdi arg0 |
---|
227 | * rcx return address for syscall/sysret, C arg3 |
---|
228 | * rsi arg1 |
---|
229 | * rdx arg2 |
---|
230 | * r10 arg3 (--> moved to rcx for C) |
---|
231 | * r8 arg4 |
---|
232 | * r9 arg5 |
---|
233 | * r11 eflags for syscall/sysret, temporary for C |
---|
234 | * r12-r15,rbp,rbx saved by C code, not touched. |
---|
235 | * |
---|
236 | * Interrupts are off on entry. |
---|
237 | * Only called from user space. |
---|
238 | * |
---|
239 | * XXX if we had a free scratch register we could save the RSP into the stack frame |
---|
240 | * and report it properly in ps. Unfortunately we haven't. |
---|
241 | * |
---|
242 | * When user can change the frames always force IRET. That is because |
---|
243 | * it deals with uncanonical addresses better. SYSRET has trouble |
---|
244 | * with them due to bugs in both AMD and Intel CPUs. |
---|
245 | */ |
---|
246 | |
---|
/*
 * system_call: SYSCALL entry point.
 *
 * In:   rax = system call number, arguments in rdi, rsi, rdx, r10, r8, r9
 *       (r10 is copied to rcx before calling into C).
 * Flow: saves a partial stack frame, then either
 *         - dispatches through sys_call_table and leaves through
 *           ret_from_sys_call (fast path),
 *         - stores -ENOSYS for an out-of-range number (badsys), or
 *         - takes the traced path (tracesys) and leaves through
 *           int_ret_from_sys_call.
 *
 * Change from the original: in tracesys an out-of-range number used to
 * jump to the store at label 1 with %rax still holding that number, so
 * the number itself was written back as the return value.  %rax is now
 * replaced by -ENOSYS before the branch.
 */
ENTRY(system_call)
	_frame (RIP-0x10)
	SAVE_ARGS -8,0
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	XEN_UNBLOCK_EVENTS(%r11)
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys				/* unsigned: also rejects negatives */
	movq	%r10,%rcx			/* arg3 moves to rcx for C */
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx			/* edx = pending work in the mask */
	CFI_REMEMBER_STATE
	jnz	sysret_careful
	/*
	 * No work pending: events are unblocked again before returning.
	 */
	TRACE_IRQS_ON
	XEN_UNBLOCK_EVENTS(%rsi)
	RESTORE_ARGS 0,8,0
	HYPERVISOR_IRET VGCF_IN_SYSCALL

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	TRACE_IRQS_ON
	XEN_UNBLOCK_EVENTS(%rsi)
	pushq	%rdi				/* keep the work mask across the call */
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp	sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
/*	sti */
	XEN_UNBLOCK_EVENTS(%rsi)
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi		# &pt_regs -> arg1
	xorl	%esi,%esi			# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	jmp	int_with_check

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi			/* &pt_regs -> arg1 */
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	movq	$-ENOSYS,%rcx			/* mov leaves the cmpq flags intact */
	cmova	%rcx,%rax			/* out of range: result is -ENOSYS */
	ja	1f				/* ... and the table call is skipped */
	movq	%r10,%rcx			/* fixup for C */
	call	*sys_call_table(,%rax,8)
1:	movq	%rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(system_call)
---|
348 | |
---|
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 *
 * Loops through int_with_check until none of the thread flags selected by
 * the mask in edi is set, then leaves through retint_restore_args.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	testb	$3,CS-ARGOFFSET(%rsp)
	jnz	1f
	/* Need to set the proper %ss (not NULL) for ring 3 iretq */
	movl	$__KERNEL_DS,SS-ARGOFFSET(%rsp)
	jmp	retint_restore_args		# return from ring3 kernel
1:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx			/* edx = pending work in the mask */
	jnz	int_careful
	andl	$~TS_COMPAT,threadinfo_status(%rcx)
	jmp	retint_restore_args

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	TRACE_IRQS_ON
/*	sti */
	XEN_UNBLOCK_EVENTS(%rsi)
	pushq	%rdi				/* keep the work mask across the call */
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
/*	sti */
	XEN_UNBLOCK_EVENTS(%rsi)
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq	8(%rsp),%rdi			# &ptregs -> arg1 (skip the pushed rdi)
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	/* drop the trace bits from the mask for the next pass */
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	jmp	int_restore_rest

int_signal:
	testl	$(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz	1f
	movq	%rsp,%rdi			# &ptregs -> arg1
	xorl	%esi,%esi			# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	jmp	int_with_check
	CFI_ENDPROC
END(int_ret_from_sys_call)
---|
440 | |
---|
441 | /* |
---|
442 | * Certain special system calls that need to save a complete full stack frame. |
---|
443 | */ |
---|
444 | |
---|
/*
 * PTREGSCALL: emit a global stub that tail-jumps into ptregscall_common.
 *
 *   label - name of the stub to define
 *   func  - function whose address is handed over in rax
 *   arg   - register that receives the pt_regs pointer; it is computed as
 *           -ARGOFFSET+8 from %rsp, the 8 covering the return address
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
---|
453 | |
---|
/*
 * Stubs generated with PTREGSCALL, followed by the trampoline they share.
 * A single CFI procedure spans the stubs and ptregscall_common.
 *
 * ptregscall_common
 *   In:  rax = function to call; the return address is on top of the stack
 *   The return address is popped so that SAVE_REST can be applied, parked
 *   in r15 across the call (after SAVE_REST has run), and pushed back
 *   before the final ret.
 */
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq	%r11				/* return address -> r11 */
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq	%r11, %r15			/* park it in r15 for the call */
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11			/* fetch it before r15 is restored */
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq	%r11				/* put the return address back */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
---|
482 | |
---|
/*
 * stub_execve: call sys_execve with the remaining registers saved.
 *
 * The return address is popped into r11 and never pushed back; the stub
 * stores the result in the RAX slot and leaves through
 * int_ret_from_sys_call.
 */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11				/* discard the return address */
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq	%rax,RAX(%rsp)			/* result -> RAX slot */
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)
---|
497 | |
---|
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 *
 * The return address on the stack is skipped (never used), pt_regs is
 * passed in rdi, and the exit goes through int_ret_from_sys_call.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp			/* drop the return address */
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi			/* &pt_regs -> arg1 */
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
---|
515 | |
---|
/*
 * Initial frame state for interrupts (and exceptions without error code).
 * Two extra quadwords sit below rip on entry: rcx at 0(%rsp) and r11 at
 * 8(%rsp), hence the 0x10 handed to _frame.
 */
#define INTR_FRAME _frame (RIP-0x10); \
	CFI_REL_OFFSET rcx,0; \
	CFI_REL_OFFSET r11,8

/*
 * Initial frame state for exceptions with error code (and interrupts with
 * vector already pushed): one more quadword than INTR_FRAME, hence 0x18.
 */
#define XCPT_FRAME _frame (RIP-0x18); \
	CFI_REL_OFFSET rcx,0; \
	CFI_REL_OFFSET r11,8
---|
526 | |
---|
527 | /* |
---|
528 | * Interrupt exit. |
---|
529 | * |
---|
530 | */ |
---|
531 | |
---|
/*
 * retint_check: common tail of the interrupt and exception exit paths.
 *
 * In:  rcx = thread info, edi = mask of thread flags to honour,
 *      %rsp = partial stack frame.
 * Pending work is handled in retint_careful / retint_signal; without any,
 * retint_restore_args pops the argument registers and returns through
 * HYPERVISOR_IRET.
 */
retint_check:
	CFI_DEFAULT_STACK adj=1
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx			/* edx = pending work in the mask */
	CFI_REMEMBER_STATE
	jnz	retint_careful
retint_restore_args:
	movl	EFLAGS-REST_SKIP(%rsp), %eax
	shr	$9, %eax			# EAX[0] == IRET_EFLAGS.IF
	XEN_GET_VCPU_INFO(%rsi)
	andb	evtchn_upcall_mask(%rsi),%al
	andb	$1,%al				# EAX[0] == IRET_EFLAGS.IF & event_mask
	jnz	restore_all_enable_events	#        != 0 => enable event delivery
	XEN_PUT_VCPU_INFO(%rsi)

	RESTORE_ARGS 0,8,0
	HYPERVISOR_IRET 0

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	TRACE_IRQS_ON
	XEN_UNBLOCK_EVENTS(%rsi)
/*	sti */
	pushq	%rdi				/* keep the work mask across the call */
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)			/* reload rcx after the call */
	XEN_BLOCK_EVENTS(%rsi)
/*	cli */
	TRACE_IRQS_OFF
	jmp	retint_check

retint_signal:
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	retint_restore_args
	TRACE_IRQS_ON
	XEN_UNBLOCK_EVENTS(%rsi)
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi			# oldset
	movq	%rsp,%rdi			# &pt_regs
	call	do_notify_resume
	RESTORE_REST
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	movl	$_TIF_NEED_RESCHED,%edi		/* only rescheduling is left to check */
	GET_THREAD_INFO(%rcx)
	jmp	retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
	/*
	 * NOTE(review): .p2align is given no operand here; confirm what
	 * alignment, if any, the assembler applies in that case.
	 */
	.p2align
retint_kernel:
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	retint_kernel			/* check again */
#endif

	CFI_ENDPROC
END(retint_check)
---|
603 | |
---|
#ifndef CONFIG_XEN
/*
 * APIC interrupts.
 *
 * Everything down to the matching #endif is assembled only when
 * CONFIG_XEN is not defined.
 *
 * apicinterrupt num,func
 *   num  - vector number; its bitwise complement is pushed on the stack
 *   func - handler name handed to the "interrupt" macro
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp	error_entry
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	/* One entry point per TLB invalidation vector, numbered from 0. */
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif
#endif /* !CONFIG_XEN */
---|
664 | |
---|
665 | /* |
---|
666 | * Exception entry points. |
---|
667 | */ |
---|
/*
 * zeroentry: entry stub for events that arrive without an error code.
 *
 * On entry rcx is at 0(%rsp) and r11 at 8(%rsp).  Both are reloaded and
 * their slots dropped, a zero is pushed as error code/oldrax, rax is
 * pushed into what error_entry treats as the rdi slot, and the handler
 * address travels to error_entry in rax.
 *
 *   sym - handler to be called by error_entry
 */
	.macro zeroentry sym
	INTR_FRAME
	movq	(%rsp),%rcx
	CFI_RESTORE rcx
	movq	8(%rsp),%r11
	CFI_RESTORE r11
	addq	$0x10,%rsp		/* skip rcx and r11 */
	CFI_ADJUST_CFA_OFFSET -0x10
	pushq	$0			/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq	%rax			/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm
---|
685 | |
---|
/*
 * errorentry: entry stub for exceptions whose error code is already on
 * the stack.
 *
 * On entry rcx is at 0(%rsp) and r11 at 8(%rsp), with the error code
 * above them.  Both registers are reloaded and their slots dropped, rax
 * is pushed, and the handler address travels to error_entry in rax.
 *
 *   sym - handler to be called by error_entry
 */
	.macro errorentry sym
	XCPT_FRAME
	movq	(%rsp),%rcx
	CFI_RESTORE rcx
	movq	8(%rsp),%r11
	CFI_RESTORE r11
	addq	$0x10,%rsp		/* rsp points to the error code */
	CFI_ADJUST_CFA_OFFSET -0x10
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm
---|
701 | |
---|
#if 0 /* not XEN */
/*
 * This whole region is compiled out.
 *
 * paranoidentry sym, ist=0, irqtrace=1
 *   error code is on the stack already
 *   handle NMI like exceptions that can happen everywhere
 *
 *   sym      - handler to call with rdi = &pt_regs and rsi = error code
 *   ist      - when non-zero, the matching TSS ist entry is lowered by
 *              EXCEPTION_STKSZ around the call and raised again after it
 *   irqtrace - when non-zero, TRACE_IRQS_OFF is emitted after the call
 */
	.macro paranoidentry sym, ist=0, irqtrace=1
	movq	(%rsp),%rcx
	movq	8(%rsp),%r11
	addq	$0x10,%rsp		/* skip rcx and r11 */
	SAVE_ALL
	cld
#if 0 /* not XEN */
	movl	$1,%ebx
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	testl	%edx,%edx
	js	1f
	swapgs
	xorl	%ebx,%ebx
1:
#endif
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call	\sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
/*	cli */
	XEN_BLOCK_EVENTS(%rsi)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 *
 * The value of "trace" is also appended to every label defined below.
 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl	%ebx,%ebx		/* swapgs needed? */
	jnz	paranoid_restore\trace
	testl	$3,CS(%rsp)
	jnz	paranoid_userspace\trace
paranoid_swapgs\trace:
	TRACE_IRQS_IRETQ 0
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	paranoid_swapgs\trace
	movq	%rsp,%rdi		/* &pt_regs */
	call	sync_regs
	movq	%rax,%rsp		/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	paranoid_schedule\trace
	movl	%ebx,%edx		/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl	%esi,%esi		/* arg2: oldset */
	movq	%rsp,%rdi		/* arg1: &pt_regs */
	call	do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp	paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call	schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp	paranoid_userspace\trace
	CFI_ENDPROC
	.endm
#endif
---|
803 | |
---|
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 *
 * The remaining fourteen register slots are filled in below the rdi slot
 * (which arrives holding the interrupted rax), the handler is called with
 * rdi = &pt_regs and rsi = error code, and error_exit then joins the
 * interrupt return path.
 */
ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi		/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)		/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
#if 0
	cmpl	$__KERNEL_CS,CS(%rsp)
	CFI_REMEMBER_STATE
	je	error_kernelspace
#endif
error_call_handler:
	movq	%rdi, RDI(%rsp)		/* the rdi slot held rax until now */
	CFI_REL_OFFSET	rdi,RDI
	movq	%rsp,%rdi		/* &pt_regs -> arg1 */
	movq	ORIG_RAX(%rsp),%rsi	# get error code
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
error_exit:
	RESTORE_REST
/*	cli */
	XEN_BLOCK_EVENTS(%rsi)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testb	$3,CS-ARGOFFSET(%rsp)
	jz	retint_kernel
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx		/* edx = pending work in the mask */
	jnz	retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	jmp	retint_restore_args

#if 0
	/*
	 * We need to re-write the logic here because we don't do iretq
	 * to return to user mode. It's still possible that we get trap/fault
	 * in the kernel (when accessing buffers pointed to by system calls,
	 * for example).
	 *
	 */
	CFI_RESTORE_STATE
error_kernelspace:
	incl	%ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq	iret_label(%rip),%rbp
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	movl	%ebp,%ebp		/* zero extend */
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti
#endif
	CFI_ENDPROC
END(error_entry)
---|
903 | |
---|
904 | ENTRY(hypervisor_callback) /* Entry stub: expands the zeroentry macro with do_hypervisor_callback as the handler. */ |
---|
905 | zeroentry do_hypervisor_callback /* NOTE(review): zeroentry is defined outside this chunk. Presumably it builds the register frame and calls the handler with a pt_regs pointer in %rdi. Confirm. */ |
---|
906 | END(hypervisor_callback) |
---|
907 | |
---|
908 | /* |
---|
909 | * Copied from arch/xen/i386/kernel/entry.S |
---|
910 | */ |
---|
911 | # A note on the "critical region" in our callback handler. |
---|
912 | # We want to avoid stacking callback handlers due to events occurring |
---|
913 | # during handling of the last event. To do this, we keep events disabled |
---|
914 | # until we've done all processing. HOWEVER, we must enable events before |
---|
915 | # popping the stack frame (can't be done atomically) and so it would still |
---|
916 | # be possible to get enough handler activations to overflow the stack. |
---|
917 | # Although unlikely, bugs of that kind are hard to track down, so we'd |
---|
918 | # like to avoid the possibility. |
---|
919 | # So, on entry to the handler we detect whether we interrupted an |
---|
920 | # existing activation in its critical region -- if so, we pop the current |
---|
921 | # activation and restart the handler using the previous one. (This text comes from the i386 version. The note after the ecrit label says x86-64 does not do that fixup.) |
---|
922 | ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *): %rdi points at the saved register frame |
---|
923 | CFI_STARTPROC |
---|
924 | # Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will |
---|
925 | # see the correct pointer to the pt_regs |
---|
926 | movq %rdi, %rsp # we don't return: set %rsp to the pt_regs frame passed in %rdi |
---|
927 | CFI_ENDPROC |
---|
928 | CFI_DEFAULT_STACK |
---|
929 | 11: incl %gs:pda_irqcount /* Local label 11 is also the target of the jmp 11b in restore_all_enable_events. The ZF result of this incl drives the cmovzq below. */ |
---|
930 | movq %rsp,%rbp /* remember the current stack pointer. mov leaves the flags from incl intact */ |
---|
931 | CFI_DEF_CFA_REGISTER rbp |
---|
932 | cmovzq %gs:pda_irqstackptr,%rsp /* if the incl result was zero, load %rsp from pda_irqstackptr. Otherwise stay on the current stack */ |
---|
933 | pushq %rbp # backlink for old unwinder (also the value reloaded by popq %rsp below) |
---|
934 | call evtchn_do_upcall /* %rdi has not been written since entry, so it is still the pt_regs pointer */ |
---|
935 | popq %rsp /* pop the backlink pushed above straight into %rsp, returning to the saved stack pointer */ |
---|
936 | CFI_DEF_CFA_REGISTER rsp |
---|
937 | decl %gs:pda_irqcount /* balance the incl at label 11 */ |
---|
938 | jmp error_exit /* leave through the error_exit label instead of returning to a caller */ |
---|
939 | CFI_ENDPROC |
---|
940 | END(do_hypervisor_callback) |
---|
941 | |
---|
942 | #ifdef CONFIG_X86_LOCAL_APIC /* this nmi entry is only assembled when CONFIG_X86_LOCAL_APIC is defined */ |
---|
943 | KPROBE_ENTRY(nmi) /* NMI entry stub: expands zeroentry with do_nmi_callback (defined right below) as the handler */ |
---|
944 | zeroentry do_nmi_callback |
---|
945 | ENTRY(do_nmi_callback) |
---|
946 | CFI_STARTPROC |
---|
947 | addq $8, %rsp /* drop 8 bytes from the top of the stack. NOTE(review): presumably the return address left by the call inside zeroentry. Confirm. */ |
---|
948 | CFI_ENDPROC |
---|
949 | CFI_DEFAULT_STACK |
---|
950 | call do_nmi /* argument registers are whatever zeroentry set up. Not visible in this chunk */ |
---|
951 | orl $NMI_MASK,EFLAGS(%rsp) /* set the NMI_MASK bits in the saved EFLAGS slot of the frame */ |
---|
952 | RESTORE_REST |
---|
953 | XEN_BLOCK_EVENTS(%rsi) |
---|
954 | TRACE_IRQS_OFF |
---|
955 | GET_THREAD_INFO(%rcx) |
---|
956 | jmp retint_restore_args /* same return label that error_exit jumps to on its no-work path */ |
---|
957 | CFI_ENDPROC |
---|
958 | .previous .text /* NOTE(review): presumably pairs with a section switch made by KPROBE_ENTRY. Confirm. */ |
---|
959 | END(nmi) |
---|
960 | #endif |
---|
961 | |
---|
962 | ALIGN |
---|
963 | restore_all_enable_events: /* Return path that unblocks event delivery, then either returns through HYPERVISOR_IRET or, if events are pending, blocks them again and re-enters do_hypervisor_callback at its label 11. */ |
---|
964 | CFI_DEFAULT_STACK adj=1 |
---|
965 | TRACE_IRQS_ON |
---|
966 | XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... |
---|
967 | |
---|
968 | scrit: /**** START OF CRITICAL REGION ****/ |
---|
969 | XEN_TEST_PENDING(%rsi) /* NOTE(review): macro body is outside this chunk. The jnz below implies it leaves ZF clear when events are pending. */ |
---|
970 | CFI_REMEMBER_STATE |
---|
971 | jnz 14f # process more events if necessary... |
---|
972 | XEN_PUT_VCPU_INFO(%rsi) |
---|
973 | RESTORE_ARGS 0,8,0 |
---|
974 | HYPERVISOR_IRET 0 /* nothing pending: control leaves here, the code at 14 is reached only through the jnz above */ |
---|
975 | |
---|
976 | CFI_RESTORE_STATE |
---|
977 | 14: XEN_LOCKED_BLOCK_EVENTS(%rsi) /* events became pending after unblocking: block them again before handling */ |
---|
978 | XEN_PUT_VCPU_INFO(%rsi) |
---|
979 | SAVE_REST |
---|
980 | movq %rsp,%rdi # set the argument again (pt_regs pointer expected in %rdi at label 11) |
---|
981 | jmp 11b /* nearest preceding local label 11, which is inside do_hypervisor_callback */ |
---|
982 | CFI_ENDPROC |
---|
983 | ecrit: /**** END OF CRITICAL REGION ****/ |
---|
984 | # At this point, unlike on x86-32, we don't do the fixup, both to simplify the |
---|
985 | # code and because the stack frame is more complex on x86-64. |
---|
986 | # When the kernel is interrupted in the critical section, the kernel |
---|
987 | # will do IRET in that case, and everything will be restored at that point, |
---|
988 | # i.e. execution just resumes where it was interrupted, with the same context. |
---|
989 | |
---|
990 | # Hypervisor uses this for application faults while it executes. |
---|
991 | # We get here for two reasons: |
---|
992 | # 1. Fault while reloading DS, ES, FS or GS |
---|
993 | # 2. Fault while executing IRET |
---|
994 | # Category 1 we do not need to fix up as Xen has already reloaded all segment |
---|
995 | # registers that could be reloaded and zeroed the others. |
---|
996 | # Category 2 we fix up by killing the current process. We cannot use the |
---|
997 | # normal Linux return path in this case because if we use the IRET hypercall |
---|
998 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
---|
999 | # We distinguish between categories by comparing each saved segment register |
---|
1000 | # with its current contents: any discrepancy means we are in category 1. |
---|
1001 | ENTRY(failsafe_callback) /* Stack on entry, as used below: 0(%rsp) rcx, 8 r11, 0x10 ds, 0x18 es, 0x20 fs, 0x28 gs, with the frame that starts at RIP 0x30 bytes above %rsp */ |
---|
1002 | _frame (RIP-0x30) |
---|
1003 | CFI_REL_OFFSET rcx, 0 |
---|
1004 | CFI_REL_OFFSET r11, 8 |
---|
1005 | movw %ds,%cx /* %cx is scratch for the four compares. The saved %rcx is reloaded from 0(%rsp) on both exits */ |
---|
1006 | cmpw %cx,0x10(%rsp) /* live ds against saved ds */ |
---|
1007 | CFI_REMEMBER_STATE |
---|
1008 | jne 1f |
---|
1009 | movw %es,%cx |
---|
1010 | cmpw %cx,0x18(%rsp) /* live es against saved es */ |
---|
1011 | jne 1f |
---|
1012 | movw %fs,%cx |
---|
1013 | cmpw %cx,0x20(%rsp) /* live fs against saved fs */ |
---|
1014 | jne 1f |
---|
1015 | movw %gs,%cx |
---|
1016 | cmpw %cx,0x28(%rsp) /* live gs against saved gs */ |
---|
1017 | jne 1f |
---|
1018 | /* All segments match their saved values => Category 2 (Bad IRET). */ |
---|
1019 | movq (%rsp),%rcx |
---|
1020 | CFI_RESTORE rcx |
---|
1021 | movq 8(%rsp),%r11 |
---|
1022 | CFI_RESTORE r11 |
---|
1023 | addq $0x30,%rsp /* drop the six 8-byte slots (rcx, r11, ds, es, fs, gs) */ |
---|
1024 | CFI_ADJUST_CFA_OFFSET -0x30 |
---|
1025 | movq $11,%rdi /* SIGSEGV */ |
---|
1026 | jmp do_exit /* do_exit is entered by jmp with 11 in %rdi. This is the kill described for category 2 above */ |
---|
1027 | CFI_RESTORE_STATE |
---|
1028 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
---|
1029 | movq (%rsp),%rcx |
---|
1030 | CFI_RESTORE rcx |
---|
1031 | movq 8(%rsp),%r11 |
---|
1032 | CFI_RESTORE r11 |
---|
1033 | addq $0x30,%rsp /* drop the same six slots, leaving the frame that starts at RIP */ |
---|
1034 | CFI_ADJUST_CFA_OFFSET -0x30 |
---|
1035 | pushq $0 /* NOTE(review): presumably fills the slot just below RIP (ORIG_RAX / error code) so that SAVE_ALL completes a pt_regs frame. Confirm. */ |
---|
1036 | CFI_ADJUST_CFA_OFFSET 8 |
---|
1037 | SAVE_ALL |
---|
1038 | jmp error_exit /* return through the error_exit label */ |
---|
1039 | CFI_ENDPROC |
---|
1040 | #if 0 /* compiled out: exception-table entry and fixup code for a fault at gs_change */ |
---|
1041 | .section __ex_table,"a" |
---|
1042 | .align 8 |
---|
1043 | .quad gs_change,bad_gs /* pair of addresses placed in __ex_table: gs_change, then bad_gs */ |
---|
1044 | .previous |
---|
1045 | .section .fixup,"ax" |
---|
1046 | /* running with kernelgs */ |
---|
1047 | bad_gs: |
---|
1048 | /* swapgs */ /* switch back to user gs */ |
---|
1049 | xorl %eax,%eax |
---|
1050 | movl %eax,%gs /* load a zero selector into %gs */ |
---|
1051 | jmp 2b /* NOTE(review): no local label 2 is visible in this chunk. Check it exists before re-enabling this block. */ |
---|
1052 | .previous |
---|
1053 | #endif |
---|
1054 | |
---|
1055 | /* |
---|
1056 | * Create a kernel thread. |
---|
1057 | * |
---|
1058 | * C extern interface: |
---|
1059 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) |
---|
1060 | * Result: do_fork's return value is written to the frame's RAX slot before RESTORE_ALL (presumably reloaded into %rax from there -- confirm). |
---|
1061 | * asm input arguments: |
---|
1062 | * rdi: fn, rsi: arg, rdx: flags |
---|
1063 | */ |
---|
1064 | ENTRY(kernel_thread) |
---|
1065 | CFI_STARTPROC |
---|
1066 | FAKE_STACK_FRAME $child_rip /* NOTE(review): macro defined elsewhere. Presumably child_rip becomes the RIP of the fake frame, so the child starts there. Confirm. */ |
---|
1067 | SAVE_ALL |
---|
1068 | |
---|
1069 | # do_fork arguments built below -- rdi: flags, rsi: usp, rdx: will be &pt_regs |
---|
1070 | movq %rdx,%rdi /* flags passed by the caller */ |
---|
1071 | orq kernel_thread_flags(%rip),%rdi /* OR in the value stored at kernel_thread_flags */ |
---|
1072 | movq $-1, %rsi /* usp = -1 */ |
---|
1073 | movq %rsp, %rdx /* address of the frame saved on the stack above */ |
---|
1074 | |
---|
1075 | xorl %r8d,%r8d /* fifth argument register = 0 */ |
---|
1076 | xorl %r9d,%r9d /* sixth argument register = 0. Note %rcx, the fourth argument register, is not written here */ |
---|
1077 | |
---|
1078 | # clone now |
---|
1079 | call do_fork |
---|
1080 | movq %rax,RAX(%rsp) /* store the do_fork result in the RAX slot of the saved frame */ |
---|
1081 | xorl %edi,%edi |
---|
1082 | |
---|
1083 | /* |
---|
1084 | * It isn't worth checking for a reschedule here, |
---|
1085 | * so internally to the x86_64 port you can rely on kernel_thread() |
---|
1086 | * not to reschedule the child before returning, this avoids the need |
---|
1087 | * of hacks for example to fork off the per-CPU idle tasks. |
---|
1088 | * [Hopefully no generic code relies on the reschedule -AK] |
---|
1089 | */ |
---|
1090 | RESTORE_ALL |
---|
1091 | UNFAKE_STACK_FRAME |
---|
1092 | ret |
---|
1093 | CFI_ENDPROC |
---|
1094 | ENDPROC(kernel_thread) |
---|
1095 | |
---|
1096 | child_rip: /* Start address handed to FAKE_STACK_FRAME by kernel_thread: calls fn(arg), then do_exit(0). */ |
---|
1097 | pushq $0 # fake return address |
---|
1098 | CFI_STARTPROC |
---|
1099 | /* |
---|
1100 | * Here we are in the child and the registers are set as they were |
---|
1101 | * at kernel_thread() invocation in the parent (rdi: fn, rsi: arg). |
---|
1102 | */ |
---|
1103 | movq %rdi, %rax /* %rax = fn */ |
---|
1104 | movq %rsi, %rdi /* first argument = arg */ |
---|
1105 | call *%rax /* fn(arg). Its return value is not used */ |
---|
1106 | # exit |
---|
1107 | xorl %edi, %edi /* exit code 0 */ |
---|
1108 | call do_exit |
---|
1109 | CFI_ENDPROC |
---|
1110 | ENDPROC(child_rip) |
---|
1111 | |
---|
1112 | /* |
---|
1113 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
---|
1114 | * |
---|
1115 | * C extern interface: |
---|
1116 | * extern long execve(char *name, char **argv, char **envp) |
---|
1117 | * |
---|
1118 | * asm input arguments: |
---|
1119 | * rdi: name, rsi: argv, rdx: envp |
---|
1120 | * |
---|
1121 | * We want to fall back into: |
---|
1122 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) |
---|
1123 | * |
---|
1124 | * do_sys_execve asm fallback arguments: |
---|
1125 | * rdi: name, rsi: argv, rdx: envp, fake frame on the stack |
---|
1126 | */ |
---|
1127 | ENTRY(execve) |
---|
1128 | CFI_STARTPROC |
---|
1129 | FAKE_STACK_FRAME $0 |
---|
1130 | SAVE_ALL |
---|
1131 | call sys_execve /* rdi, rsi, rdx are passed through as received. The frame built above sits on the stack */ |
---|
1132 | movq %rax, RAX(%rsp) /* store the result in the RAX slot of the frame */ |
---|
1133 | RESTORE_REST |
---|
1134 | testq %rax,%rax /* NOTE(review): relies on RESTORE_REST leaving %rax untouched. Confirm against the macro. */ |
---|
1135 | jne 1f /* non-zero result: unwind the fake frame and return to the caller */ |
---|
1136 | jmp int_ret_from_sys_call /* zero result: leave through int_ret_from_sys_call instead of returning */ |
---|
1137 | 1: RESTORE_ARGS |
---|
1138 | UNFAKE_STACK_FRAME |
---|
1139 | ret |
---|
1140 | CFI_ENDPROC |
---|
1141 | ENDPROC(execve) |
---|
1142 | |
---|
1143 | KPROBE_ENTRY(page_fault) /* Exception entry stubs follow. Each one names an entry point and expands errorentry or zeroentry with the handler to run. NOTE(review): both macros are defined outside this chunk. Presumably errorentry is for exceptions that arrive with an error code and zeroentry for those without. Confirm. */ |
---|
1144 | errorentry do_page_fault |
---|
1145 | END(page_fault) |
---|
1146 | .previous .text |
---|
1147 | |
---|
1148 | ENTRY(coprocessor_error) |
---|
1149 | zeroentry do_coprocessor_error |
---|
1150 | END(coprocessor_error) |
---|
1151 | |
---|
1152 | ENTRY(simd_coprocessor_error) |
---|
1153 | zeroentry do_simd_coprocessor_error |
---|
1154 | END(simd_coprocessor_error) |
---|
1155 | |
---|
1156 | ENTRY(device_not_available) |
---|
1157 | zeroentry math_state_restore /* handler here is math_state_restore, not a do_ prefixed function like the other stubs */ |
---|
1158 | END(device_not_available) |
---|
1159 | |
---|
1160 | /* runs on exception stack (NOTE(review): the INTR_FRAME and paranoidexit lines below are commented out and zeroentry is used instead, so confirm this statement still holds) */ |
---|
1161 | KPROBE_ENTRY(debug) |
---|
1162 | /* INTR_FRAME |
---|
1163 | pushq $0 |
---|
1164 | CFI_ADJUST_CFA_OFFSET 8 */ |
---|
1165 | zeroentry do_debug |
---|
1166 | /* paranoidexit |
---|
1167 | CFI_ENDPROC */ |
---|
1168 | END(debug) |
---|
1169 | .previous .text |
---|
1170 | |
---|
1171 | #if 0 /* compiled out: paranoidentry based nmi entry. The nmi entry that is assembled is the one under CONFIG_X86_LOCAL_APIC earlier in this file */ |
---|
1172 | /* runs on exception stack */ |
---|
1173 | KPROBE_ENTRY(nmi) |
---|
1174 | INTR_FRAME |
---|
1175 | pushq $-1 |
---|
1176 | CFI_ADJUST_CFA_OFFSET 8 |
---|
1177 | paranoidentry do_nmi, 0, 0 |
---|
1178 | #ifdef CONFIG_TRACE_IRQFLAGS |
---|
1179 | paranoidexit 0 |
---|
1180 | #else |
---|
1181 | jmp paranoid_exit1 |
---|
1182 | CFI_ENDPROC |
---|
1183 | #endif |
---|
1184 | END(nmi) |
---|
1185 | .previous .text |
---|
1186 | #endif |
---|
1187 | |
---|
1188 | KPROBE_ENTRY(int3) /* Entry stub for int3. The commented-out lines show an INTR_FRAME and paranoid_exit1 sequence, and the live code uses zeroentry instead. */ |
---|
1189 | /* INTR_FRAME |
---|
1190 | pushq $0 |
---|
1191 | CFI_ADJUST_CFA_OFFSET 8 */ |
---|
1192 | zeroentry do_int3 |
---|
1193 | /* jmp paranoid_exit1 |
---|
1194 | CFI_ENDPROC */ |
---|
1195 | END(int3) |
---|
1196 | .previous .text |
---|
1197 | |
---|
1198 | ENTRY(overflow) /* The stubs from here to reserved all expand zeroentry with their do_ handler. NOTE(review): zeroentry is defined outside this chunk. */ |
---|
1199 | zeroentry do_overflow |
---|
1200 | END(overflow) |
---|
1201 | |
---|
1202 | ENTRY(bounds) |
---|
1203 | zeroentry do_bounds |
---|
1204 | END(bounds) |
---|
1205 | |
---|
1206 | ENTRY(invalid_op) |
---|
1207 | zeroentry do_invalid_op |
---|
1208 | END(invalid_op) |
---|
1209 | |
---|
1210 | ENTRY(coprocessor_segment_overrun) |
---|
1211 | zeroentry do_coprocessor_segment_overrun |
---|
1212 | END(coprocessor_segment_overrun) |
---|
1213 | |
---|
1214 | ENTRY(reserved) |
---|
1215 | zeroentry do_reserved |
---|
1216 | END(reserved) |
---|
1217 | |
---|
1218 | #if 0 /* compiled out: no double_fault entry point is assembled from this block */ |
---|
1219 | /* runs on exception stack */ |
---|
1220 | ENTRY(double_fault) |
---|
1221 | XCPT_FRAME |
---|
1222 | paranoidentry do_double_fault |
---|
1223 | jmp paranoid_exit1 |
---|
1224 | CFI_ENDPROC |
---|
1225 | END(double_fault) |
---|
1226 | #endif |
---|
1227 | |
---|
1228 | ENTRY(invalid_TSS) /* Exception entry stubs: each expands errorentry or zeroentry with its handler. NOTE(review): both macros are defined outside this chunk. Presumably errorentry is for exceptions that arrive with an error code. Confirm. */ |
---|
1229 | errorentry do_invalid_TSS |
---|
1230 | END(invalid_TSS) |
---|
1231 | |
---|
1232 | ENTRY(segment_not_present) |
---|
1233 | errorentry do_segment_not_present |
---|
1234 | END(segment_not_present) |
---|
1235 | |
---|
1236 | /* runs on exception stack (NOTE(review): the XCPT_FRAME and paranoidentry lines below are commented out and errorentry is used instead, so confirm this statement still holds) */ |
---|
1237 | ENTRY(stack_segment) |
---|
1238 | /* XCPT_FRAME |
---|
1239 | paranoidentry do_stack_segment */ |
---|
1240 | errorentry do_stack_segment |
---|
1241 | /* jmp paranoid_exit1 |
---|
1242 | CFI_ENDPROC */ |
---|
1243 | END(stack_segment) |
---|
1244 | |
---|
1245 | KPROBE_ENTRY(general_protection) |
---|
1246 | errorentry do_general_protection |
---|
1247 | END(general_protection) |
---|
1248 | .previous .text /* NOTE(review): presumably pairs with a section switch made by KPROBE_ENTRY. Confirm. */ |
---|
1249 | |
---|
1250 | ENTRY(alignment_check) |
---|
1251 | errorentry do_alignment_check |
---|
1252 | END(alignment_check) |
---|
1253 | |
---|
1254 | ENTRY(divide_error) |
---|
1255 | zeroentry do_divide_error |
---|
1256 | END(divide_error) |
---|
1257 | |
---|
1258 | ENTRY(spurious_interrupt_bug) |
---|
1259 | zeroentry do_spurious_interrupt_bug |
---|
1260 | END(spurious_interrupt_bug) |
---|
1261 | |
---|
1262 | #ifdef CONFIG_X86_MCE /* machine_check is only assembled when CONFIG_X86_MCE is defined */ |
---|
1263 | /* runs on exception stack */ |
---|
1264 | ENTRY(machine_check) |
---|
1265 | INTR_FRAME |
---|
1266 | pushq $0 /* push a zero quadword before paranoidentry. NOTE(review): presumably the error-code slot. Confirm. */ |
---|
1267 | CFI_ADJUST_CFA_OFFSET 8 |
---|
1268 | paranoidentry do_machine_check /* NOTE(review): the other paranoidentry and paranoid_exit1 users in this part of the file are compiled out or commented out. Confirm both are still defined when CONFIG_X86_MCE is set. */ |
---|
1269 | jmp paranoid_exit1 |
---|
1270 | CFI_ENDPROC |
---|
1271 | END(machine_check) |
---|
1272 | #endif |
---|
1273 | |
---|
1274 | /* Call softirq on interrupt stack. Interrupts are off. Takes no register arguments here. Saves and restores %rbp, and pda_irqcount is incremented around the call to __do_softirq and decremented afterwards. */ |
---|
1275 | ENTRY(call_softirq) |
---|
1276 | CFI_STARTPROC |
---|
1277 | push %rbp |
---|
1278 | CFI_ADJUST_CFA_OFFSET 8 |
---|
1279 | CFI_REL_OFFSET rbp,0 |
---|
1280 | mov %rsp,%rbp /* %rbp = stack pointer after the push. leaveq below restores from it */ |
---|
1281 | CFI_DEF_CFA_REGISTER rbp |
---|
1282 | incl %gs:pda_irqcount /* the ZF result of this incl drives the cmove on the next line */ |
---|
1283 | cmove %gs:pda_irqstackptr,%rsp /* if the incl result was zero, load %rsp from pda_irqstackptr. Otherwise stay on the current stack */ |
---|
1284 | push %rbp # backlink for old unwinder |
---|
1285 | call __do_softirq |
---|
1286 | leaveq /* %rsp = %rbp, then pop %rbp: back on the entry stack with the saved %rbp restored */ |
---|
1287 | CFI_DEF_CFA_REGISTER rsp |
---|
1288 | CFI_ADJUST_CFA_OFFSET -8 |
---|
1289 | decl %gs:pda_irqcount /* balance the incl above */ |
---|
1290 | ret |
---|
1291 | CFI_ENDPROC |
---|
1292 | ENDPROC(call_softirq) |
---|
1293 | |
---|
1294 | #ifdef CONFIG_STACK_UNWIND |
---|
1295 | ENTRY(arch_unwind_init_running) /* Fills the register frame at %rdi with a snapshot of the current context and then jumps to the address received in %rsi, with the value received in %rdx moved into %rsi. Stored live: r12-r15, rbp, rbx. Zeroed: r8-r11, rax, rcx, rdx, rsi, rdi, ORIG_RAX, EFLAGS. RIP is the return address at (%rsp) and RSP is %rsp+8. */ |
---|
1296 | CFI_STARTPROC |
---|
1297 | movq %r15, R15(%rdi) |
---|
1298 | movq %r14, R14(%rdi) |
---|
1299 | xchgq %rsi, %rdx /* swap: %rdx now holds the jump target received in %rsi, %rsi holds the value received in %rdx */ |
---|
1300 | movq %r13, R13(%rdi) |
---|
1301 | movq %r12, R12(%rdi) |
---|
1302 | xorl %eax, %eax /* %rax = 0, used as the zero source for the slots below */ |
---|
1303 | movq %rbp, RBP(%rdi) |
---|
1304 | movq %rbx, RBX(%rdi) |
---|
1305 | movq (%rsp), %rcx /* return address of this call */ |
---|
1306 | movq %rax, R11(%rdi) |
---|
1307 | movq %rax, R10(%rdi) |
---|
1308 | movq %rax, R9(%rdi) |
---|
1309 | movq %rax, R8(%rdi) |
---|
1310 | movq %rax, RAX(%rdi) |
---|
1311 | movq %rax, RCX(%rdi) |
---|
1312 | movq %rax, RDX(%rdi) |
---|
1313 | movq %rax, RSI(%rdi) |
---|
1314 | movq %rax, RDI(%rdi) |
---|
1315 | movq %rax, ORIG_RAX(%rdi) |
---|
1316 | movq %rcx, RIP(%rdi) /* recorded RIP = the return address loaded above */ |
---|
1317 | leaq 8(%rsp), %rcx /* stack pointer value just above the return address */ |
---|
1318 | movq $__KERNEL_CS, CS(%rdi) |
---|
1319 | movq %rax, EFLAGS(%rdi) |
---|
1320 | movq %rcx, RSP(%rdi) |
---|
1321 | movq $__KERNEL_DS, SS(%rdi) |
---|
1322 | jmpq *%rdx /* jump, not call: the return address is left on the stack, so a ret in the target goes back to this routine's caller */ |
---|
1323 | CFI_ENDPROC |
---|
1324 | ENDPROC(arch_unwind_init_running) |
---|
1325 | #endif |
---|