patch-2.4.20 linux-2.4.20/arch/x86_64/kernel/entry.S
- Lines: 648
- Date: Thu Nov 28 15:53:12 2002
- Orig file: linux-2.4.19/arch/x86_64/kernel/entry.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.19/arch/x86_64/kernel/entry.S linux-2.4.20/arch/x86_64/kernel/entry.S
@@ -0,0 +1,647 @@
+/*
+ * linux/arch/x86_64/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ *
+ * $Id: entry.S,v 1.81 2002/09/12 12:55:25 ak Exp $
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ *
+ * NOTE: This code handles signal recognition, which happens every time
+ * after an interrupt and after each system call.
+ *
+ * Normal syscalls and interrupts don't save a full stack frame; this is
+ * only done for PT_TRACESYS, signals, or fork/exec et al.
+ *
+ * TODO:
+ * - schedule it carefully for the final hardware.
+ *
+ */
+
+#define ASSEMBLY 1
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/current.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+
+ .code64
+
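+/*
+ * PDAREF(field) generates a %gs-relative reference to a field of the
+ * per-CPU "pda" (processor data area); while in the kernel the GS base
+ * points at the current CPU's pda (see the swapgs handling below).
+ */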
+#define PDAREF(field) %gs:field
+
+/*
+ * C code is not supposed to know about partial frames. Every time a C
+ * function that looks at the pt_regs is called, these two macros are
+ * executed around the call: FIXUP_TOP_OF_STACK fakes a full frame from
+ * the partial syscall state, and RESTORE_TOP_OF_STACK syncs the syscall
+ * state back after any possible pt_regs manipulation.
+ */
+
+ /* %rsp: at FRAMEEND */
+ .macro FIXUP_TOP_OF_STACK tmp
+ movq PDAREF(pda_oldrsp),\tmp
+ movq \tmp,RSP(%rsp)
+ movq $__USER_DS,SS(%rsp)
+ movq $__USER_CS,CS(%rsp)
+ movq $-1,RCX(%rsp) /* contains return address, already in RIP */
+ movq R11(%rsp),\tmp /* get eflags */
+ movq \tmp,EFLAGS(%rsp)
+ .endm
+
+ .macro RESTORE_TOP_OF_STACK tmp,offset=0
+ movq RSP-\offset(%rsp),\tmp
+ movq \tmp,PDAREF(pda_oldrsp)
+ movq EFLAGS-\offset(%rsp),\tmp
+ movq \tmp,R11-\offset(%rsp)
+ .endm
+
+
+/*
+ * A newly forked process directly context switches into this.
+ */
+ENTRY(ret_from_fork)
+ movq %rax,%rdi /* return value of __switch_to -> prev task */
+ call schedule_tail
+ GET_CURRENT(%rcx)
+ testb $PT_TRACESYS,tsk_ptrace(%rcx)
+ jnz 2f
+1:
+ RESTORE_REST
+ testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
+ jz int_ret_from_sys_call
+ testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx) # 32-bit child?
+ jnz int_ret_from_sys_call # can't return to a 32-bit task via sysretq
+ RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
+ jmp ret_from_sys_call
+2:
+ movq %rsp,%rdi
+ call syscall_trace
+ jmp 1b
+
+/*
+ * System call entry. Up to 6 arguments passed in registers are supported.
+ *
+ * SYSCALL does not save anything on the stack and does not change the
+ * stack pointer. We get the per-CPU area from the hidden GS base MSR
+ * and find the current kernel stack there.
+ */
+
+/*
+ * Register setup:
+ * rax system call number
+ * rdi arg0
+ * rcx return address for syscall/sysret, C arg3
+ * rsi arg1
+ * rdx arg2
+ * r10 arg3 (--> moved to rcx for C)
+ * r8 arg4
+ * r9 arg5
+ * r11 eflags for syscall/sysret, temporary for C
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
+ * Interrupts are off on entry.
+ * Only called from user space.
+ */
+
+ENTRY(system_call)
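+ /*
+ * Still on the user GS base and the user stack here: swapgs
+ * switches GS to the per-CPU pda, then the user %rsp is parked
+ * in the pda and this CPU's kernel stack is loaded.
+ */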
+ swapgs
+ movq %rsp,PDAREF(pda_oldrsp)
+ movq PDAREF(pda_kernelstack),%rsp
+ sti
+ SAVE_ARGS 8,1
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rcx,RIP-ARGOFFSET(%rsp)
+ GET_CURRENT(%rcx)
+ testl $PT_TRACESYS,tsk_ptrace(%rcx)
+ jne tracesys
+ cmpq $__NR_syscall_max,%rax
+ ja badsys
+ movq %r10,%rcx
+ call *sys_call_table(,%rax,8) # XXX: rip relative
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ .globl ret_from_sys_call
+ret_from_sys_call:
+sysret_with_reschedule:
+ GET_CURRENT(%rcx)
+ cli
+ cmpq $0,tsk_need_resched(%rcx)
+ jne sysret_reschedule
+ cmpl $0,tsk_sigpending(%rcx)
+ jne sysret_signal
+sysret_restore_args:
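+ /* sysretq resumes at the RIP in %rcx with RFLAGS taken from %r11 */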
+ movq RIP-ARGOFFSET(%rsp),%rcx
+ RESTORE_ARGS 0,-ARG_SKIP,1
+ movq PDAREF(pda_oldrsp),%rsp
+ swapgs
+ sysretq
+
+sysret_signal:
+ sti
+ xorl %esi,%esi # oldset
+ leaq -ARGOFFSET(%rsp),%rdi # regs
+ leaq do_signal(%rip),%rax
+ call ptregscall_common
+sysret_signal_test:
+ GET_CURRENT(%rcx)
+ cli
+ cmpq $0,tsk_need_resched(%rcx)
+ je sysret_restore_args
+ sti
+ call schedule
+ jmp sysret_signal_test
+
+sysret_reschedule:
+ sti
+ call schedule
+ jmp sysret_with_reschedule
+
+tracesys:
+ SAVE_REST
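+ /* preset -ENOSYS; it remains the return value if the traced
+ syscall number turns out to be out of range */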
+ movq $-ENOSYS,RAX(%rsp)
+ FIXUP_TOP_OF_STACK %rdi
+ movq %rsp,%rdi
+ call syscall_trace
+ LOAD_ARGS ARGOFFSET /* reload args from the stack in case ptrace changed them */
+ RESTORE_REST
+ cmpq $__NR_syscall_max,%rax
+ ja tracesys_done
+tracesys_call: /* backtrace marker */
+ movq %r10,%rcx /* fixup for C */
+ call *sys_call_table(,%rax,8)
+ movq %rax,RAX-ARGOFFSET(%rsp)
+tracesys_done: /* backtrace marker */
+ SAVE_REST
+ movq %rsp,%rdi
+ call syscall_trace
+ RESTORE_TOP_OF_STACK %rbx
+ RESTORE_REST
+ jmp ret_from_sys_call
+
+badsys:
+ movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+ jmp ret_from_sys_call
+
+/*
+ * Syscall return path ending with IRET.
+ * This is used either for 64-bit calls that require all registers to be
+ * restored (impossible with SYSRET) or for 32-bit calls.
+ */
+ENTRY(int_ret_from_sys_call)
+intret_test_kernel:
+ testl $3,CS-ARGOFFSET(%rsp)
+ je retint_restore_args
+intret_with_reschedule:
+ GET_CURRENT(%rcx)
+ cli
+ cmpq $0,tsk_need_resched(%rcx)
+ jne intret_reschedule
+ cmpl $0,tsk_sigpending(%rcx)
+ jne intret_signal
+ jmp retint_restore_args_swapgs
+
+intret_reschedule:
+ sti
+ call schedule
+ jmp intret_with_reschedule
+
+intret_signal:
+ sti
+ SAVE_REST
+ xorq %rsi,%rsi # oldset -> arg2
+ movq %rsp,%rdi # &ptregs -> arg1
+ call do_signal
+ RESTORE_REST
+intret_signal_test:
+ GET_CURRENT(%rcx)
+ cli
+ cmpq $0,tsk_need_resched(%rcx)
+ je retint_restore_args_swapgs
+ sti
+ call schedule
+ jmp intret_signal_test
+
+/*
+ * Certain special system calls that need to save a complete stack frame.
+ */
+
+ .macro PTREGSCALL label,func
+ .globl \label
+\label:
+ leaq \func(%rip),%rax
+ jmp ptregscall_common
+ .endm
+
+ PTREGSCALL stub_clone, sys_clone
+ PTREGSCALL stub_fork, sys_fork
+ PTREGSCALL stub_vfork, sys_vfork
+ PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
+ PTREGSCALL stub_sigaltstack, sys_sigaltstack
+ PTREGSCALL stub_iopl, sys_iopl
+
+ENTRY(ptregscall_common)
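+ /*
+ * Pop the return address into %r11 so SAVE_REST lays down a full
+ * pt_regs, keep it in callee-saved %r15 across the handler call,
+ * copy it back before RESTORE_REST reloads %r15, then push it
+ * again for the final ret.
+ */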
+ popq %r11
+ SAVE_REST
+ movq %r11, %r15
+ FIXUP_TOP_OF_STACK %r11
+ call *%rax
+ RESTORE_TOP_OF_STACK %r11
+ movq %r15, %r11
+ RESTORE_REST
+ pushq %r11
+ ret
+
+ENTRY(stub_execve)
+ popq %r11
+ SAVE_REST
+ movq %r11, %r15
+ FIXUP_TOP_OF_STACK %r11
+ call sys_execve
+ GET_CURRENT(%rcx)
+ testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
+ jnz exec_32bit
+ RESTORE_TOP_OF_STACK %r11
+ movq %r15, %r11
+ RESTORE_REST
+ push %r11
+ ret
+
+exec_32bit:
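+ /* a 32-bit image cannot be resumed with sysretq; take the IRET
+ path, which restores the complete frame */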
+ movq %rax,RAX(%rsp)
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+
+/*
+ * sigreturn is special because it needs to restore all registers on return.
+ * This cannot be done with SYSRET, so use the IRET return path instead.
+ */
+ENTRY(stub_rt_sigreturn)
+ addq $8, %rsp
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call sys_rt_sigreturn
+ movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+
+/*
+ * Interrupt entry/exit.
+ *
+ * Interrupt entry points save only the callee-clobbered registers;
+ * a full frame is again saved only for signals.
+ *
+ * Entry runs with interrupts off.
+ */
+
+/* 0(%rsp): interrupt number */
+ENTRY(common_interrupt)
+ testl $3,16(%rsp) # from kernel?
+ je 1f
+ swapgs
+1: cld
+ SAVE_ARGS
+ leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
+ addl $1,PDAREF(pda_irqcount) # XXX: should be merged with irq.c irqcount
+ movq PDAREF(pda_irqstackptr),%rax
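+ /* the addl set ZF only for the outermost interrupt (the count
+ presumably starts at -1), so only then switch to the per-CPU
+ interrupt stack */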
+ cmoveq %rax,%rsp
+ pushq %rdi # save old stack
+ call do_IRQ
+ /* 0(%rsp): oldrsp-ARGOFFSET */
+ENTRY(ret_from_intr)
+ cli
+ popq %rdi
+ subl $1,PDAREF(pda_irqcount)
+ leaq ARGOFFSET(%rdi),%rsp
+ testl $3,CS(%rdi) # from kernel?
+ je retint_restore_args
+ /* Interrupt came from user space */
+retint_with_reschedule:
+ GET_CURRENT(%rcx)
+ cmpq $0,tsk_need_resched(%rcx)
+ jne retint_reschedule
+ cmpl $0,tsk_sigpending(%rcx)
+ jne retint_signal
+retint_restore_args_swapgs:
+ swapgs
+retint_restore_args:
+ RESTORE_ARGS 0,8
+iret_label:
+ iretq
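+ /* if iretq faults (e.g. on bad user segment state), the exception
+ table entry below redirects execution to bad_iret */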
+ .section __ex_table,"a"
+ .align 8
+ .quad iret_label,bad_iret
+ .previous
+ .section .fixup,"ax"
+ /* force a signal here? this matches i386 behaviour */
+bad_iret:
+ movq $-9999,%rdi /* better code? */
+ jmp do_exit
+ .previous
+
+retint_signal:
+ sti
+ SAVE_REST
+ movq $-1,ORIG_RAX(%rsp)
+ xorq %rsi,%rsi # oldset
+ movq %rsp,%rdi # &pt_regs
+ call do_signal
+ RESTORE_REST
+retint_signal_test:
+ cli
+ GET_CURRENT(%rcx)
+ cmpq $0,tsk_need_resched(%rcx)
+ je retint_restore_args_swapgs
+ sti
+ call schedule
+ jmp retint_signal_test
+
+retint_reschedule:
+ sti
+ call schedule
+ cli
+ jmp retint_with_reschedule
+
+/*
+ * Exception entry points. zeroentry is for exceptions where the CPU
+ * pushes no error code (a dummy 0 is pushed instead); errorentry is
+ * for those where it does. Both leave the handler's address in %rax
+ * and merge into the common error_entry below.
+ */
+ .macro zeroentry sym
+ pushq $0 /* push error code/oldrax */
+ pushq %rax /* push real oldrax to the rdi slot */
+ leaq \sym(%rip),%rax
+ jmp error_entry
+ .endm
+
+ .macro errorentry sym
+ pushq %rax
+ leaq \sym(%rip),%rax
+ jmp error_entry
+ .endm
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack
+ * and the exception handler in %rax.
+ */
+ ALIGN
+error_entry:
+ /* rdi slot contains rax, oldrax contains error code */
+ pushq %rsi
+ movq 8(%rsp),%rsi /* load rax */
+ pushq %rdx
+ pushq %rcx
+ pushq %rsi /* store rax */
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+ cld
+ SAVE_REST
+ testl $3,CS(%rsp)
+ je error_kernelspace
+ swapgs
+ movl $1,%r15d
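+ /* %r15d == 1: we did swapgs and must swap back on the exit path */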
+error_action:
+ sti
+ movq %rdi,RDI(%rsp)
+ movq %rsp,%rdi
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp)
+ call *%rax
+ /* r15d: swapgs flag */
+error_exit:
+ testl %r15d,%r15d
+ jz error_restore
+error_test:
+ cli
+ GET_CURRENT(%rcx)
+ cmpq $0,tsk_need_resched(%rcx)
+ jne error_reschedule
+ cmpl $0,tsk_sigpending(%rcx)
+ jne error_signal
+error_restore_swapgs:
+ swapgs
+error_restore:
+ RESTORE_REST
+ jmp retint_restore_args
+
+error_reschedule:
+ sti
+ call schedule
+ jmp error_test
+
+error_signal:
+ sti
+ xorq %rsi,%rsi
+ movq %rsp,%rdi
+ call do_signal
+error_signal_test:
+ GET_CURRENT(%rcx)
+ cli
+ cmpq $0,tsk_need_resched(%rcx)
+ je error_restore_swapgs
+ sti
+ call schedule
+ jmp error_signal_test
+
+error_kernelspace:
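+ /* Exception in kernel mode. Special case: a fault on the iretq at
+ iret_label arrives after the return path has already switched
+ back to the user GS base, so swapgs again and flag it in %r15d. */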
+ xorl %r15d,%r15d
+ cmpq $iret_label,RIP(%rsp)
+ jne error_action
+ movl $1,%r15d
+ swapgs
+ jmp error_action
+
+/*
+ * Create a kernel thread.
+ *
+ * C extern interface:
+ * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ *
+ * asm input arguments:
+ * rdi: fn, rsi: arg, rdx: flags
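+ *
+ * Hypothetical usage example:
+ * kernel_thread(my_worker_fn, NULL, CLONE_FS | CLONE_FILES);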
+ */
+ENTRY(kernel_thread)
+ FAKE_STACK_FRAME $child_rip
+ SAVE_ALL
+
+ # rdi: flags, rsi: usp, rdx: will be &pt_regs
+ movq %rdx,%rdi # clone flags -> first argument
+ orq $CLONE_VM, %rdi # kernel threads share the address space
+
+ movq $-1, %rsi # usp = -1: no separate user stack
+
+ movq %rsp, %rdx # &pt_regs: the fake frame built above
+
+ # clone now
+ call do_fork
+ # store retval in the frame's RAX slot; RESTORE_ALL below pops it
+ # back into %rax before the ret
+ movq %rax, RAX(%rsp)
+
+ /*
+ * It isn't worth checking for a reschedule here, so within the
+ * x86_64 port you can rely on kernel_thread() not rescheduling
+ * the child before returning. This avoids the need for hacks,
+ * for example to fork off the per-CPU idle tasks.
+ * [Hopefully no generic code relies on the reschedule -AK]
+ */
+ RESTORE_ALL
+ UNFAKE_STACK_FRAME
+ ret
+
+child_rip:
+ /*
+ * Here we are in the child and the registers are set as they were
+ * at kernel_thread() invocation in the parent.
+ */
+ movq %rdi, %rax
+ movq %rsi, %rdi
+ call *%rax
+ # exit
+ xorq %rdi, %rdi
+ call do_exit
+
+/*
+ * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
+ *
+ * C extern interface:
+ * extern long execve(char *name, char **argv, char **envp)
+ *
+ * asm input arguments:
+ * rdi: name, rsi: argv, rdx: envp
+ *
+ * We want to fall back into:
+ * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
+ *
+ * sys_execve asm fallback arguments:
+ * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ */
+ENTRY(execve)
+ FAKE_STACK_FRAME $0
+ SAVE_ALL
+ call sys_execve
+ movq %rax, RAX(%rsp)
+ RESTORE_REST
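+ /* %rax == 0: the frame now holds the new program's state and must
+ be restored in full via the IRET path; otherwise return the
+ error normally */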
+ testq %rax,%rax
+ je int_ret_from_sys_call
+ RESTORE_ARGS
+ UNFAKE_STACK_FRAME
+ ret
+
+ENTRY(page_fault)
+#ifdef CONFIG_KDB
+ pushq %rcx
+ pushq %rdx
+ pushq %rax
+ movl $473,%ecx /* 0x1d9: DebugCtl MSR */
+ rdmsr
+ andl $0xfffffffe,%eax /* Disable last branch recording */
+ wrmsr
+ popq %rax
+ popq %rdx
+ popq %rcx
+#endif
+ errorentry do_page_fault
+
+ENTRY(coprocessor_error)
+ zeroentry do_coprocessor_error
+
+ENTRY(simd_coprocessor_error)
+ zeroentry do_simd_coprocessor_error
+
+ENTRY(device_not_available)
+ pushq $-1
+ SAVE_ALL
+ xorl %r15d,%r15d
+ testl $3,CS(%rsp)
+ jz 1f
+ swapgs
+ movl $1,%r15d
+1:
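+ /* CR0.EM (bit 2) set means no FPU: call math_emulate; otherwise
+ math_state_restore reloads the lazily-saved FPU state */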
+ movq %cr0,%rax
+ leaq math_state_restore(%rip),%rcx
+ leaq math_emulate(%rip),%rbx
+ testl $0x4,%eax
+ cmoveq %rcx,%rbx
+ call *%rbx
+ jmp error_exit
+
+ENTRY(debug)
+ zeroentry do_debug
+
+ENTRY(nmi)
+ pushq $-1
+ SAVE_ALL
+ /* An NMI can happen inside the critical section around a swapgs,
+ so we need this expensive MSR read to see which GS base is live.
+ Rely on arch_prctl forbidding user space from setting a negative
+ GS base; only the kernel value is negative. */
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ xorl %ebx,%ebx
+ testl %edx,%edx
+ js 1f
+ swapgs
+ movl $1,%ebx
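+ /* %ebx == 1: we did swapgs and must undo it before returning */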
+1: movq %rsp,%rdi
+ call do_nmi
+ cli
+ testl %ebx,%ebx
+ jz error_restore
+ swapgs
+ jmp error_restore
+
+ENTRY(int3)
+ zeroentry do_int3
+
+ENTRY(overflow)
+ zeroentry do_overflow
+
+ENTRY(bounds)
+ zeroentry do_bounds
+
+ENTRY(invalid_op)
+ zeroentry do_invalid_op
+
+ENTRY(coprocessor_segment_overrun)
+ zeroentry do_coprocessor_segment_overrun
+
+ENTRY(reserved)
+ zeroentry do_reserved
+
+ENTRY(double_fault)
+ errorentry do_double_fault
+
+ENTRY(invalid_TSS)
+ errorentry do_invalid_TSS
+
+ENTRY(segment_not_present)
+ errorentry do_segment_not_present
+
+ENTRY(stack_segment)
+ errorentry do_stack_segment
+
+ENTRY(general_protection)
+ errorentry do_general_protection
+
+ENTRY(alignment_check)
+ errorentry do_alignment_check
+
+ENTRY(divide_error)
+ zeroentry do_divide_error
+
+ENTRY(spurious_interrupt_bug)
+ zeroentry do_spurious_interrupt_bug
+
+ENTRY(machine_check)
+ zeroentry do_machine_check
+
+ENTRY(call_debug)
+ zeroentry do_call_debug
+