patch-2.4.20 linux-2.4.20/arch/x86_64/lib/copy_user.S
- Lines: 237
- Date: Thu Nov 28 15:53:12 2002
- Orig file: linux-2.4.19/arch/x86_64/lib/copy_user.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.19/arch/x86_64/lib/copy_user.S linux-2.4.20/arch/x86_64/lib/copy_user.S
@@ -0,0 +1,236 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License v2.
+ *
+ * Functions to copy from and to user space.
+ */
+
+#define FIX_ALIGNMENT 1
+
+#define movnti movq /* write to cache for now */
+#define prefetch prefetcht2
+
+ #include <asm/current.h>
+ #include <asm/offset.h>
+
+/* Standard copy_to_user with segment limit checking */
+ .globl copy_to_user
+ .p2align
+copy_to_user:
+ GET_CURRENT(%rax)
+ movq %rdi,%rcx
+ addq %rdx,%rcx
+ jc bad_to_user
+ cmpq tsk_addr_limit(%rax),%rcx
+ jae bad_to_user
+ jmp copy_user_generic
+
+/* Standard copy_from_user with segment limit checking */
+ .globl copy_from_user
+ .p2align
+copy_from_user:
+ GET_CURRENT(%rax)
+ movq %rsi,%rcx
+ addq %rdx,%rcx
+ jc bad_from_user
+ cmpq tsk_addr_limit(%rax),%rcx
+ jae bad_from_user
+ /* FALL THROUGH to copy_user_generic */
+
+ .section .fixup,"ax"
+ /* must zero dest */
+bad_from_user:
+ movl %edx,%ecx
+ xorl %eax,%eax
+ rep
+ stosb
+bad_to_user:
+ movl %edx,%eax
+ ret
+ .previous
+
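As a hedged illustration (the C below is an editorial sketch, not part of the patch), this is roughly what the two wrappers above do: verify that the user range neither wraps around nor crosses the task's address limit, then hand off to copy_user_generic. The .fixup section zeroes the destination on a bad source pointer so no stale kernel data leaks out, and both error paths report the whole count as uncopied. The addr_limit parameter below stands in for the tsk_addr_limit offset used in the assembly; it is an assumption for the sketch, not the 2.4 task_struct layout.

#include <string.h>

/* Declaration only: the real routine is the assembly copy_user_generic below. */
unsigned long copy_user_generic(void *dst, const void *src, unsigned long len);

static unsigned long sketch_copy_from_user(void *dst, const void *src,
                                           unsigned long len,
                                           unsigned long addr_limit)
{
        unsigned long end = (unsigned long)src + len;

        if (end < (unsigned long)src || end >= addr_limit) {
                memset(dst, 0, len);    /* bad_from_user: zero the destination */
                return len;             /* bad_to_user: report every byte as uncopied */
        }
        return copy_user_generic(dst, src, len);        /* fall through in the asm */
}

copy_to_user performs the same check on the destination pointer but skips the memset, since the destination it failed to reach is user memory.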
+/*
+ * copy_user_generic - memory copy with exception handling.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ .globl copy_user_generic
+copy_user_generic:
+ /* Put the first cacheline into cache. This should handle
+ the small movements in ioctls etc., but not penalize the bigger
+ filesystem data copies too much. */
+ pushq %rbx
+ prefetch (%rsi)
+ xorl %eax,%eax /*zero for the exception handler */
+
+#ifdef FIX_ALIGNMENT
+ /* check for bad alignment of destination */
+ movl %edi,%ecx
+ andl $7,%ecx
+ jnz bad_alignment
+after_bad_alignment:
+#endif
+
+ movq %rdx,%rcx
+
+ movl $64,%ebx
+ shrq $6,%rdx
+ decq %rdx
+ js handle_tail
+ jz loop_no_prefetch
+
+loop:
+ prefetch 64(%rsi)
+
+loop_no_prefetch:
+s1: movq (%rsi),%r11
+s2: movq 1*8(%rsi),%r8
+s3: movq 2*8(%rsi),%r9
+s4: movq 3*8(%rsi),%r10
+d1: movnti %r11,(%rdi)
+d2: movnti %r8,1*8(%rdi)
+d3: movnti %r9,2*8(%rdi)
+d4: movnti %r10,3*8(%rdi)
+
+s5: movq 4*8(%rsi),%r11
+s6: movq 5*8(%rsi),%r8
+s7: movq 6*8(%rsi),%r9
+s8: movq 7*8(%rsi),%r10
+d5: movnti %r11,4*8(%rdi)
+d6: movnti %r8,5*8(%rdi)
+d7: movnti %r9,6*8(%rdi)
+d8: movnti %r10,7*8(%rdi)
+
+ addq %rbx,%rsi
+ addq %rbx,%rdi
+
+ decq %rdx
+ jz loop_no_prefetch
+ jns loop
+
+handle_tail:
+ movl %ecx,%edx
+ andl $63,%ecx
+ shrl $3,%ecx
+ jz handle_7
+ movl $8,%ebx
+loop_8:
+s9: movq (%rsi),%r8
+d9: movq %r8,(%rdi)
+ addq %rbx,%rdi
+ addq %rbx,%rsi
+ decl %ecx
+ jnz loop_8
+
+handle_7:
+ movl %edx,%ecx
+ andl $7,%ecx
+ jz ende
+loop_1:
+s10: movb (%rsi),%bl
+d10: movb %bl,(%rdi)
+ incq %rdi
+ incq %rsi
+ decl %ecx
+ jnz loop_1
+
+ende:
+ sfence
+ popq %rbx
+ ret
+
+#ifdef FIX_ALIGNMENT
+ /* align destination */
+bad_alignment:
+ movl $8,%r9d
+ subl %ecx,%r9d
+ movl %r9d,%ecx
+ subq %r9,%rdx
+ jz small_align
+ js small_align
+align_1:
+s11: movb (%rsi),%bl
+d11: movb %bl,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz align_1
+ jmp after_bad_alignment
+small_align:
+ addq %r9,%rdx
+ jmp handle_7
+#endif
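Ignoring the fault handling, which only the assembly can express, the copy structure of copy_user_generic above reduces to the hedged C outline below (an editorial sketch, not the kernel's code). Since movnti is #defined to movq, the stores go through the cache; the routine byte-copies until the destination is 8-byte aligned, copies 64-byte blocks while prefetching the next cache line, then drains whole quadwords and finally single bytes.

static unsigned long sketch_copy_user_generic(unsigned char *dst,
                                              const unsigned char *src,
                                              unsigned long len)
{
        unsigned long i;

        /* bad_alignment/align_1: byte copies until the destination is 8-byte aligned */
        while (((unsigned long)dst & 7) && len) {
                *dst++ = *src++;
                len--;
        }
        /* loop/loop_no_prefetch: unrolled 64-byte blocks (prefetch 64(%rsi) each pass) */
        while (len >= 64) {
                for (i = 0; i < 8; i++)
                        ((unsigned long *)dst)[i] = ((const unsigned long *)src)[i];
                dst += 64;
                src += 64;
                len -= 64;
        }
        /* loop_8: remaining whole quadwords */
        while (len >= 8) {
                *(unsigned long *)dst = *(const unsigned long *)src;
                dst += 8;
                src += 8;
                len -= 8;
        }
        /* loop_1: trailing bytes */
        while (len) {
                *dst++ = *src++;
                len--;
        }
        return 0;       /* the real routine returns the uncopied byte count on a fault */
}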
+
+ /* table sorted by exception address */
+ .section __ex_table,"a"
+ .align 8
+ .quad s1,s1e
+ .quad s2,s2e
+ .quad s3,s3e
+ .quad s4,s4e
+ .quad d1,s1e
+ .quad d2,s2e
+ .quad d3,s3e
+ .quad d4,s4e
+ .quad s5,s5e
+ .quad s6,s6e
+ .quad s7,s7e
+ .quad s8,s8e
+ .quad d5,s5e
+ .quad d6,s6e
+ .quad d7,s7e
+ .quad d8,s8e
+ .quad s9,e_quad
+ .quad d9,e_quad
+ .quad s10,e_byte
+ .quad d10,e_byte
+#ifdef FIX_ALIGNMENT
+ .quad s11,e_byte
+ .quad d11,e_byte
+#endif
+ .quad e5,e_zero
+ .previous
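Each .quad pair above records a possibly-faulting instruction and the address to resume at. When one of those instructions faults, the page fault handler searches the kernel's exception tables for the faulting RIP and, on a match, redirects execution to the fixup instead of oopsing; the table is kept sorted by exception address (see the comment above) so the lookup can be a binary search. The helper below is an editorial sketch of that lookup, assuming the two-word entry layout of the 2.4 exception table; it is not the kernel's search_exception_table.

struct ex_entry {                       /* mirrors: .quad insn, fixup */
        unsigned long insn;             /* address of the instruction that may fault */
        unsigned long fixup;            /* address to resume at after the fault */
};

static unsigned long sketch_search_extable(const struct ex_entry *tab,
                                           unsigned long n, unsigned long rip)
{
        unsigned long lo = 0, hi = n;

        while (lo < hi) {               /* entries are sorted by insn address */
                unsigned long mid = lo + (hi - lo) / 2;

                if (tab[mid].insn == rip)
                        return tab[mid].fixup;
                if (tab[mid].insn < rip)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        return 0;                       /* no fixup found: genuine kernel fault */
}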
+
+ /* compute 64-offset for main loop. 8 bytes accuracy with error on the
+ pessimistic side. this is gross. it would be better to fix the
+ interface. */
+ /* eax: zero, ebx: 64 */
+s1e: addl $8,%eax
+s2e: addl $8,%eax
+s3e: addl $8,%eax
+s4e: addl $8,%eax
+s5e: addl $8,%eax
+s6e: addl $8,%eax
+s7e: addl $8,%eax
+s8e: addl $8,%eax
+ addq %rbx,%rdi /* +64 */
+ subq %rax,%rdi /* correct destination with computed offset */
+
+ shlq $6,%rdx /* loop counter * 64 (stride length) */
+ addq %rax,%rdx /* add offset to loopcnt */
+ andl $63,%ecx /* remaining bytes */
+ addq %rcx,%rdx /* add them */
+ jmp zero_rest
+
+ /* exception on quad word loop in tail handling */
+ /* ecx: loopcnt/8, %edx: length, rdi: correct */
+e_quad:
+ shll $3,%ecx
+ andl $7,%edx
+ addl %ecx,%edx
+ /* edx: bytes to zero, rdi: dest, eax:zero */
+zero_rest:
+ movq %rdx,%rcx
+e_byte:
+ xorl %eax,%eax
+e5: rep
+ stosb
+ /* when there is another exception while zeroing the rest just return */
+e_zero:
+ movq %rdx,%rax
+ jmp ende
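The recovery code above reconstructs the number of bytes that were not copied. A fault on load/store number n (1..8) of a 64-byte block lands on sNe and falls through the rest of the addl chain, leaving 8*(9-n) in %eax; the remaining full blocks (%rdx, shifted left by 6) and the sub-64 tail of the original count (%rcx masked with 63) are then added, %rdi is adjusted to the first uncopied byte, rep stosb clears that many destination bytes, and the total is returned in %rax. A hedged C restatement of that arithmetic follows; the parameter names are illustrative, taken from the register roles noted in the source comments.

static unsigned long sketch_uncopied_bytes(unsigned int n_in_block,   /* fault at sN/dN, 1..8  */
                                           unsigned long blocks_left, /* %rdx at fault time    */
                                           unsigned long total_len)   /* %rcx = original count */
{
        unsigned long in_block = 8 * (9 - (unsigned long)n_in_block); /* s1e..s8e fall-through */
        unsigned long tail     = total_len & 63;                      /* andl $63,%ecx         */

        /* shlq $6,%rdx ; addq %rax,%rdx ; addq %rcx,%rdx */
        return blocks_left * 64 + in_block + tail;
}

For example, a 200-byte copy (three 64-byte blocks plus an 8-byte tail) that faults on the second store of the second block yields 64 + 56 + 8 = 128 bytes reported uncopied, to the 8-byte accuracy the source comment concedes. A fault in the quadword tail loop instead goes through e_quad, which rebuilds the count as 8 times the remaining quadwords plus the byte remainder of the length.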