patch-2.4.22 linux-2.4.22/arch/parisc/kernel/unaligned.c

Next file: linux-2.4.22/arch/parisc/math-emu/dfadd.c
Previous file: linux-2.4.22/arch/parisc/kernel/traps.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.21/arch/parisc/kernel/unaligned.c linux-2.4.22/arch/parisc/kernel/unaligned.c
@@ -3,6 +3,7 @@
  *    Unaligned memory access handler
  *
  *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
+ *    Significantly tweaked by LaMont Jones <lamont@debian.org>
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -66,28 +67,28 @@
 #define OPCODE3_MASK	OPCODE3(0x3f,1)
 #define OPCODE4_MASK    OPCODE4(0x3f)
 
-/* skip LDB (index) */
+/* skip LDB - never unaligned (index) */
 #define OPCODE_LDH_I	OPCODE1(0x03,0,0x1)
 #define OPCODE_LDW_I	OPCODE1(0x03,0,0x2)
 #define OPCODE_LDD_I	OPCODE1(0x03,0,0x3)
 #define OPCODE_LDDA_I	OPCODE1(0x03,0,0x4)
-/* skip LDCD (index) */
+#define OPCODE_LDCD_I	OPCODE1(0x03,0,0x5)
 #define OPCODE_LDWA_I	OPCODE1(0x03,0,0x6)
-/* skip LDCW (index) */
-/* skip LDB (short) */
+#define OPCODE_LDCW_I	OPCODE1(0x03,0,0x7)
+/* skip LDB - never unaligned (short) */
 #define OPCODE_LDH_S	OPCODE1(0x03,1,0x1)
 #define OPCODE_LDW_S	OPCODE1(0x03,1,0x2)
 #define OPCODE_LDD_S	OPCODE1(0x03,1,0x3)
 #define OPCODE_LDDA_S	OPCODE1(0x03,1,0x4)
-/* skip LDCD (short) */
+#define OPCODE_LDCD_S	OPCODE1(0x03,1,0x5)
 #define OPCODE_LDWA_S	OPCODE1(0x03,1,0x6)
-/* skip LDCW (short) */
-/* skip STB */
+#define OPCODE_LDCW_S	OPCODE1(0x03,1,0x7)
+/* skip STB - never unaligned */
 #define OPCODE_STH	OPCODE1(0x03,1,0x9)
 #define OPCODE_STW	OPCODE1(0x03,1,0xa)
 #define OPCODE_STD	OPCODE1(0x03,1,0xb)
-/* skip STBY */
-/* skip STDBY */
+/* skip STBY - never unaligned */
+/* skip STDBY - never unaligned */
 #define OPCODE_STWA	OPCODE1(0x03,1,0xe)
 #define OPCODE_STDA	OPCODE1(0x03,1,0xf)
 
@@ -103,129 +104,200 @@
 
 #define OPCODE_LDH_L    OPCODE4(0x11)
 #define OPCODE_LDW_L    OPCODE4(0x12)
-#define OPCODE_LDW_L2   OPCODE4(0x13)
+#define OPCODE_LDWM     OPCODE4(0x13)
 #define OPCODE_STH_L    OPCODE4(0x19)
 #define OPCODE_STW_L    OPCODE4(0x1A)
-#define OPCODE_STW_L2   OPCODE4(0x1B)
+#define OPCODE_STWM     OPCODE4(0x1B)
+
+#define MAJOR_OP(i) (((i)>>26)&0x3f)
+#define R1(i) (((i)>>21)&0x1f)
+#define R2(i) (((i)>>16)&0x1f)
+#define R3(i) ((i)&0x1f)
+#define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
+#define IM5_2(i) IM((i)>>16,5)
+#define IM5_3(i) IM((i),5)
+#define IM14(i) IM((i),14)
 
 int unaligned_enabled = 1;
 
 void die_if_kernel (char *str, struct pt_regs *regs, long err);
 
-static int emulate_load(struct pt_regs *regs, int len, int toreg)
+static int emulate_ldh(struct pt_regs *regs, int toreg)
 {
 	unsigned long saddr = regs->ior;
 	unsigned long val = 0;
-	int ret = 0;
 
-	if (regs->isr != regs->sr[7])
-	{
-		printk(KERN_CRIT "isr verification failed (isr: " RFMT ", sr7: " RFMT "\n",
-			regs->isr, regs->sr[7]);
-		return 1;
-	}
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", 
+		regs->isr, regs->ior, toreg);
+
+	__asm__ __volatile__  (
+"	mtsp	%3, %%sr1\n"
+"	ldbs	0(%%sr1,%2), %%r20\n"
+"	ldbs	1(%%sr1,%2), %0\n"
+	"depw	%%r20, 23, 24, %0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
+	: "r20" );
+
+	DPRINTF("val = 0x" RFMT "\n", val);
+
+	if (toreg)
+		regs->gr[toreg] = val;
+
+	return 0;
+}
+static int emulate_ldw(struct pt_regs *regs, int toreg)
+{
+	unsigned long saddr = regs->ior;
+	unsigned long val = 0;
 
-	DPRINTF("load " RFMT ":" RFMT " to r%d for %d bytes\n", 
-		regs->isr, regs->ior, toreg, len);
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", 
+		regs->isr, regs->ior, toreg);
 
 	__asm__ __volatile__  (
-"       mfsp %%sr1, %%r20\n"
-"       mtsp %6, %%sr1\n"
-"	copy %%r0, %0\n"
-"0:	ldbs,ma	1(%%sr1,%4), %%r19\n"
-"	addi -1, %5, %5\n"
-"	cmpib,>= 0, %5, 2f\n"
-"	or %%r19, %0, %0\n"
-"	b 0b\n"
-	
-#ifdef __LP64__
-	"depd,z %0, 55, 56, %0\n"
-#else
-	"depw,z %0, 23, 24, %0\n"
-#endif
-	
-"1:	ldi	10, %1\n"
-"2:     mtsp %%r20, %%sr1\n"
-"	.section __ex_table,\"a\"\n"
+"	zdep	%2,28,2,%%r19\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%3, %%sr1\n"
+"	depw	%%r0,31,2,%2\n"
+"	ldw	0(%%sr1,%2),%0\n"
+"	ldw	4(%%sr1,%2),%%r20\n"
+"	subi	32,%%r19,%%r19\n"
+"	mtctl	%%r19,11\n"
+"	vshd	%0,%%r20,%0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
+	: "r19", "r20" );
+
+	DPRINTF("val = 0x" RFMT "\n", val);
+
+	if (toreg)
+		regs->gr[toreg] = val;
+
+	return 0;
+}
 #ifdef __LP64__
-	".dword 0b, (1b-0b)\n"
-#else
-	".word 0b, (1b-0b)\n"
-#endif
-	".previous\n" 
-	: "=r" (val), "=r" (ret)
-	: "0" (val), "1" (ret), "r" (saddr), "r" (len), "r" (regs->isr)
+static int emulate_ldd(struct pt_regs *regs, int toreg)
+{
+	unsigned long saddr = regs->ior;
+	unsigned long val = 0;
+
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
+		regs->isr, regs->ior, toreg);
+
+	__asm__ __volatile__  (
+"	depd,z	%2,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
+"	mtsp	%3, %%sr1\n"
+"	depd	%%r0,63,3,%2\n"
+"	ldd	0(%%sr1,%2),%0\n"
+"	ldd	8(%%sr1,%2),%%r20\n"
+"	subi	64,%%r19,%%r19\n"
+"	mtsar	%%r19\n"
+"	shrpd	%0,%%r20,%%sar,%0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
 	: "r19", "r20" );
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
-	regs->gr[toreg] = val;
+	if (toreg)
+		regs->gr[toreg] = val;
 
-	return ret;
+	return 0;
 }
+#endif
 
-static int emulate_store(struct pt_regs *regs, int len, int frreg)
+static int emulate_sth(struct pt_regs *regs, int frreg)
 {
-	int ret = 0;
-#ifdef __LP64__
-	unsigned long val = regs->gr[frreg] << (64 - (len << 3));
-#else
-	unsigned long val = regs->gr[frreg] << (32 - (len << 3));
-#endif
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
 
-	if (regs->isr != regs->sr[7])
-	{
-		printk(KERN_CRIT "isr verification failed (isr: " RFMT ", sr7: " RFMT "\n",
-			regs->isr, regs->sr[7]);
-		return 1;
-	}
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
+
+	__asm__ __volatile__ (
+"	mtsp %2, %%sr1\n"
+"	extrw,u %0, 23, 8, %%r19\n"
+"	stb %0, 1(%%sr1, %1)\n"
+"	stb %%r19, 0(%%sr1, %1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19" );
+
+	return 0;
+}
+static int emulate_stw(struct pt_regs *regs, int frreg)
+{
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
 
-	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for %d bytes\n", frreg, 
-		regs->gr[frreg], regs->isr, regs->ior, len);
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
 
 
 	__asm__ __volatile__ (
-"       mfsp %%sr1, %%r20\n"		/* save sr1 */
-"       mtsp %5, %%sr1\n"
-#ifdef __LP64__
-"0:	extrd,u %2, 7, 8, %%r19\n"
-#else
-"0:	extrw,u %2, 7, 8, %%r19\n"
-#endif
-"1:	stb,ma %%r19, 1(%%sr1, %3)\n"
-"	addi -1, %4, %4\n"
-"	cmpib,>= 0, %4, 3f\n"
-	
+"	mtsp %2, %%sr1\n"
+"	zdep	%1, 28, 2, %%r19\n"
+"	dep	%%r0, 31, 2, %1\n"
+"	mtsar	%%r19\n"
+"	depwi,z	-2, %%sar, 32, %%r19\n"
+"	ldw	0(%%sr1,%1),%%r20\n"
+"	ldw	4(%%sr1,%1),%%r21\n"
+"	vshd	%%r0, %0, %%r22\n"
+"	vshd	%0, %%r0, %%r1\n"
+"	and	%%r20, %%r19, %%r20\n"
+"	andcm	%%r21, %%r19, %%r21\n"
+"	or	%%r22, %%r20, %%r20\n"
+"	or	%%r1, %%r21, %%r21\n"
+"	stw	%%r20,0(%%sr1,%1)\n"
+"	stw	%%r21,4(%%sr1,%1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21", "r22", "r1" );
+
+	return 0;
+}
 #ifdef __LP64__
-	"depd,z %2, 55, 56, %2\n"
-#else
-	"depw,z %2, 23, 24, %2\n"
-#endif
+static int emulate_std(struct pt_regs *regs, int frreg)
+{
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
 
-"	b 0b\n"
-"	nop\n"
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 8 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
 
-"2:	ldi 11, %0\n"
-"3:     mtsp %%r20, %%sr1\n"
-"	.section __ex_table,\"a\"\n"
-#ifdef __LP64__
-	".dword 1b, (2b-1b)\n"
-#else
-	".word 1b, (2b-1b)\n"
-#endif
-	".previous\n" 
-	: "=r" (ret)
-	: "0" (ret), "r" (val), "r" (regs->ior), "r" (len), "r" (regs->isr)
-	: "r19", "r20" );
 
-	return ret;
-}
+	__asm__ __volatile__ (
+"	mtsp %2, %%sr1\n"
+"	depd,z	%1, 60, 3, %%r19\n"
+"	depd	%%r0, 63, 3, %1\n"
+"	mtsar	%%r19\n"
+"	depdi,z	-2, %%sar, 64, %%r19\n"
+"	ldd	0(%%sr1,%1),%%r20\n"
+"	ldd	8(%%sr1,%1),%%r21\n"
+"	shrpd	%%r0, %0, %%sar, %%r22\n"
+"	shrpd	%0, %%r0, %%sar, %%r1\n"
+"	and	%%r20, %%r19, %%r20\n"
+"	andcm	%%r21, %%r19, %%r21\n"
+"	or	%%r22, %%r20, %%r20\n"
+"	or	%%r1, %%r21, %%r21\n"
+"	std	%%r20,0(%%sr1,%1)\n"
+"	std	%%r21,8(%%sr1,%1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21", "r22", "r1" );
 
+	return 0;
+}
+#endif
 
 void handle_unaligned(struct pt_regs *regs)
 {
 	unsigned long unaligned_count = 0;
 	unsigned long last_time = 0;
+	unsigned long newbase = regs->gr[R1(regs->iir)];
+	int modify = 0;
 	int ret = -1;
 	struct siginfo si;
 
@@ -284,83 +356,169 @@
 	if (!unaligned_enabled)
 		goto force_sigbus;
 
+	/* handle modification - OK, it's ugly, see the instruction manual */
+	switch (MAJOR_OP(regs->iir))
+	{
+	case 0x03:
+	case 0x09:
+	case 0x0b:
+		if (regs->iir&0x20)
+		{
+			modify = 1;
+			if (regs->iir&0x1000)		/* short loads */
+				if (regs->iir&0x200)
+					newbase += IM5_3(regs->iir);
+				else
+					newbase += IM5_2(regs->iir);
+			else if (regs->iir&0x2000)	/* scaled indexed */
+			{
+				int shift=0;
+				switch (regs->iir & OPCODE1_MASK)
+				{
+				case OPCODE_LDH_I:
+					shift= 1; break;
+				case OPCODE_LDW_I:
+					shift= 2; break;
+				case OPCODE_LDD_I:
+				case OPCODE_LDDA_I:
+					shift= 3; break;
+				}
+				newbase += (R2(regs->iir)?regs->gr[R2(regs->iir)]:0)<<shift;
+			} else				/* simple indexed */
+				newbase += (R2(regs->iir)?regs->gr[R2(regs->iir)]:0);
+		}
+		break;
+	case 0x13:
+	case 0x1b:
+		modify = 1;
+		newbase += IM14(regs->iir);
+		break;
+	case 0x14:
+	case 0x1c:
+		if (regs->iir&8)
+		{
+			modify = 1;
+			newbase += IM14(regs->iir&~0xe);
+		}
+		break;
+	case 0x16:
+	case 0x1e:
+		modify = 1;
+		newbase += IM14(regs->iir&6);
+		break;
+	case 0x17:
+	case 0x1f:
+		if (regs->iir&4)
+		{
+			modify = 1;
+			newbase += IM14(regs->iir&~4);
+		}
+		break;
+	}
+
+	if (regs->isr != regs->sr[7])
+	{
+		printk(KERN_CRIT "isr verification failed (isr: " RFMT ", sr7: " RFMT ")\n",
+			regs->isr, regs->sr[7]);
+
+		/* don't kill him though, since he has appropriate access to the page, or we
+		 * would never have gotten here.
+		 */
+	}
+
 	/* TODO: make this cleaner... */
 	switch (regs->iir & OPCODE1_MASK)
 	{
 	case OPCODE_LDH_I:
 	case OPCODE_LDH_S:
-		ret = emulate_load(regs, 2, regs->iir & 0x1f);
+		ret = emulate_ldh(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_LDW_I:
 	case OPCODE_LDWA_I:
 	case OPCODE_LDW_S:
 	case OPCODE_LDWA_S:
-		ret = emulate_load(regs, 4, regs->iir&0x1f);
-		break;
-
-	case OPCODE_LDD_I:
-	case OPCODE_LDDA_I:
-	case OPCODE_LDD_S:
-	case OPCODE_LDDA_S:
-		ret = emulate_load(regs, 8, regs->iir&0x1f);
+		ret = emulate_ldw(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_STH:
-		ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_sth(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_STW:
 	case OPCODE_STWA:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_stw(regs, R2(regs->iir));
+		break;
+
+#ifdef __LP64__
+	case OPCODE_LDD_I:
+	case OPCODE_LDDA_I:
+	case OPCODE_LDD_S:
+	case OPCODE_LDDA_S:
+		ret = emulate_ldd(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_STD:
 	case OPCODE_STDA:
-		ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_std(regs, R2(regs->iir));
+		break;
+#endif
+
+	case OPCODE_LDCD_I:
+	case OPCODE_LDCW_I:
+	case OPCODE_LDCD_S:
+	case OPCODE_LDCW_S:
+		ret = -1;	/* "undefined", but let's kill them. */
 		break;
 	}
+#ifdef __LP64__
 	switch (regs->iir & OPCODE2_MASK)
 	{
 	case OPCODE_LDD_L:
 	case OPCODE_FLDD_L:
-		ret = emulate_load(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_ldd(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_STD_L:
 	case OPCODE_FSTD_L:
-		ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_std(regs, R2(regs->iir));
 		break;
 	}
+#endif
 	switch (regs->iir & OPCODE3_MASK)
 	{
 	case OPCODE_LDW_M:
 	case OPCODE_FLDW_L:
-		ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_ldw(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_FSTW_L:
 	case OPCODE_STW_M:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_stw(regs, R2(regs->iir));
 		break;
 	}
 	switch (regs->iir & OPCODE4_MASK)
 	{
 	case OPCODE_LDH_L:
-		ret = emulate_load(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_ldh(regs, R2(regs->iir));
 		break;
 	case OPCODE_LDW_L:
-	case OPCODE_LDW_L2:
-		ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+	case OPCODE_LDWM:
+		ret = emulate_ldw(regs, R2(regs->iir));
 		break;
 	case OPCODE_STH_L:
-		ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_sth(regs, R2(regs->iir));
 		break;
 	case OPCODE_STW_L:
-	case OPCODE_STW_L2:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+	case OPCODE_STWM:
+		ret = emulate_stw(regs, R2(regs->iir));
 		break;
 	}
+	/* XXX LJ - need to handle float load/store */
+
+	if (modify && R1(regs->iir))
+		regs->gr[R1(regs->iir)] = newbase;
+
 
 	if (ret < 0)
 		printk(KERN_CRIT "Not-handled unaligned insn 0x%08lx\n", regs->iir);
@@ -424,9 +582,9 @@
 			align_mask = 1UL;
 			break;
 		case OPCODE_LDW_L:
-		case OPCODE_LDW_L2:
+		case OPCODE_LDWM:
 		case OPCODE_STW_L:
-		case OPCODE_STW_L2:
+		case OPCODE_STWM:
 			align_mask = 3UL;
 			break;
 		}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)