diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c
--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c	2007-05-20 01:46:35.000000000 +0200
+++ linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c	2007-05-20 01:51:47.000000000 +0200
@@ -29,224 +29,21 @@
 #include
-/* No, there's no macro saying 12*4, since it is "hard" to get it into
-   the asm in a good way.  Thus better to expose the problem everywhere.
-   */
-/* Assuming 1 cycle per dword written or read (ok, not really true), and
-   one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
-   so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
-
-#define ZERO_BLOCK_SIZE (1*12*4)
-void *memset(void *pdst,
-             int c,
-             size_t plen)
+/**
+ * memset - Fill a region of memory with the given value
+ * @s: Pointer to the start of the area.
+ * @c: The byte to fill the area with
+ * @count: The size of the area.
+ *
+ * Do not use memset() to access IO space, use memset_io() instead.
+ */
+void *memset(void *s, int c, size_t count)
 {
-  /* Ok.  Now we want the parameters put in special registers.
-     Make sure the compiler is able to make something useful of this. */
-
-  register char *return_dst __asm__ ("r10") = pdst;
-  register int n __asm__ ("r12") = plen;
-  register int lc __asm__ ("r11") = c;
-
-  /* Most apps use memset sanely.  Only those memsetting about 3..4
-     bytes or less get penalized compared to the generic implementation
-     - and that's not really sane use. */
-
-  /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
-     they compile cascaded "x |= x << 8" sanely! */
-  __asm__("movu.b %0,$r13\n\t"
-          "lslq 8,$r13\n\t"
-          "move.b %0,$r13\n\t"
-          "move.d $r13,%0\n\t"
-          "lslq 16,$r13\n\t"
-          "or.d $r13,%0"
-          : "=r" (lc) : "0" (lc) : "r13");
-
-  {
-    register char *dst __asm__ ("r13") = pdst;
-
-    /* This is NONPORTABLE, but since this whole routine is */
-    /* grossly nonportable that doesn't matter. */
-
-    if (((unsigned long) pdst & 3) != 0
-        /* Oops! n=0 must be a legal call, regardless of alignment. */
-        && n >= 3)
-      {
-        if ((unsigned long)dst & 1)
-          {
-            *dst = (char) lc;
-            n--;
-            dst++;
-          }
-
-        if ((unsigned long)dst & 2)
-          {
-            *(short *)dst = lc;
-            n -= 2;
-            dst += 2;
-          }
-      }
-
-    /* Now the fun part.  For the threshold value of this, check the equation
-       above. */
-    /* Decide which copying method to use. */
-    if (n >= ZERO_BLOCK_SIZE)
-      {
-        /* For large copies we use 'movem' */
-
-        /* It is not optimal to tell the compiler about clobbering any
-           registers; that will move the saving/restoring of those registers
-           to the function prologue/epilogue, and make non-movem sizes
-           suboptimal.
-
-           This method is not foolproof; it assumes that the "asm reg"
-           declarations at the beginning of the function really are used
-           here (beware: they may be moved to temporary registers).
-           This way, we do not have to save/move the registers around into
-           temporaries; we can safely use them straight away.
-
-           If you want to check that the allocation was right; then
-           check the equalities in the first comment.  It should say
-           "r13=r13, r12=r12, r11=r11" */
-        __asm__ volatile ("
-        ;; Check that the following is true (same register names on
-        ;; both sides of equal sign, as in r8=r8):
-        ;; %0=r13, %1=r12, %4=r11
-        ;;
-        ;; Save the registers we'll clobber in the movem process
-        ;; on the stack.  Don't mention them to gcc, it will only be
-        ;; upset.
-        subq    11*4,$sp
-        movem   $r10,[$sp]
-
-        move.d  $r11,$r0
-        move.d  $r11,$r1
-        move.d  $r11,$r2
-        move.d  $r11,$r3
-        move.d  $r11,$r4
-        move.d  $r11,$r5
-        move.d  $r11,$r6
-        move.d  $r11,$r7
-        move.d  $r11,$r8
-        move.d  $r11,$r9
-        move.d  $r11,$r10
-
-        ;; Now we've got this:
-        ;; r13 - dst
-        ;; r12 - n
-
-        ;; Update n for the first loop
-        subq    12*4,$r12
-0:
-        subq    12*4,$r12
-        bge     0b
-        movem   $r11,[$r13+]
-
-        addq    12*4,$r12 ;; compensate for last loop underflowing n
-
-        ;; Restore registers from stack
-        movem [$sp+],$r10"
-
-        /* Outputs */ : "=r" (dst), "=r" (n)
-        /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
-
-      }
-
-    /* Either we directly starts copying, using dword copying
-       in a loop, or we copy as much as possible with 'movem'
-       and then the last block (<44 bytes) is copied here.
-       This will work since 'movem' will have updated src,dst,n. */
-
-    while ( n >= 16 )
-      {
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        n -= 16;
-      }
+	char *xs = s;
 
-    /* A switch() is definitely the fastest although it takes a LOT of code.
-     * Particularly if you inline code this.
-     */
-    switch (n)
-      {
-      case 0:
-        break;
-      case 1:
-        *(char*)dst = (char) lc;
-        break;
-      case 2:
-        *(short*)dst = (short) lc;
-        break;
-      case 3:
-        *((short*)dst)++ = (short) lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 4:
-        *((long*)dst)++ = lc;
-        break;
-      case 5:
-        *((long*)dst)++ = lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 6:
-        *((long*)dst)++ = lc;
-        *(short*)dst = (short) lc;
-        break;
-      case 7:
-        *((long*)dst)++ = lc;
-        *((short*)dst)++ = (short) lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 8:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        break;
-      case 9:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 10:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *(short*)dst = (short) lc;
-        break;
-      case 11:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((short*)dst)++ = (short) lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 12:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        break;
-      case 13:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *(char*)dst = (char) lc;
-        break;
-      case 14:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *(short*)dst = (short) lc;
-        break;
-      case 15:
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((long*)dst)++ = lc;
-        *((short*)dst)++ = (short) lc;
-        *(char*)dst = (char) lc;
-        break;
-      }
-  }
+	while (count--)
+		*xs++ = c;
+	return s;
+}
-
-  return return_dst; /* destination pointer. */
-} /* memset() */
diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c linux-2.6.19.2/arch/cris/arch-v10/lib/string.c
--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c	2007-05-20 01:46:35.000000000 +0200
+++ linux-2.6.19.2/arch/cris/arch-v10/lib/string.c	2007-05-20 01:51:19.000000000 +0200
@@ -33,193 +33,21 @@
 #include
-void *memcpy(void *pdst,
-             const void *psrc,
-             size_t pn)
+/**
+ * memcpy - Copy one area of memory to another
+ * @dest: Where to copy to
+ * @src: Where to copy from
+ * @count: The size of the area.
+ *
+ * You should not use this function to access IO space, use memcpy_toio()
+ * or memcpy_fromio() instead.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
 {
-  /* Ok.  Now we want the parameters put in special registers.
-     Make sure the compiler is able to make something useful of this.
-     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+	char *tmp = dest;
+	const char *s = src;
 
-     If gcc was allright, it really would need no temporaries, and no
-     stack space to save stuff on. */
-
-  register void *return_dst __asm__ ("r10") = pdst;
-  register char *dst __asm__ ("r13") = pdst;
-  register const char *src __asm__ ("r11") = psrc;
-  register int n __asm__ ("r12") = pn;
-
-
-  /* When src is aligned but not dst, this makes a few extra needless
-     cycles.  I believe it would take as many to check that the
-     re-alignment was unnecessary. */
-  if (((unsigned long) dst & 3) != 0
-      /* Don't align if we wouldn't copy more than a few bytes; so we
-         don't have to check further for overflows. */
-      && n >= 3)
-  {
-    if ((unsigned long) dst & 1)
-    {
-      n--;
-      *(char*)dst = *(char*)src;
-      src++;
-      dst++;
-    }
-
-    if ((unsigned long) dst & 2)
-    {
-      n -= 2;
-      *(short*)dst = *(short*)src;
-      src += 2;
-      dst += 2;
-    }
-  }
-
-  /* Decide which copying method to use. */
-  if (n >= 44*2)          /* Break even between movem and
-                             move16 is at 38.7*2, but modulo 44. */
-  {
-    /* For large copies we use 'movem' */
-
-    /* It is not optimal to tell the compiler about clobbering any
-       registers; that will move the saving/restoring of those registers
-       to the function prologue/epilogue, and make non-movem sizes
-       suboptimal.
-
-       This method is not foolproof; it assumes that the "asm reg"
-       declarations at the beginning of the function really are used
-       here (beware: they may be moved to temporary registers).
-       This way, we do not have to save/move the registers around into
-       temporaries; we can safely use them straight away.
-
-       If you want to check that the allocation was right; then
-       check the equalities in the first comment.  It should say
-       "r13=r13, r11=r11, r12=r12" */
-    __asm__ volatile ("
-        ;; Check that the following is true (same register names on
-        ;; both sides of equal sign, as in r8=r8):
-        ;; %0=r13, %1=r11, %2=r12
-        ;;
-        ;; Save the registers we'll use in the movem process
-        ;; on the stack.
-        subq    11*4,$sp
-        movem   $r10,[$sp]
-
-        ;; Now we've got this:
-        ;; r11 - src
-        ;; r13 - dst
-        ;; r12 - n
-
-        ;; Update n for the first loop
-        subq    44,$r12
-0:
-        movem   [$r11+],$r10
-        subq    44,$r12
-        bge     0b
-        movem   $r10,[$r13+]
-
-        addq    44,$r12 ;; compensate for last loop underflowing n
-
-        ;; Restore registers from stack
-        movem [$sp+],$r10"
-
-     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
-     /* Inputs */ : "0" (dst), "1" (src), "2" (n));
-
-  }
-
-  /* Either we directly starts copying, using dword copying
-     in a loop, or we copy as much as possible with 'movem'
-     and then the last block (<44 bytes) is copied here.
-     This will work since 'movem' will have updated src,dst,n. */
-
-  while ( n >= 16 )
-  {
-    *((long*)dst)++ = *((long*)src)++;
-    *((long*)dst)++ = *((long*)src)++;
-    *((long*)dst)++ = *((long*)src)++;
-    *((long*)dst)++ = *((long*)src)++;
-    n -= 16;
-  }
-
-  /* A switch() is definitely the fastest although it takes a LOT of code.
-   * Particularly if you inline code this.
-   */
-  switch (n)
-  {
-    case 0:
-      break;
-    case 1:
-      *(char*)dst = *(char*)src;
-      break;
-    case 2:
-      *(short*)dst = *(short*)src;
-      break;
-    case 3:
-      *((short*)dst)++ = *((short*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 4:
-      *((long*)dst)++ = *((long*)src)++;
-      break;
-    case 5:
-      *((long*)dst)++ = *((long*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 6:
-      *((long*)dst)++ = *((long*)src)++;
-      *(short*)dst = *(short*)src;
-      break;
-    case 7:
-      *((long*)dst)++ = *((long*)src)++;
-      *((short*)dst)++ = *((short*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 8:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      break;
-    case 9:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 10:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *(short*)dst = *(short*)src;
-      break;
-    case 11:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((short*)dst)++ = *((short*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 12:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      break;
-    case 13:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-    case 14:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *(short*)dst = *(short*)src;
-      break;
-    case 15:
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((long*)dst)++ = *((long*)src)++;
-      *((short*)dst)++ = *((short*)src)++;
-      *(char*)dst = *(char*)src;
-      break;
-  }
-
-  return return_dst; /* destination pointer. */
-} /* memcpy() */
+	while (count--)
+		*tmp++ = *s++;
+	return dest;
+}
diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c
--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c	2007-05-16 22:11:26.000000000 +0200
+++ linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c	2007-05-16 23:17:41.000000000 +0200
@@ -88,63 +88,38 @@
    If you want to check that the allocation was right; then
    check the equalities in the first comment.  It should say
    "r13=r13, r11=r11, r12=r12". */
-	__asm__ volatile ("\
-	.ifnc %0%1%2%3,$r13$r11$r12$r10			\n\
-	.err						\n\
-	.endif						\n\
-
-	;; Save the registers we'll use in the movem process
-	;; on the stack.
-	subq	11*4,$sp
-	movem	$r10,[$sp]
-
-	;; Now we've got this:
-	;; r11 - src
-	;; r13 - dst
-	;; r12 - n
-
-	;; Update n for the first loop
-	subq	44,$r12
-
-; Since the noted PC of a faulting instruction in a delay-slot of a taken
-; branch, is that of the branch target, we actually point at the from-movem
-; for this case.  There is no ambiguity here; if there was a fault in that
-; instruction (meaning a kernel oops), the faulted PC would be the address
-; after *that* movem.
-
-0:
-	movem	[$r11+],$r10
-	subq	44,$r12
-	bge	0b
-	movem	$r10,[$r13+]
-1:
-	addq	44,$r12 ;; compensate for last loop underflowing n
-
-	;; Restore registers from stack
-	movem [$sp+],$r10
-2:
-	.section .fixup,\"ax\"
-
-; To provide a correct count in r10 of bytes that failed to be copied,
-; we jump back into the loop if the loop-branch was taken.  There is no
-; performance penalty for sany use; the program will segfault soon enough.
-
-3:
-	move.d [$sp],$r10
-	addq 44,$r10
-	move.d $r10,[$sp]
-	jump 0b
-4:
-	movem [$sp+],$r10
-	addq 44,$r10
-	addq 44,$r12
-	jump 2b
-
-	.previous
-	.section __ex_table,\"a\"
-	.dword 0b,3b
-	.dword 1b,4b
-	.previous"
+	__asm__ volatile (
+		".ifnc %0%1%2%3,$r13$r11$r12$r10\n\t"
+		".err\n\t"
+		".endif\n\t"
+		"subq 11*4,$sp\n\t"
+		"movem $r10,[$sp]\n\t"
+		"subq 44,$r12\n\t"
+		"0:\n\t"
+		"movem [$r11+],$r10\n\t"
+		"subq 44,$r12\n\t"
+		"bge 0b\n\t"
+		"movem $r10,[$r13+]\n\t"
+		"1:\n\t"
+		"addq 44,$r12\n\t"
+		"movem [$sp+],$r10\n\t"
+		"2:\n\t"
+		".section .fixup,\"ax\"\n\t"
+		"3:\n\t"
+		"move.d [$sp],$r10\n\t"
+		"addq 44,$r10\n\t"
+		"move.d $r10,[$sp]\n\t"
+		"jump 0b\n\t"
+		"4:\n\t"
+		"movem [$sp+],$r10\n\t"
+		"addq 44,$r10\n\t"
+		"addq 44,$r12\n\t"
+		"jump 2b\n\t"
+		".previous\n\t"
+		".section __ex_table,\"a\"\n\t"
+		".dword 0b,3b\n\t"
+		".dword 1b,4b\n\t"
+		".previous\n\t"
 
 	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
 	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
@@ -253,60 +228,32 @@
    If you want to check that the allocation was right; then
    check the equalities in the first comment.  It should say
    "r13=r13, r11=r11, r12=r12" */
-	__asm__ volatile ("
-	.ifnc %0%1%2%3,$r13$r11$r12$r10			\n\
-	.err						\n\
-	.endif						\n\
-
-	;; Save the registers we'll use in the movem process
-	;; on the stack.
-	subq	11*4,$sp
-	movem	$r10,[$sp]
-
-	;; Now we've got this:
-	;; r11 - src
-	;; r13 - dst
-	;; r12 - n
-
-	;; Update n for the first loop
-	subq	44,$r12
-0:
-	movem	[$r11+],$r10
-1:
-	subq	44,$r12
-	bge	0b
-	movem	$r10,[$r13+]
-
-	addq	44,$r12 ;; compensate for last loop underflowing n
-
-	;; Restore registers from stack
-	movem [$sp+],$r10
-4:
-	.section .fixup,\"ax\"
-
-;; Do not jump back into the loop if we fail.  For some uses, we get a
-;; page fault somewhere on the line.  Without checking for page limits,
-;; we don't know where, but we need to copy accurately and keep an
-;; accurate count; not just clear the whole line.  To do that, we fall
-;; down in the code below, proceeding with smaller amounts.  It should
-;; be kept in mind that we have to cater to code like what at one time
-;; was in fs/super.c:
-;;  i = size - copy_from_user((void *)page, data, size);
-;; which would cause repeated faults while clearing the remainder of
-;; the SIZE bytes at PAGE after the first fault.
-;; A caveat here is that we must not fall through from a failing page
-;; to a valid page.
-
-3:
-	movem  [$sp+],$r10
-	addq	44,$r12 ;; Get back count before faulting point.
-	subq	44,$r11 ;; Get back pointer to faulting movem-line.
-	jump	4b	;; Fall through, pretending the fault didn't happen.
-
-	.previous
-	.section __ex_table,\"a\"
-	.dword 1b,3b
-	.previous"
+	__asm__ volatile (
+		".ifnc %0%1%2%3,$r13$r11$r12$r10\n\t"
+		".err\n\t"
+		".endif\n\t"
+		"subq 11*4,$sp\n\t"
+		"movem $r10,[$sp]\n\t"
+		"subq 44,$r12\n\t"
+		"0:\n\t"
+		"movem [$r11+],$r10\n\t"
+		"1:\n\t"
+		"subq 44,$r12\n\t"
+		"bge 0b\n\t"
+		"movem $r10,[$r13+]\n\t"
+		"addq 44,$r12\n\t"
+		"movem [$sp+],$r10\n\t"
+		"4:\n\t"
+		".section .fixup,\"ax\"\n\t"
+		"3:\n\t"
+		"movem [$sp+],$r10\n\t"
+		"addq 44,$r12\n\t"
+		"subq 44,$r11\n\t"
+		"jump 4b\n\t"
+		".previous\n\t"
+		".section __ex_table,\"a\"\n\t"
+		".dword 1b,3b\n\t"
+		".previous\n\t"
 
 	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
 	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
@@ -425,66 +372,50 @@
    If you want to check that the allocation was right; then
    check the equalities in the first comment.  It should say
    something like "r13=r13, r11=r11, r12=r12". */
-	__asm__ volatile ("
-	.ifnc %0%1%2,$r13$r12$r10			\n\
-	.err						\n\
-	.endif						\n\
-
-	;; Save the registers we'll clobber in the movem process
-	;; on the stack.  Don't mention them to gcc, it will only be
-	;; upset.
-	subq	11*4,$sp
-	movem	$r10,[$sp]
-
-	clear.d $r0
-	clear.d $r1
-	clear.d $r2
-	clear.d $r3
-	clear.d $r4
-	clear.d $r5
-	clear.d $r6
-	clear.d $r7
-	clear.d $r8
-	clear.d $r9
-	clear.d $r10
-	clear.d $r11
-
-	;; Now we've got this:
-	;; r13 - dst
-	;; r12 - n
-
-	;; Update n for the first loop
-	subq	12*4,$r12
-0:
-	subq	12*4,$r12
-	bge	0b
-	movem	$r11,[$r13+]
-1:
-	addq	12*4,$r12 ;; compensate for last loop underflowing n
-
-	;; Restore registers from stack
-	movem [$sp+],$r10
-2:
-	.section .fixup,\"ax\"
-3:
-	move.d [$sp],$r10
-	addq 12*4,$r10
-	move.d $r10,[$sp]
-	clear.d $r10
-	jump 0b
-
-4:
-	movem [$sp+],$r10
-	addq 12*4,$r10
-	addq 12*4,$r12
-	jump 2b
-
-	.previous
-	.section __ex_table,\"a\"
-	.dword 0b,3b
-	.dword 1b,4b
-	.previous"
-
+	__asm__ volatile (
+		".ifnc %0%1%2,$r13$r12$r10\n\t"
+		".err\n\t"
+		".endif\n\t"
+		"subq 11*4,$sp\n\t"
+		"movem $r10,[$sp]\n\t"
+		"clear.d $r0\n\t"
+		"clear.d $r1\n\t"
+		"clear.d $r2\n\t"
+		"clear.d $r3\n\t"
+		"clear.d $r4\n\t"
+		"clear.d $r5\n\t"
+		"clear.d $r6\n\t"
+		"clear.d $r7\n\t"
+		"clear.d $r8\n\t"
+		"clear.d $r9\n\t"
+		"clear.d $r10\n\t"
+		"clear.d $r11\n\t"
+		"subq 12*4,$r12\n\t"
+		"0:\n\t"
+		"subq 12*4,$r12\n\t"
+		"bge 0b\n\t"
+		"movem $r11,[$r13+]\n\t"
+		"1:\n\t"
+		"addq 12*4,$r12\n\t"
+		"movem [$sp+],$r10\n\t"
+		"2:\n\t"
+		".section .fixup,\"ax\"\n\t"
+		"3:\n\t"
+		"move.d [$sp],$r10\n\t"
+		"addq 12*4,$r10\n\t"
+		"move.d $r10,[$sp]\n\t"
+		"clear.d $r10\n\t"
+		"jump 0b\n\t"
+		"4:\n\t"
+		"movem [$sp+],$r10\n\t"
+		"addq 12*4,$r10\n\t"
+		"addq 12*4,$r12\n\t"
+		"jump 2b\n\t"
+		".previous\n\t"
+		".section __ex_table,\"a\"\n\t"
+		".dword 0b,3b\n\t"
+		".dword 1b,4b\n\t"
+		".previous\n\t"
 
 	/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
 	/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
 	/* Clobber */ : "r11");
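Why the old routines are replaced rather than patched up: they lean on two GCC extensions that later compilers dropped. Multi-line string literals inside __asm__ statements (visible in all the removed asm blocks above) stopped being accepted in the GCC 3.x series, and the cast-as-lvalue idiom *((long*)dst)++ used by the unrolled loops in memset.c and string.c was removed in GCC 4.0. The fragment below is only an illustration of the two conversions, not part of the patch; my_store_long() and my_two_nops() are made-up names.

	/* Cast-as-lvalue, removed in GCC 4.0 -- this no longer compiles:
	 *
	 *	*((long *) dst)++ = lc;
	 *
	 * Standard C replacement: store through the cast, then step the
	 * pointer explicitly.
	 */
	static inline char *my_store_long(char *dst, long lc)
	{
		*(long *) dst = lc;	/* caller must guarantee alignment */
		return dst + sizeof(long);
	}

	/* Multi-line string literal, no longer accepted by GCC 3.x and up:
	 *
	 *	__asm__ volatile ("
	 *	nop");
	 *
	 * Replacement, as in the usercopy.c hunks above: one quoted
	 * instruction per line, joined by C string concatenation.
	 */
	static inline void my_two_nops(void)
	{
		__asm__ volatile ("nop\n\t"
				  "nop");
	}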
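The replacements trade the unrolled movem fast paths for plain byte loops, so their behaviour is easy to sanity-check in user space. The harness below is a minimal stand-alone sketch, not part of the patch: the my_ prefixes only avoid clashing with libc, and it compares the new implementations against the host's memset()/memcpy() for every length up to the old 48-byte movem block, including the small sizes the deleted switch() special-cased.

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	/* The patch's replacement memset(), renamed for a user-space build. */
	static void *my_memset(void *s, int c, size_t count)
	{
		char *xs = s;

		while (count--)
			*xs++ = c;
		return s;
	}

	/* The patch's replacement memcpy(), likewise renamed. */
	static void *my_memcpy(void *dest, const void *src, size_t count)
	{
		char *tmp = dest;
		const char *s = src;

		while (count--)
			*tmp++ = *s++;
		return dest;
	}

	int main(void)
	{
		unsigned char got[64], ref[64], dst[64];
		size_t n;

		for (n = 0; n <= 48; n++) {
			memset(got, 0xAA, sizeof(got));
			memset(ref, 0xAA, sizeof(ref));

			/* Both functions must return their first argument. */
			assert(my_memset(got, 0x5C, n) == got);
			memset(ref, 0x5C, n);
			assert(memcmp(got, ref, sizeof(got)) == 0);

			memset(dst, 0, sizeof(dst));
			assert(my_memcpy(dst, ref, n) == dst);
			assert(memcmp(dst, ref, n) == 0);
		}
		puts("memset/memcpy replacements: OK");
		return 0;
	}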