diff options
Diffstat (limited to 'toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch')
-rw-r--r-- | toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch | 1139 |
1 files changed, 0 insertions, 1139 deletions
diff --git a/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch b/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch deleted file mode 100644 index 8518ccf66..000000000 --- a/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch +++ /dev/null @@ -1,1139 +0,0 @@ -Subject: [PATCH] AVR32-optimized string operations - -Add hand-optimized AVR32-specific string operations. Some of them -need a bit more testing, though. - ---- - - libc/string/avr32/Makefile | 40 +++++++++++ - libc/string/avr32/bcopy.S | 15 ++++ - libc/string/avr32/bzero.S | 12 +++ - libc/string/avr32/memchr.S | 62 +++++++++++++++++ - libc/string/avr32/memcmp.S | 50 +++++++++++++ - libc/string/avr32/memcpy.S | 110 ++++++++++++++++++++++++++++++ - libc/string/avr32/memmove.S | 114 +++++++++++++++++++++++++++++++ - libc/string/avr32/memset.S | 60 ++++++++++++++++ - libc/string/avr32/strcat.S | 95 ++++++++++++++++++++++++++ - libc/string/avr32/strcmp.S | 80 ++++++++++++++++++++++ - libc/string/avr32/strcpy.S | 63 +++++++++++++++++ - libc/string/avr32/stringtest.c | 144 ++++++++++++++++++++++++++++++++++++++++ - libc/string/avr32/strlen.S | 52 ++++++++++++++ - libc/string/avr32/strncpy.S | 77 +++++++++++++++++++++ - libc/string/avr32/test_memcpy.c | 66 ++++++++++++++++++ - 15 files changed, 1040 insertions(+) - -Index: uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,15 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+ .text -+ .global bcopy -+ .type bcopy, @function -+ .align 1 -+bcopy: -+ /* Swap the first two arguments */ -+ eor r11, r12 -+ eor r12, r11 -+ eor r11, r12 -+ rjmp __memmove -+ .size bcopy, . - bcopy -Index: uClibc-0.9.28-avr32/libc/string/avr32/bzero.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/bzero.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,12 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+ .text -+ .global bzero -+ .type bzero, @function -+ .align 1 -+bzero: -+ mov r10, r11 -+ mov r11, 0 -+ rjmp __memset -Index: uClibc-0.9.28-avr32/libc/string/avr32/Makefile -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/Makefile 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,40 @@ -+# Makefile for uClibc -+# -+# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org> -+# -+# This program is free software; you can redistribute it and/or modify it under -+# the terms of the GNU Library General Public License as published by the Free -+# Software Foundation; either version 2 of the License, or (at your option) any -+# later version. -+# -+# This program is distributed in the hope that it will be useful, but WITHOUT -+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more -+# details. -+# -+# You should have received a copy of the GNU Library General Public License -+# along with this program; if not, write to the Free Software Foundation, Inc., -+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+TOPDIR=../../../ -+include $(TOPDIR)Rules.mak -+ -+SSRC := bcopy.S bzero.S memcmp.S memcpy.S memmove.S -+SSRC += memset.S strcmp.S strlen.S -+# memchr.S, strcat.S, strcpy.S, strncpy.S is broken -+SOBJS := $(patsubst %.S,%.o, $(SSRC)) -+OBJS := $(SOBJS) -+ -+OBJ_LIST:= ../../obj.string.$(TARGET_ARCH) -+ -+all: $(OBJ_LIST) -+ -+$(OBJ_LIST): $(OBJS) -+ echo $(addprefix string/$(TARGET_ARCH)/, $(OBJS)) > $@ -+ -+$(SOBJS): %.o: %.S -+ $(CC) $(ASFLAGS) -c $< -o $@ -+ $(STRIPTOOL) -x -R .note -R .comment $@ -+ -+clean: -+ $(RM) *.[oa] *~ core -Index: uClibc-0.9.28-avr32/libc/string/avr32/memchr.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/memchr.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+#define str r12 -+#define chr r11 -+#define len r10 -+ -+ .text -+ .global memchr -+ .type memchr, @function -+memchr: -+ or chr, chr, chr << 8 -+ or chr, chr, chr << 16 -+ -+ mov r9, str -+ andl r9, 3, COH -+ brne .Lunaligned_str -+ -+1: sub len, 4 -+ brlt 2f -+ ld.w r8, str++ -+ psub.b r9, r8, r11 -+ tnbz r9 -+ brne 1b -+ -+ sub str, 4 -+ bfextu r9, r8, 24, 8 -+ cp.b r9, r11 -+ reteq str -+ sub str, -1 -+ bfextu r9, r8, 16, 8 -+ cp.b r9, r11 -+ reteq str -+ sub str, -1 -+ bfextu r9, r8, 8, 8 -+ cp.b r9, r11 -+ reteq str -+ sub str, -1 -+ retal str -+ -+2: sub len, -4 -+ reteq 0 -+ -+3: ld.ub r8, str++ -+ cp.w r8, 0 -+ reteq str -+ sub len, 1 -+ brne 3b -+ -+ retal 0 -+ -+.Lunaligned_str: -+1: sub len, 1 -+ retlt 0 -+ ld.ub r8, str++ -+ cp.b r8, r11 -+ reteq str -+ sub r9, 1 -+ brge 1b -+ -+ rjmp .Laligned_search -Index: uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S 2006-10-20 10:42:09.000000000 +0200 -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway. -+ */ -+ -+#define s1 r12 -+#define s2 r11 -+#define len r10 -+ -+ .text -+ .global memcmp -+ .type memcmp, @function -+ .align 1 -+memcmp: -+ sub len, 4 -+ brlt .Lless_than_4 -+ -+1: ld.w r8, s1++ -+ ld.w r9, s2++ -+ cp.w r8, r9 -+ brne .Lfound_word -+ sub len, 4 -+ brge 1b -+ -+.Lless_than_4: -+ sub len, -4 -+ reteq 0 -+ -+1: ld.ub r8, s1++ -+ ld.ub r9, s2++ -+ sub r8, r9 -+ retne r8 -+ sub len, 1 -+ brgt 1b -+ -+ retal 0 -+ -+.Lfound_word: -+ psub.b r9, r8, r9 -+ bfextu r8, r9, 24, 8 -+ retne r8 -+ bfextu r8, r9, 16, 8 -+ retne r8 -+ bfextu r8, r9, 8, 8 -+ retne r8 -+ retal r9 -+ -+ .size memcmp, . - memcmp -+ -+ .weak bcmp -+ bcmp = memcmp -Index: uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,110 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+/* Don't use r12 as dst since we must return it unmodified */ -+#define dst r9 -+#define src r11 -+#define len r10 -+ -+ .text -+ .global memcpy -+ .type memcpy, @function -+ -+ .global __memcpy -+ .hidden __memcpy -+ .type __memcpy, @function -+memcpy: -+__memcpy: -+ pref src[0] -+ mov dst, r12 -+ -+ /* If we have less than 32 bytes, don't do anything fancy */ -+ cp.w len, 32 -+ brge .Lmore_than_31 -+ -+ sub len, 1 -+ retlt r12 -+1: ld.ub r8, src++ -+ st.b dst++, r8 -+ sub len, 1 -+ brge 1b -+ retal r12 -+ -+.Lmore_than_31: -+ pushm r0-r7, lr -+ -+ /* Check alignment */ -+ mov r8, src -+ andl r8, 31, COH -+ brne .Lunaligned_src -+ mov r8, dst -+ andl r8, 3, COH -+ brne .Lunaligned_dst -+ -+.Laligned_copy: -+ sub len, 32 -+ brlt .Lless_than_32 -+ -+1: /* Copy 32 bytes at a time */ -+ ldm src, r0-r7 -+ sub src, -32 -+ stm dst, r0-r7 -+ sub dst, -32 -+ sub len, 32 -+ brge 1b -+ -+.Lless_than_32: -+ /* Copy 16 more bytes if possible */ -+ sub len, -16 -+ brlt .Lless_than_16 -+ ldm src, r0-r3 -+ sub src, -16 -+ sub len, 16 -+ stm dst, r0-r3 -+ sub dst, -16 -+ -+.Lless_than_16: -+ /* Do the remaining as byte copies */ -+ neg len -+ add pc, pc, len << 2 -+ .rept 15 -+ ld.ub r0, src++ -+ st.b dst++, r0 -+ .endr -+ -+ popm r0-r7, pc -+ -+.Lunaligned_src: -+ /* Make src cacheline-aligned. r8 = (src & 31) */ -+ rsub r8, r8, 32 -+ sub len, r8 -+1: ld.ub r0, src++ -+ st.b dst++, r0 -+ sub r8, 1 -+ brne 1b -+ -+ /* If dst is word-aligned, we're ready to go */ -+ pref src[0] -+ mov r8, 3 -+ tst dst, r8 -+ breq .Laligned_copy -+ -+.Lunaligned_dst: -+ /* src is aligned, but dst is not. Expect bad performance */ -+ sub len, 4 -+ brlt 2f -+1: ld.w r0, src++ -+ st.w dst++, r0 -+ sub len, 4 -+ brge 1b -+ -+2: neg len -+ add pc, pc, len << 2 -+ .rept 3 -+ ld.ub r0, src++ -+ st.b dst++, r0 -+ .endr -+ -+ popm r0-r7, pc -+ .size memcpy, . - memcpy -Index: uClibc-0.9.28-avr32/libc/string/avr32/memmove.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/memmove.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+#define dst r12 -+#define src r11 -+#define len r10 -+ -+ .text -+ .global memmove -+ .type memmove, @function -+ -+ .global __memmove -+ .hidden __memmove -+ .type __memmove, @function -+memmove: -+__memmove: -+ cp.w src, dst -+ brge __memcpy -+ -+ add dst, len -+ add src, len -+ pref src[-1] -+ -+ /* -+ * The rest is basically the same as in memcpy.S except that -+ * the direction is reversed. -+ */ -+ cp.w len, 32 -+ brge .Lmore_than_31 -+ -+ sub len, 1 -+ retlt r12 -+1: ld.ub r8, --src -+ st.b --dst, r8 -+ sub len, 1 -+ brge 1b -+ retal r12 -+ -+.Lmore_than_31: -+ pushm r0-r7, lr -+ -+ /* Check alignment */ -+ mov r8, src -+ andl r8, 31, COH -+ brne .Lunaligned_src -+ mov r8, r12 -+ andl r8, 3, COH -+ brne .Lunaligned_dst -+ -+.Laligned_copy: -+ sub len, 32 -+ brlt .Lless_than_32 -+ -+1: /* Copy 32 bytes at a time */ -+ sub src, 32 -+ ldm src, r0-r7 -+ sub dst, 32 -+ sub len, 32 -+ stm dst, r0-r7 -+ brge 1b -+ -+.Lless_than_32: -+ /* Copy 16 more bytes if possible */ -+ sub len, -16 -+ brlt .Lless_than_16 -+ sub src, 16 -+ ldm src, r0-r3 -+ sub dst, 16 -+ sub len, 16 -+ stm dst, r0-r3 -+ -+.Lless_than_16: -+ /* Do the remaining as byte copies */ -+ sub len, -16 -+ breq 2f -+1: ld.ub r0, --src -+ st.b --dst, r0 -+ sub len, 1 -+ brne 1b -+ -+2: popm r0-r7, pc -+ -+.Lunaligned_src: -+ /* Make src cacheline-aligned. r8 = (src & 31) */ -+ sub len, r8 -+1: ld.ub r0, --src -+ st.b --dst, r0 -+ sub r8, 1 -+ brne 1b -+ -+ /* If dst is word-aligned, we're ready to go */ -+ pref src[-4] -+ mov r8, 3 -+ tst dst, r8 -+ breq .Laligned_copy -+ -+.Lunaligned_dst: -+ /* src is aligned, but dst is not. Expect bad performance */ -+ sub len, 4 -+ brlt 2f -+1: ld.w r0, --src -+ st.w --dst, r0 -+ sub len, 4 -+ brge 1b -+ -+2: neg len -+ add pc, pc, len << 2 -+ .rept 3 -+ ld.ub r0, --src -+ st.b --dst, r0 -+ .endr -+ -+ popm r0-r7, pc -Index: uClibc-0.9.28-avr32/libc/string/avr32/memset.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/memset.S 2006-10-20 10:42:15.000000000 +0200 -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway. -+ */ -+ -+#define s r12 -+#define c r11 -+#define n r10 -+ -+ .text -+ .global memset -+ .type memset, @function -+ -+ .global __memset -+ .hidden __memset -+ .type __memset, @function -+ -+ .align 1 -+memset: -+__memset: -+ cp.w n, 32 -+ mov r9, s -+ brge .Llarge_memset -+ -+ sub n, 1 -+ retlt s -+1: st.b s++, c -+ sub n, 1 -+ brge 1b -+ -+ retal r9 -+ -+.Llarge_memset: -+ mov r8, r11 -+ mov r11, 3 -+ bfins r8, r8, 8, 8 -+ bfins r8, r8, 16, 16 -+ tst s, r11 -+ breq 2f -+ -+1: st.b s++, r8 -+ sub n, 1 -+ tst s, r11 -+ brne 1b -+ -+2: mov r11, r9 -+ mov r9, r8 -+ sub n, 8 -+ -+3: st.d s++, r8 -+ sub n, 8 -+ brge 3b -+ -+ /* If we are done, n == -8 and we'll skip all st.b insns below */ -+ neg n -+ lsl n, 1 -+ add pc, n -+ .rept 7 -+ st.b s++, r8 -+ .endr -+ retal r11 -Index: uClibc-0.9.28-avr32/libc/string/avr32/strcat.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/strcat.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+#define s1 r9 -+#define s2 r11 -+ -+ .text -+ .global strcat -+ .type strcat, @function -+ .align 1 -+strcat: -+ mov s1, r12 -+ -+ /* Make sure s1 is word-aligned */ -+ mov r10, s1 -+ andl r10, 3, COH -+ breq 2f -+ -+ add pc, pc, r10 << 3 -+ sub r0, r0, 0 /* 4-byte nop */ -+ ld.ub r8, s1++ -+ sub r8, r8, 0 -+ breq 2f -+ ld.ub r8, s1++ -+ sub r8, r8, 0 -+ breq 3f -+ ld.ub r8, s1++ -+ sub r8, r8, 0 -+ breq 4f -+ -+ /* Find the end of the first string */ -+5: ld.w r8, s1++ -+ tnbz r8 -+ brne 5b -+ -+ sub s1, 4 -+ -+ bfextu r10, r8, 24, 8 -+ cp.w r10, 0 -+ breq 1f -+ sub s1, -1 -+ bfextu r10, r8, 16, 8 -+ cp.w r10, 0 -+ breq 2f -+ sub s1, -1 -+ bfextu r10, r8, 8, 8 -+ cp.w r10, 0 -+ breq 3f -+ sub s1, -1 -+ rjmp 4f -+ -+ /* Now, append s2 */ -+1: ld.ub r8, s2++ -+ st.b s1++, r8 -+ cp.w r8, 0 -+ reteq r12 -+2: ld.ub r8, s2++ -+ st.b s1++, r8 -+ cp.w r8, 0 -+ reteq r12 -+3: ld.ub r8, s2++ -+ st.b s1++, r8 -+ cp.w r8, 0 -+ reteq r12 -+4: ld.ub r8, s2++ -+ st.b s1++, r8 -+ cp.w r8, 0 -+ reteq r12 -+ -+ /* Copy one word at a time */ -+ ld.w r8, s2++ -+ tnbz r8 -+ breq 2f -+1: st.w r8, s2++ -+ ld.w r8, s2++ -+ tnbz r8 -+ brne 1b -+ -+ /* Copy the remaining bytes */ -+ bfextu r10, r8, 24, 8 -+ st.b s1++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ bfextu r10, r8, 16, 8 -+ st.b s1++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ bfextu r10, r8, 8, 8 -+ st.b s1++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ st.b s1++, r8 -+ retal r12 -+ .size strcat, . - strcat -Index: uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway. -+ */ -+ -+#define s1 r12 -+#define s2 r11 -+#define len r10 -+ -+ .text -+ .global strcmp -+ .type strcmp, @function -+ .align 1 -+strcmp: -+ mov r8, 3 -+ tst s1, r8 -+ brne .Lunaligned_s1 -+ tst s2, r8 -+ brne .Lunaligned_s2 -+ -+1: ld.w r8, s1++ -+ ld.w r9, s2++ -+ cp.w r8, r9 -+ brne 2f -+ tnbz r8 -+ brne 1b -+ retal 0 -+ -+2: bfextu r12, r8, 24, 8 -+ bfextu r11, r9, 24, 8 -+ sub r12, r11 -+ retne r12 -+ cp.w r11, 0 -+ reteq 0 -+ bfextu r12, r8, 16, 8 -+ bfextu r11, r9, 16, 8 -+ sub r12, r11 -+ retne r12 -+ cp.w r11, 0 -+ reteq 0 -+ bfextu r12, r8, 8, 8 -+ bfextu r11, r9, 8, 8 -+ sub r12, r11 -+ retne r12 -+ cp.w r11, 0 -+ reteq 0 -+ bfextu r12, r8, 0, 8 -+ bfextu r11, r9, 0, 8 -+ sub r12, r11 -+ retal r12 -+ -+.Lunaligned_s1: -+3: tst s1, r8 -+ breq 4f -+ ld.ub r10, s1++ -+ ld.ub r9, s2++ -+ sub r10, r9 -+ retne r10 -+ cp.w r9, 0 -+ brne 3b -+ retal r10 -+ -+4: tst s2, r8 -+ breq 1b -+ -+.Lunaligned_s2: -+ /* -+ * s1 and s2 can't both be aligned, and unaligned word loads -+ * can trigger spurious exceptions if we cross a page boundary. -+ * Do it the slow way... -+ */ -+1: ld.ub r8, s1++ -+ ld.ub r9, s2++ -+ sub r8, r9 -+ retne r8 -+ cp.w r9, 0 -+ brne 1b -+ retal 0 -+ -+ .weak strcoll -+ strcoll = strcmp -Index: uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,63 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ * -+ * To reduce the size, this one might simply call strncpy with len = -1. -+ */ -+ -+#define dst r9 -+#define src r11 -+ -+ .text -+ .global strcpy -+ .type strcpy, @function -+strcpy: -+ mov dst, r12 -+ -+ pref src[0] -+ -+ /* -+ * Check alignment. If src is aligned but dst isn't, we can't -+ * do much about it... -+ */ -+ mov r8, src -+ andl r8, 3 COH -+ brne .Lunaligned_src -+ -+.Laligned_copy: -+1: ld.w r8, src++ -+ tnbz r8 -+ breq 2f -+ st.w dst++, r8 -+ rjmp 1b -+ -+2: /* -+ * Ok, r8 now contains the terminating '\0'. Copy the -+ * remaining bytes individually. -+ */ -+ bfextu r10, r8, 24, 8 -+ st.b dst++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ bfextu r10, r8, 16, 8 -+ st.b dst++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ bfextu r10, r8, 8, 8 -+ st.b dst++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ st.b dst++, r8 -+ retal r12 -+ -+.Lunaligned_src: -+ /* Copy bytes until we're aligned */ -+ rsub r8, r8, 4 -+ add pc, pc, r8 << 3 -+ nop -+ nop -+ ld.ub r10, src++ -+ st.b dst++, r10 -+ cp.w r10, 0 -+ reteq r12 -+ -+ rjmp .Laligned_copy -Index: uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,144 @@ -+ -+#include <stdio.h> -+#include <string.h> -+#include <time.h> -+#include <sys/mman.h> -+ -+#define BUF_SIZE (8 * 1024) -+ -+static char *buf1; -+static char *buf1_ref; -+static char *buf2; -+ -+extern void *optimized_memcpy(void *dest, void *src, size_t len); -+extern void *optimized_memmove(void *dest, void *src, size_t len); -+extern char *optimized_strcpy(char *dest, char *src); -+extern char *optimized_strncpy(char *dest, char *src, size_t len); -+ -+void dump_mismatch(char *buf, char *ref, size_t len) -+{ -+ int i, j; -+ -+ for (i = 0; i < len; i += 16) { -+ if (memcmp(buf + i, ref + i, 16) == 0) -+ continue; -+ -+ printf("%4x buf:", i); -+ for (j = i; j < (i + 16); j++) -+ printf(" %02x", buf[j]); -+ printf("\n ref:"); -+ for (j = i; j < (i + 16); j++) -+ printf(" %02x", ref[j]); -+ printf("\n"); -+ } -+} -+ -+static void test_memcpy(int src_offset, int dst_offset, int len) -+{ -+ clock_t start, old, new; -+ int i; -+ -+ memset(buf1, 0x55, BUF_SIZE); -+ memset(buf1_ref, 0x55, BUF_SIZE); -+ memset(buf2, 0xaa, BUF_SIZE); -+ -+ printf("Testing memcpy with offsets %d => %d and len %d...", -+ src_offset, dst_offset, len); -+ -+ start = clock(); -+ for (i = 0; i < 8192; i++) -+ optimized_memcpy(buf1 + dst_offset, buf2 + src_offset, len); -+ new = clock() - start; -+ start = clock(); -+ for ( i = 0; i < 8192; i++) -+ memcpy(buf1_ref + dst_offset, buf2 + src_offset, len); -+ old = clock() - start; -+ -+ if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0) -+ printf("OK\n"); -+ else { -+ printf("FAILED\n"); -+ dump_mismatch(buf1, buf1_ref, BUF_SIZE); -+ } -+ printf("CPU time used: %d vs. %d\n", new, old); -+} -+ -+static void test_memmove(int src_offset, int dst_offset, int len) -+{ -+ clock_t start, old, new; -+ -+ memset(buf1, 0x55, BUF_SIZE); -+ memset(buf1_ref, 0x55, BUF_SIZE); -+ memset(buf2, 0xaa, BUF_SIZE); -+ -+ printf("Testing memmove with offsets %d => %d and len %d...", -+ src_offset, dst_offset, len); -+ -+ start = clock(); -+ optimized_memmove(buf1 + dst_offset, buf2 + src_offset, len); -+ new = clock() - start; -+ start = clock(); -+ memmove(buf1_ref + dst_offset, buf2 + src_offset, len); -+ old = clock() - start; -+ -+ if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0) -+ printf("OK\n"); -+ else { -+ printf("FAILED\n"); -+ dump_mismatch(buf1, buf1_ref, BUF_SIZE); -+ } -+ printf("CPU time used: %d vs. %d\n", new, old); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ buf2 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); -+ if (buf2 == MAP_FAILED) { -+ perror("Failed to allocate memory for buf2"); -+ return 1; -+ } -+ buf1 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); -+ if (buf1 == MAP_FAILED) { -+ perror("Failed to allocate memory for buf1"); -+ return 1; -+ } -+ buf1_ref = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); -+ if (buf1_ref == MAP_FAILED) { -+ perror("Failed to allocate memory for buf1_ref"); -+ return 1; -+ } -+ printf("\n === MEMCPY ===\n\n"); -+ -+ test_memcpy(0, 0, BUF_SIZE - 32); -+ test_memcpy(0, 0, 1); -+ test_memcpy(0, 0, 31); -+ test_memcpy(0, 0, 32); -+ test_memcpy(0, 0, 127); -+ test_memcpy(0, 0, 128); -+ test_memcpy(4, 4, BUF_SIZE - 32 - 4); -+ test_memcpy(1, 1, BUF_SIZE - 32 - 1); -+ test_memcpy(1, 1, 126); -+ test_memcpy(0, 3, 128); -+ test_memcpy(1, 4, 128); -+ test_memcpy(0, 0, 0); -+ -+ printf("\n === MEMMOVE ===\n\n"); -+ -+ test_memmove(0, 0, BUF_SIZE - 32); -+ test_memmove(0, 0, 1); -+ test_memmove(0, 0, 31); -+ test_memmove(0, 0, 32); -+ test_memmove(0, 0, BUF_SIZE - 33); -+ test_memmove(0, 0, 128); -+ test_memmove(4, 4, BUF_SIZE - 32 - 4); -+ test_memmove(1, 1, BUF_SIZE - 32 - 1); -+ test_memmove(1, 1, BUF_SIZE - 130); -+ test_memmove(0, 3, BUF_SIZE - 128); -+ test_memmove(1, 4, BUF_SIZE - 128); -+ test_memmove(0, 0, 0); -+ -+ return 0; -+} -Index: uClibc-0.9.28-avr32/libc/string/avr32/strlen.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/strlen.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+#define str r12 -+ -+ .text -+ .global strlen -+ .type strlen, @function -+strlen: -+ mov r11, r12 -+ -+ mov r9, str -+ andl r9, 3, COH -+ brne .Lunaligned_str -+ -+1: ld.w r8, str++ -+ tnbz r8 -+ brne 1b -+ -+ sub r12, r11 -+ bfextu r9, r8, 24, 8 -+ cp.w r9, 0 -+ subeq r12, 4 -+ reteq r12 -+ bfextu r9, r8, 16, 8 -+ cp.w r9, 0 -+ subeq r12, 3 -+ reteq r12 -+ bfextu r9, r8, 8, 8 -+ cp.w r9, 0 -+ subeq r12, 2 -+ reteq r12 -+ sub r12, 1 -+ retal r12 -+ -+.Lunaligned_str: -+ add pc, pc, r9 << 3 -+ sub r0, r0, 0 /* 4-byte nop */ -+ ld.ub r8, str++ -+ sub r8, r8, 0 -+ breq 1f -+ ld.ub r8, str++ -+ sub r8, r8, 0 -+ breq 1f -+ ld.ub r8, str++ -+ sub r8, r8, 0 -+ brne 1b -+ -+1: sub r12, 1 -+ sub r12, r11 -+ retal r12 -Index: uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,77 @@ -+/* -+ * Copyright (C) 2004 Atmel Norway -+ */ -+ -+#define dst r9 -+#define src r11 -+ -+ .text -+ .global strcpy -+ .type strncpy, @function -+strncpy: -+ mov dst, r12 -+ -+ pref src[0] -+ mov dst, r12 -+ -+ /* -+ * Check alignment. If src is aligned but dst isn't, we can't -+ * do much about it... -+ */ -+ mov r8, src -+ andl r8, 3 COH -+ brne .Lunaligned_src -+ -+.Laligned_copy: -+ sub r10, 4 -+ brlt 3f -+1: ld.w r8, src++ -+ tnbz r8 -+ breq 2f -+ st.w dst++, r8 -+ sub r10, 4 -+ brne 1b -+ -+3: sub r10, -4 -+ reteq r12 -+ -+ /* This is safe as long as src is word-aligned and r10 > 0 */ -+ ld.w r8, src++ -+ -+2: /* -+ * Ok, r8 now contains the terminating '\0'. Copy the -+ * remaining bytes individually. -+ */ -+ bfextu r11, r8, 24, 8 -+ st.b dst++, r11 -+ cp.w r11, 0 -+ reteq r12 -+ sub r10, 1 -+ reteq r12 -+ bfextu r11, r8, 16, 8 -+ st.b dst++, r11 -+ cp.w r11, 0 -+ reteq r12 -+ sub r10, 1 -+ reteq r12 -+ bfextu r11, r8, 8, 8 -+ st.b dst++, r11 -+ cp.w r11, 0 -+ reteq r12 -+ sub r10, 1 -+ reteq r12 -+ st.b dst++, r8 -+ retal r12 -+ -+.Lunaligned_src: -+ /* Copy bytes until we're aligned */ -+ min r8, r8, r10 -+ sub r10, r8 -+ sub r8, 1 -+ retlt r12 -+1: ld.ub r10, src++ -+ st.b dst++, r10 -+ sub r8, 1 -+ brge 1b -+ -+ rjmp .Laligned_copy -Index: uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c 2006-10-19 15:05:52.000000000 +0200 -@@ -0,0 +1,66 @@ -+ -+#include <stdio.h> -+#include <string.h> -+ -+#define BUF_SIZE 32768 -+ -+static char buf1[BUF_SIZE] __attribute__((aligned(32))); -+static char buf1_ref[BUF_SIZE] __attribute__((aligned(32))); -+static char buf2[BUF_SIZE] __attribute__((aligned(32))); -+ -+extern void *new_memcpy(void *dest, void *src, size_t len); -+ -+void dump_mismatch(char *buf, char *ref, size_t len) -+{ -+ int i, j; -+ -+ for (i = 0; i < len; i += 16) { -+ if (memcmp(buf + i, ref + i, 16) == 0) -+ continue; -+ -+ printf("% 4x buf:", i); -+ for (j = i; j < (i + 16); j++) -+ printf(" %02x", buf[j]); -+ printf("\n ref:"); -+ for (j = i; j < (i + 16); j++) -+ printf(" %02x", ref[j]); -+ printf("\n"); -+ } -+} -+ -+void test(int src_offset, int dst_offset, int len) -+{ -+ memset(buf1, 0x55, sizeof(buf1)); -+ memset(buf1_ref, 0x55, sizeof(buf1_ref)); -+ memset(buf2, 0xaa, sizeof(buf2)); -+ -+ printf("Testing with offsets %d => %d and len %d...", -+ src_offset, dst_offset, len); -+ -+ new_memcpy(buf1 + dst_offset, buf2 + src_offset, len); -+ memcpy(buf1_ref + dst_offset, buf2 + src_offset, len); -+ -+ if (memcmp(buf1, buf1_ref, sizeof(buf1)) == 0) -+ printf("OK\n"); -+ else { -+ printf("FAILED\n"); -+ dump_mismatch(buf1, buf1_ref, sizeof(buf1)); -+ } -+} -+ -+int main(int argc, char *argv[]) -+{ -+ test(0, 0, BUF_SIZE); -+ test(0, 0, 1); -+ test(0, 0, 31); -+ test(0, 0, 32); -+ test(0, 0, 127); -+ test(0, 0, 128); -+ test(4, 4, BUF_SIZE - 4); -+ test(1, 1, BUF_SIZE - 1); -+ test(1, 1, 126); -+ test(0, 3, 128); -+ test(1, 4, 128); -+ -+ return 0; -+} |