diff options
Diffstat (limited to 'toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch')
-rw-r--r-- | toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch | 1139 |
1 files changed, 1139 insertions, 0 deletions
diff --git a/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch b/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch new file mode 100644 index 000000000..8518ccf66 --- /dev/null +++ b/toolchain/uClibc/patches-0.9.28/402-avr32-string-ops.patch @@ -0,0 +1,1139 @@ +Subject: [PATCH] AVR32-optimized string operations + +Add hand-optimized AVR32-specific string operations. Some of them +need a bit more testing, though. + +--- + + libc/string/avr32/Makefile | 40 +++++++++++ + libc/string/avr32/bcopy.S | 15 ++++ + libc/string/avr32/bzero.S | 12 +++ + libc/string/avr32/memchr.S | 62 +++++++++++++++++ + libc/string/avr32/memcmp.S | 50 +++++++++++++ + libc/string/avr32/memcpy.S | 110 ++++++++++++++++++++++++++++++ + libc/string/avr32/memmove.S | 114 +++++++++++++++++++++++++++++++ + libc/string/avr32/memset.S | 60 ++++++++++++++++ + libc/string/avr32/strcat.S | 95 ++++++++++++++++++++++++++ + libc/string/avr32/strcmp.S | 80 ++++++++++++++++++++++ + libc/string/avr32/strcpy.S | 63 +++++++++++++++++ + libc/string/avr32/stringtest.c | 144 ++++++++++++++++++++++++++++++++++++++++ + libc/string/avr32/strlen.S | 52 ++++++++++++++ + libc/string/avr32/strncpy.S | 77 +++++++++++++++++++++ + libc/string/avr32/test_memcpy.c | 66 ++++++++++++++++++ + 15 files changed, 1040 insertions(+) + +Index: uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,15 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++ .text ++ .global bcopy ++ .type bcopy, @function ++ .align 1 ++bcopy: ++ /* Swap the first two arguments */ ++ eor r11, r12 ++ eor r12, r11 ++ eor r11, r12 ++ rjmp __memmove ++ .size bcopy, . 
- bcopy +Index: uClibc-0.9.28-avr32/libc/string/avr32/bzero.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/bzero.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,12 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++ .text ++ .global bzero ++ .type bzero, @function ++ .align 1 ++bzero: ++ mov r10, r11 ++ mov r11, 0 ++ rjmp __memset +Index: uClibc-0.9.28-avr32/libc/string/avr32/Makefile +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/Makefile 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,40 @@ ++# Makefile for uClibc ++# ++# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org> ++# ++# This program is free software; you can redistribute it and/or modify it under ++# the terms of the GNU Library General Public License as published by the Free ++# Software Foundation; either version 2 of the License, or (at your option) any ++# later version. ++# ++# This program is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more ++# details. 
++# ++# You should have received a copy of the GNU Library General Public License ++# along with this program; if not, write to the Free Software Foundation, Inc., ++# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ ++TOPDIR=../../../ ++include $(TOPDIR)Rules.mak ++ ++SSRC := bcopy.S bzero.S memcmp.S memcpy.S memmove.S ++SSRC += memset.S strcmp.S strlen.S ++# memchr.S, strcat.S, strcpy.S, strncpy.S is broken ++SOBJS := $(patsubst %.S,%.o, $(SSRC)) ++OBJS := $(SOBJS) ++ ++OBJ_LIST:= ../../obj.string.$(TARGET_ARCH) ++ ++all: $(OBJ_LIST) ++ ++$(OBJ_LIST): $(OBJS) ++ echo $(addprefix string/$(TARGET_ARCH)/, $(OBJS)) > $@ ++ ++$(SOBJS): %.o: %.S ++ $(CC) $(ASFLAGS) -c $< -o $@ ++ $(STRIPTOOL) -x -R .note -R .comment $@ ++ ++clean: ++ $(RM) *.[oa] *~ core +Index: uClibc-0.9.28-avr32/libc/string/avr32/memchr.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/memchr.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++#define str r12 ++#define chr r11 ++#define len r10 ++ ++ .text ++ .global memchr ++ .type memchr, @function ++memchr: /* r12 = str, r11 = chr, r10 = len (see the #defines above); this file is listed as broken in the Makefile and is not built */ ++ or chr, chr, chr << 8 /* chr |= chr << 8 */ ++ or chr, chr, chr << 16 /* chr |= chr << 16; assumes the upper 24 bits of chr were zero on entry - TODO confirm */ ++ ++ mov r9, str ++ andl r9, 3, COH /* r9 = str & 3 */ ++ brne .Lunaligned_str ++ ++1: sub len, 4 /* word loop: only entered while at least 4 bytes remain */ ++ brlt 2f ++ ld.w r8, str++ ++ psub.b r9, r8, r11 /* per-byte difference between the loaded word and the replicated chr */ ++ tnbz r9 /* a zero byte in the difference means one byte of the word equals chr */ ++ brne 1b ++ ++ sub str, 4 /* step back to the start of the word that contains the match */ ++ bfextu r9, r8, 24, 8 ++ cp.b r9, r11 ++ reteq str ++ sub str, -1 ++ bfextu r9, r8, 16, 8 ++ cp.b r9, r11 ++ reteq str ++ sub str, -1 ++ bfextu r9, r8, 8, 8 ++ cp.b r9, r11 ++ reteq str ++ sub str, -1 ++ retal str ++ ++2: sub len, -4 /* undo the last subtraction: len = bytes still unchecked (0..3) */ ++ reteq 0 ++ ++3: ld.ub r8, str++ ++ cp.w r8, 0 /* NOTE(review): compares the byte with 0, not with chr - confirm */ ++ reteq str /* NOTE(review): str was already post-incremented by the load above - confirm returned pointer */ ++ sub len, 1 ++ brne 3b ++ ++ retal 0 ++ ++.Lunaligned_str: ++1: sub len, 1 ++ retlt 0 ++ ld.ub r8, str++ ++ cp.b r8, r11 ++ reteq str /* NOTE(review): same post-increment concern as in the tail loop */ ++ sub r9, 1 /* NOTE(review): r9 starts as str & 3, so this loop runs (str & 3) + 1 times rather than until str is word-aligned - confirm */ ++ brge 1b ++ ++ rjmp .Laligned_search /* NOTE(review): no .Laligned_search label is defined in this file; the aligned loop is the unnamed 1: label near the top */ +Index: uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S
+=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S 2006-10-20 10:42:09.000000000 +0200 +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway. ++ */ ++ ++#define s1 r12 ++#define s2 r11 ++#define len r10 ++ ++ .text ++ .global memcmp ++ .type memcmp, @function ++ .align 1 ++memcmp: /* r12 = s1, r11 = s2, r10 = len (see the #defines above) */ ++ sub len, 4 ++ brlt .Lless_than_4 ++ ++1: ld.w r8, s1++ /* compare one word per iteration while at least 4 bytes remain */ ++ ld.w r9, s2++ ++ cp.w r8, r9 ++ brne .Lfound_word ++ sub len, 4 ++ brge 1b ++ ++.Lless_than_4: ++ sub len, -4 /* undo the last subtraction: len = bytes left (0..3) */ ++ reteq 0 ++ ++1: ld.ub r8, s1++ ++ ld.ub r9, s2++ ++ sub r8, r9 /* both bytes are zero-extended, so the difference carries the correct sign */ ++ retne r8 ++ sub len, 1 ++ brgt 1b ++ ++ retal 0 ++ ++.Lfound_word: ++ /* r8 != r9 here, so some byte differs and the loop below always leaves through retne */ ++2: bfextu r12, r8, 24, 8 /* most significant byte of each word, zero-extended */ ++ bfextu r11, r9, 24, 8 ++ sub r12, r11 /* signed difference of two unsigned bytes: negative when the s1 byte is smaller */ ++ retne r12 ++ lsl r8, 8 /* bytes were equal: shift the next byte into the top position */ ++ lsl r9, 8 ++ rjmp 2b ++ ++ .size memcmp, . - memcmp ++ ++ .weak bcmp ++ bcmp = memcmp +Index: uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++/* Don't use r12 as dst since we must return it unmodified */ ++#define dst r9 ++#define src r11 ++#define len r10 ++ ++ .text ++ .global memcpy ++ .type memcpy, @function ++ ++ .global __memcpy ++ .hidden __memcpy ++ .type __memcpy, @function ++memcpy: ++__memcpy: ++ pref src[0] ++ mov dst, r12 ++ ++ /* If we have less than 32 bytes, don't do anything fancy */ ++ cp.w len, 32 ++ brge .Lmore_than_31 ++ ++ sub len, 1 ++ retlt r12 ++1: ld.ub r8, src++ ++ st.b dst++, r8 ++ sub len, 1 ++ brge 1b ++ retal r12 ++ ++.Lmore_than_31: ++ pushm r0-r7, lr ++ ++ /* Check alignment */ ++ mov r8, src ++ andl r8, 31, COH ++ brne .Lunaligned_src ++ mov r8, dst ++ andl r8, 3, COH ++ brne .Lunaligned_dst ++ ++.Laligned_copy: ++ sub len, 32 ++ brlt
.Lless_than_32 ++ ++1: /* Copy 32 bytes at a time */ ++ ldm src, r0-r7 ++ sub src, -32 ++ stm dst, r0-r7 ++ sub dst, -32 ++ sub len, 32 ++ brge 1b ++ ++.Lless_than_32: ++ /* Copy 16 more bytes if possible */ ++ sub len, -16 ++ brlt .Lless_than_16 ++ ldm src, r0-r3 ++ sub src, -16 ++ sub len, 16 ++ stm dst, r0-r3 ++ sub dst, -16 ++ ++.Lless_than_16: ++ /* Do the remaining as byte copies */ ++ neg len ++ add pc, pc, len << 2 ++ .rept 15 ++ ld.ub r0, src++ ++ st.b dst++, r0 ++ .endr ++ ++ popm r0-r7, pc ++ ++.Lunaligned_src: ++ /* Make src cacheline-aligned. r8 = (src & 31) */ ++ rsub r8, r8, 32 ++ sub len, r8 ++1: ld.ub r0, src++ ++ st.b dst++, r0 ++ sub r8, 1 ++ brne 1b ++ ++ /* If dst is word-aligned, we're ready to go */ ++ pref src[0] ++ mov r8, 3 ++ tst dst, r8 ++ breq .Laligned_copy ++ ++.Lunaligned_dst: ++ /* src is aligned, but dst is not. Expect bad performance */ ++ sub len, 4 ++ brlt 2f ++1: ld.w r0, src++ ++ st.w dst++, r0 ++ sub len, 4 ++ brge 1b ++ ++2: neg len ++ add pc, pc, len << 2 ++ .rept 3 ++ ld.ub r0, src++ ++ st.b dst++, r0 ++ .endr ++ ++ popm r0-r7, pc ++ .size memcpy, . - memcpy +Index: uClibc-0.9.28-avr32/libc/string/avr32/memmove.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/memmove.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,114 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++#define dst r12 ++#define src r11 ++#define len r10 ++ ++ .text ++ .global memmove ++ .type memmove, @function ++ ++ .global __memmove ++ .hidden __memmove ++ .type __memmove, @function ++memmove: ++__memmove: ++ cp.w src, dst ++ brge __memcpy ++ ++ add dst, len ++ add src, len ++ pref src[-1] ++ ++ /* ++ * The rest is basically the same as in memcpy.S except that ++ * the direction is reversed. 
++ */ ++ cp.w len, 32 ++ brge .Lmore_than_31 ++ ++ sub len, 1 ++ retlt r12 ++1: ld.ub r8, --src ++ st.b --dst, r8 ++ sub len, 1 ++ brge 1b ++ retal r12 ++ ++.Lmore_than_31: ++ pushm r0-r7, lr ++ ++ /* Check alignment */ ++ mov r8, src ++ andl r8, 31, COH ++ brne .Lunaligned_src ++ mov r8, r12 ++ andl r8, 3, COH ++ brne .Lunaligned_dst ++ ++.Laligned_copy: ++ sub len, 32 ++ brlt .Lless_than_32 ++ ++1: /* Copy 32 bytes at a time */ ++ sub src, 32 ++ ldm src, r0-r7 ++ sub dst, 32 ++ sub len, 32 ++ stm dst, r0-r7 ++ brge 1b ++ ++.Lless_than_32: ++ /* Copy 16 more bytes if possible */ ++ sub len, -16 ++ brlt .Lless_than_16 ++ sub src, 16 ++ ldm src, r0-r3 ++ sub dst, 16 ++ sub len, 16 ++ stm dst, r0-r3 ++ ++.Lless_than_16: ++ /* Do the remaining as byte copies */ ++ sub len, -16 ++ breq 2f ++1: ld.ub r0, --src ++ st.b --dst, r0 ++ sub len, 1 ++ brne 1b ++ ++2: popm r0-r7, pc ++ ++.Lunaligned_src: ++ /* Make src cacheline-aligned. r8 = (src & 31) */ ++ sub len, r8 ++1: ld.ub r0, --src ++ st.b --dst, r0 ++ sub r8, 1 ++ brne 1b ++ ++ /* If dst is word-aligned, we're ready to go */ ++ pref src[-4] ++ mov r8, 3 ++ tst dst, r8 ++ breq .Laligned_copy ++ ++.Lunaligned_dst: ++ /* src is aligned, but dst is not. Expect bad performance */ ++ sub len, 4 ++ brlt 2f ++1: ld.w r0, --src ++ st.w --dst, r0 ++ sub len, 4 ++ brge 1b ++ ++2: neg len ++ add pc, pc, len << 2 ++ .rept 3 ++ ld.ub r0, --src ++ st.b --dst, r0 ++ .endr ++ ++ popm r0-r7, pc +Index: uClibc-0.9.28-avr32/libc/string/avr32/memset.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/memset.S 2006-10-20 10:42:15.000000000 +0200 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway. 
++ */ ++ ++#define s r12 ++#define c r11 ++#define n r10 ++ ++ .text ++ .global memset ++ .type memset, @function ++ ++ .global __memset ++ .hidden __memset ++ .type __memset, @function ++ ++ .align 1 ++memset: ++__memset: ++ cp.w n, 32 ++ mov r9, s ++ brge .Llarge_memset ++ ++ sub n, 1 ++ retlt s ++1: st.b s++, c ++ sub n, 1 ++ brge 1b ++ ++ retal r9 ++ ++.Llarge_memset: ++ mov r8, r11 ++ mov r11, 3 ++ bfins r8, r8, 8, 8 ++ bfins r8, r8, 16, 16 ++ tst s, r11 ++ breq 2f ++ ++1: st.b s++, r8 ++ sub n, 1 ++ tst s, r11 ++ brne 1b ++ ++2: mov r11, r9 ++ mov r9, r8 ++ sub n, 8 ++ ++3: st.d s++, r8 ++ sub n, 8 ++ brge 3b ++ ++ /* If we are done, n == -8 and we'll skip all st.b insns below */ ++ neg n ++ lsl n, 1 ++ add pc, n ++ .rept 7 ++ st.b s++, r8 ++ .endr ++ retal r11 +Index: uClibc-0.9.28-avr32/libc/string/avr32/strcat.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/strcat.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++#define s1 r9 ++#define s2 r11 ++ ++ .text ++ .global strcat ++ .type strcat, @function ++ .align 1 ++strcat: ++ mov s1, r12 ++ ++ /* Make sure s1 is word-aligned */ ++ mov r10, s1 ++ andl r10, 3, COH ++ breq 2f ++ ++ add pc, pc, r10 << 3 ++ sub r0, r0, 0 /* 4-byte nop */ ++ ld.ub r8, s1++ ++ sub r8, r8, 0 ++ breq 2f ++ ld.ub r8, s1++ ++ sub r8, r8, 0 ++ breq 3f ++ ld.ub r8, s1++ ++ sub r8, r8, 0 ++ breq 4f ++ ++ /* Find the end of the first string */ ++5: ld.w r8, s1++ ++ tnbz r8 ++ brne 5b ++ ++ sub s1, 4 ++ ++ bfextu r10, r8, 24, 8 ++ cp.w r10, 0 ++ breq 1f ++ sub s1, -1 ++ bfextu r10, r8, 16, 8 ++ cp.w r10, 0 ++ breq 2f ++ sub s1, -1 ++ bfextu r10, r8, 8, 8 ++ cp.w r10, 0 ++ breq 3f ++ sub s1, -1 ++ rjmp 4f ++ ++ /* Now, append s2 */ ++1: ld.ub r8, s2++ ++ st.b s1++, r8 ++ cp.w r8, 0 ++ reteq r12 ++2: ld.ub r8, s2++ ++ st.b s1++, r8 ++ cp.w r8, 0 ++ reteq r12 ++3: ld.ub 
r8, s2++ ++ st.b s1++, r8 ++ cp.w r8, 0 ++ reteq r12 ++4: ld.ub r8, s2++ ++ st.b s1++, r8 ++ cp.w r8, 0 ++ reteq r12 ++ ++ /* Copy one word at a time */ ++ ld.w r8, s2++ ++ tnbz r8 ++ breq 2f ++1: st.w r8, s2++ ++ ld.w r8, s2++ ++ tnbz r8 ++ brne 1b ++ ++ /* Copy the remaining bytes */ ++ bfextu r10, r8, 24, 8 ++ st.b s1++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ bfextu r10, r8, 16, 8 ++ st.b s1++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ bfextu r10, r8, 8, 8 ++ st.b s1++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ st.b s1++, r8 ++ retal r12 ++ .size strcat, . - strcat +Index: uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway. ++ */ ++ ++#define s1 r12 ++#define s2 r11 ++#define len r10 ++ ++ .text ++ .global strcmp ++ .type strcmp, @function ++ .align 1 ++strcmp: ++ mov r8, 3 ++ tst s1, r8 ++ brne .Lunaligned_s1 ++ tst s2, r8 ++ brne .Lunaligned_s2 ++ ++1: ld.w r8, s1++ ++ ld.w r9, s2++ ++ cp.w r8, r9 ++ brne 2f ++ tnbz r8 ++ brne 1b ++ retal 0 ++ ++2: bfextu r12, r8, 24, 8 ++ bfextu r11, r9, 24, 8 ++ sub r12, r11 ++ retne r12 ++ cp.w r11, 0 ++ reteq 0 ++ bfextu r12, r8, 16, 8 ++ bfextu r11, r9, 16, 8 ++ sub r12, r11 ++ retne r12 ++ cp.w r11, 0 ++ reteq 0 ++ bfextu r12, r8, 8, 8 ++ bfextu r11, r9, 8, 8 ++ sub r12, r11 ++ retne r12 ++ cp.w r11, 0 ++ reteq 0 ++ bfextu r12, r8, 0, 8 ++ bfextu r11, r9, 0, 8 ++ sub r12, r11 ++ retal r12 ++ ++.Lunaligned_s1: ++3: tst s1, r8 ++ breq 4f ++ ld.ub r10, s1++ ++ ld.ub r9, s2++ ++ sub r10, r9 ++ retne r10 ++ cp.w r9, 0 ++ brne 3b ++ retal r10 ++ ++4: tst s2, r8 ++ breq 1b ++ ++.Lunaligned_s2: ++ /* ++ * s1 and s2 can't both be aligned, and unaligned word loads ++ * can trigger spurious exceptions if we cross a page boundary. ++ * Do it the slow way... 
++ */ ++1: ld.ub r8, s1++ ++ ld.ub r9, s2++ ++ sub r8, r9 ++ retne r8 ++ cp.w r9, 0 ++ brne 1b ++ retal 0 ++ ++ .weak strcoll ++ strcoll = strcmp +Index: uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ * ++ * To reduce the size, this one might simply call strncpy with len = -1. ++ */ ++ ++#define dst r9 ++#define src r11 ++ ++ .text ++ .global strcpy ++ .type strcpy, @function ++strcpy: /* r12 = destination (kept untouched for the return), r11 = src; listed as broken in the Makefile and not built */ ++ mov dst, r12 /* work on a copy in r9 so r12 can be returned as-is */ ++ ++ pref src[0] ++ ++ /* ++ * Check alignment. If src is aligned but dst isn't, we can't ++ * do much about it... ++ */ ++ mov r8, src ++ andl r8, 3, COH /* r8 = src & 3; operand list now matches the other files */ ++ brne .Lunaligned_src ++ ++.Laligned_copy: ++1: ld.w r8, src++ ++ tnbz r8 ++ breq 2f ++ st.w dst++, r8 ++ rjmp 1b ++ ++2: /* ++ * Ok, r8 now contains the terminating '\0'. Copy the ++ * remaining bytes individually.
++ */ ++ bfextu r10, r8, 24, 8 ++ st.b dst++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ bfextu r10, r8, 16, 8 ++ st.b dst++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ bfextu r10, r8, 8, 8 ++ st.b dst++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ st.b dst++, r8 ++ retal r12 ++ ++.Lunaligned_src: ++ /* Copy bytes until we're aligned */ ++ rsub r8, r8, 4 ++ add pc, pc, r8 << 3 ++ nop ++ nop ++ ld.ub r10, src++ ++ st.b dst++, r10 ++ cp.w r10, 0 ++ reteq r12 ++ ++ rjmp .Laligned_copy +Index: uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,144 @@ ++ ++#include <stdio.h> ++#include <string.h> ++#include <time.h> ++#include <sys/mman.h> ++ ++#define BUF_SIZE (8 * 1024) ++ ++static char *buf1; ++static char *buf1_ref; ++static char *buf2; ++ ++extern void *optimized_memcpy(void *dest, void *src, size_t len); ++extern void *optimized_memmove(void *dest, void *src, size_t len); ++extern char *optimized_strcpy(char *dest, char *src); ++extern char *optimized_strncpy(char *dest, char *src, size_t len); ++ ++void dump_mismatch(char *buf, char *ref, size_t len) /* hex-dump every 16-byte row in which buf and ref differ */ ++{ ++ int i, j; ++ ++ for (i = 0; i < len; i += 16) { ++ if (memcmp(buf + i, ref + i, 16) == 0) ++ continue; ++ ++ printf("%4x buf:", i); ++ for (j = i; j < (i + 16); j++) ++ printf(" %02x", (unsigned char)buf[j]); /* cast keeps bytes >= 0x80 from sign-extending where char is signed */ ++ printf("\n ref:"); ++ for (j = i; j < (i + 16); j++) ++ printf(" %02x", (unsigned char)ref[j]); ++ printf("\n"); ++ } ++} ++ ++static void test_memcpy(int src_offset, int dst_offset, int len) /* run optimized_memcpy and memcpy 8192 times each, then compare results and CPU time */ ++{ ++ clock_t start, old, new; ++ int i; ++ ++ memset(buf1, 0x55, BUF_SIZE); ++ memset(buf1_ref, 0x55, BUF_SIZE); ++ memset(buf2, 0xaa, BUF_SIZE); ++ ++ printf("Testing memcpy with offsets %d => %d and len %d...", ++ src_offset, dst_offset, len); ++ ++ start = clock(); ++ for (i = 0; i < 8192; i++) ++ optimized_memcpy(buf1 + dst_offset, buf2 +
src_offset, len); ++ new = clock() - start; ++ start = clock(); ++ for ( i = 0; i < 8192; i++) ++ memcpy(buf1_ref + dst_offset, buf2 + src_offset, len); ++ old = clock() - start; ++ ++ if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0) ++ printf("OK\n"); ++ else { ++ printf("FAILED\n"); ++ dump_mismatch(buf1, buf1_ref, BUF_SIZE); ++ } ++ printf("CPU time used: %ld vs. %ld\n", (long)new, (long)old); /* clock_t is not necessarily int */ ++} ++ ++static void test_memmove(int src_offset, int dst_offset, int len) /* same check as test_memcpy, single call, for optimized_memmove vs. memmove */ ++{ ++ clock_t start, old, new; ++ ++ memset(buf1, 0x55, BUF_SIZE); ++ memset(buf1_ref, 0x55, BUF_SIZE); ++ memset(buf2, 0xaa, BUF_SIZE); ++ ++ printf("Testing memmove with offsets %d => %d and len %d...", ++ src_offset, dst_offset, len); ++ ++ start = clock(); ++ optimized_memmove(buf1 + dst_offset, buf2 + src_offset, len); ++ new = clock() - start; ++ start = clock(); ++ memmove(buf1_ref + dst_offset, buf2 + src_offset, len); ++ old = clock() - start; ++ ++ if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0) ++ printf("OK\n"); ++ else { ++ printf("FAILED\n"); ++ dump_mismatch(buf1, buf1_ref, BUF_SIZE); ++ } ++ printf("CPU time used: %ld vs.
%ld\n", (long)new, (long)old); /* clock_t is not necessarily int */ ++} ++ ++int main(int argc, char *argv[]) ++{ ++ buf2 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); /* fd -1 is the portable value for anonymous mappings */ ++ if (buf2 == MAP_FAILED) { ++ perror("Failed to allocate memory for buf2"); ++ return 1; ++ } ++ buf1 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (buf1 == MAP_FAILED) { ++ perror("Failed to allocate memory for buf1"); ++ return 1; ++ } ++ buf1_ref = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ if (buf1_ref == MAP_FAILED) { ++ perror("Failed to allocate memory for buf1_ref"); ++ return 1; ++ } ++ printf("\n === MEMCPY ===\n\n"); ++ ++ test_memcpy(0, 0, BUF_SIZE - 32); ++ test_memcpy(0, 0, 1); ++ test_memcpy(0, 0, 31); ++ test_memcpy(0, 0, 32); ++ test_memcpy(0, 0, 127); ++ test_memcpy(0, 0, 128); ++ test_memcpy(4, 4, BUF_SIZE - 32 - 4); ++ test_memcpy(1, 1, BUF_SIZE - 32 - 1); ++ test_memcpy(1, 1, 126); ++ test_memcpy(0, 3, 128); ++ test_memcpy(1, 4, 128); ++ test_memcpy(0, 0, 0); ++ ++ printf("\n === MEMMOVE ===\n\n"); ++ ++ test_memmove(0, 0, BUF_SIZE - 32); ++ test_memmove(0, 0, 1); ++ test_memmove(0, 0, 31); ++ test_memmove(0, 0, 32); ++ test_memmove(0, 0, BUF_SIZE - 33); ++ test_memmove(0, 0, 128); ++ test_memmove(4, 4, BUF_SIZE - 32 - 4); ++ test_memmove(1, 1, BUF_SIZE - 32 - 1); ++ test_memmove(1, 1, BUF_SIZE - 130); ++ test_memmove(0, 3, BUF_SIZE - 128); ++ test_memmove(1, 4, BUF_SIZE - 128); ++ test_memmove(0, 0, 0); ++ ++ return 0; ++} +Index: uClibc-0.9.28-avr32/libc/string/avr32/strlen.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/strlen.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++#define str r12 ++ ++ .text ++ .global strlen ++ .type strlen, @function ++strlen: ++ mov r11, r12 ++ ++ mov r9, str ++ andl r9, 3, COH ++ brne
.Lunaligned_str ++ ++1: ld.w r8, str++ ++ tnbz r8 ++ brne 1b ++ ++ sub r12, r11 ++ bfextu r9, r8, 24, 8 ++ cp.w r9, 0 ++ subeq r12, 4 ++ reteq r12 ++ bfextu r9, r8, 16, 8 ++ cp.w r9, 0 ++ subeq r12, 3 ++ reteq r12 ++ bfextu r9, r8, 8, 8 ++ cp.w r9, 0 ++ subeq r12, 2 ++ reteq r12 ++ sub r12, 1 ++ retal r12 ++ ++.Lunaligned_str: ++ add pc, pc, r9 << 3 ++ sub r0, r0, 0 /* 4-byte nop */ ++ ld.ub r8, str++ ++ sub r8, r8, 0 ++ breq 1f ++ ld.ub r8, str++ ++ sub r8, r8, 0 ++ breq 1f ++ ld.ub r8, str++ ++ sub r8, r8, 0 ++ brne 1b ++ ++1: sub r12, 1 ++ sub r12, r11 ++ retal r12 +Index: uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2004 Atmel Norway ++ */ ++ ++#define dst r9 ++#define src r11 ++ ++ .text ++ .global strncpy ++ .type strncpy, @function ++strncpy: /* r12 = destination (returned), r11 = src, r10 is used as the remaining byte count; listed as broken in the Makefile and not built */ ++ mov dst, r12 ++ ++ pref src[0] ++ mov dst, r12 /* NOTE(review): repeats the mov above; kept so the hunk length is unchanged */ ++ ++ /* ++ * Check alignment. If src is aligned but dst isn't, we can't ++ * do much about it... ++ */ ++ mov r8, src ++ andl r8, 3, COH /* r8 = src & 3; operand list now matches the other files */ ++ brne .Lunaligned_src ++ ++.Laligned_copy: ++ sub r10, 4 ++ brlt 3f ++1: ld.w r8, src++ ++ tnbz r8 ++ breq 2f ++ st.w dst++, r8 ++ sub r10, 4 ++ brne 1b /* NOTE(review): keeps looping when r10 has gone negative; brge looks intended - confirm */ ++ ++3: sub r10, -4 ++ reteq r12 ++ ++ /* This is safe as long as src is word-aligned and r10 > 0 */ ++ ld.w r8, src++ ++ ++2: /* ++ * Ok, r8 now contains the terminating '\0'. Copy the ++ * remaining bytes individually.
++ */ ++ bfextu r11, r8, 24, 8 ++ st.b dst++, r11 ++ cp.w r11, 0 ++ reteq r12 ++ sub r10, 1 ++ reteq r12 ++ bfextu r11, r8, 16, 8 ++ st.b dst++, r11 ++ cp.w r11, 0 ++ reteq r12 ++ sub r10, 1 ++ reteq r12 ++ bfextu r11, r8, 8, 8 ++ st.b dst++, r11 ++ cp.w r11, 0 ++ reteq r12 ++ sub r10, 1 ++ reteq r12 ++ st.b dst++, r8 ++ retal r12 ++ ++.Lunaligned_src: ++ /* Copy bytes until we're aligned */ ++ min r8, r8, r10 ++ sub r10, r8 ++ sub r8, 1 ++ retlt r12 ++1: ld.ub r10, src++ ++ st.b dst++, r10 ++ sub r8, 1 ++ brge 1b ++ ++ rjmp .Laligned_copy +Index: uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c 2006-10-19 15:05:52.000000000 +0200 +@@ -0,0 +1,66 @@ ++ ++#include <stdio.h> ++#include <string.h> ++ ++#define BUF_SIZE 32768 ++ ++static char buf1[BUF_SIZE] __attribute__((aligned(32))); ++static char buf1_ref[BUF_SIZE] __attribute__((aligned(32))); ++static char buf2[BUF_SIZE] __attribute__((aligned(32))); ++ ++extern void *new_memcpy(void *dest, void *src, size_t len); ++ ++void dump_mismatch(char *buf, char *ref, size_t len) /* hex-dump every 16-byte row in which buf and ref differ */ ++{ ++ int i, j; ++ ++ for (i = 0; i < len; i += 16) { ++ if (memcmp(buf + i, ref + i, 16) == 0) ++ continue; ++ ++ printf("%4x buf:", (unsigned int)i); /* the space flag has no meaning for an unsigned conversion */ ++ for (j = i; j < (i + 16); j++) ++ printf(" %02x", (unsigned char)buf[j]); /* cast keeps bytes >= 0x80 from sign-extending where char is signed */ ++ printf("\n ref:"); ++ for (j = i; j < (i + 16); j++) ++ printf(" %02x", (unsigned char)ref[j]); ++ printf("\n"); ++ } ++} ++ ++void test(int src_offset, int dst_offset, int len) /* copy with new_memcpy and memcpy into identically pre-filled buffers and compare them */ ++{ ++ memset(buf1, 0x55, sizeof(buf1)); ++ memset(buf1_ref, 0x55, sizeof(buf1_ref)); ++ memset(buf2, 0xaa, sizeof(buf2)); ++ ++ printf("Testing with offsets %d => %d and len %d...", ++ src_offset, dst_offset, len); ++ ++ new_memcpy(buf1 + dst_offset, buf2 + src_offset, len); ++ memcpy(buf1_ref + dst_offset, buf2 + src_offset, len); ++ ++ if (memcmp(buf1, buf1_ref, sizeof(buf1)) == 0) ++ printf("OK\n"); ++
else { ++ printf("FAILED\n"); ++ dump_mismatch(buf1, buf1_ref, sizeof(buf1)); ++ } ++} ++ ++int main(int argc, char *argv[]) ++{ ++ test(0, 0, BUF_SIZE); ++ test(0, 0, 1); ++ test(0, 0, 31); ++ test(0, 0, 32); ++ test(0, 0, 127); ++ test(0, 0, 128); ++ test(4, 4, BUF_SIZE - 4); ++ test(1, 1, BUF_SIZE - 1); ++ test(1, 1, 126); ++ test(0, 3, 128); ++ test(1, 4, 128); ++ ++ return 0; ++} |