summaryrefslogtreecommitdiffstats
path: root/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch')
-rw-r--r--target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch117
1 files changed, 117 insertions, 0 deletions
diff --git a/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch b/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch
new file mode 100644
index 000000000..c2efe5be2
--- /dev/null
+++ b/target/linux/orion/patches/002-feroceon__speed_up_flushing_of_the_entire_cache.patch
@@ -0,0 +1,117 @@
+Flushing the L1 D cache with a test/clean/invalidate loop is very
+easy in software, but it is not the quickest way of doing it, as
+there is a lot of overhead involved in re-scanning the cache from
+the beginning every time we hit a dirty line.
+
+This patch makes proc-feroceon.S use "clean+invalidate by set/way"
+loops sized according to the possible cache configurations of Feroceon
+CPUs (either direct-mapped or 4-way set associative).
+
+[nico: optimized the assembly a bit]
+
+Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
+Signed-off-by: Nicolas Pitre <nico@marvell.com>
+---
+ arch/arm/mm/proc-feroceon.S | 53 ++++++++++++++++++++++++++++++++++---------
+ 1 files changed, 42 insertions(+), 11 deletions(-)
+
+--- a/arch/arm/mm/proc-feroceon.S
++++ b/arch/arm/mm/proc-feroceon.S
+@@ -44,11 +44,31 @@
+ */
+ #define CACHE_DLINESIZE 32
+
++ .bss
++ .align 3
++__cache_params_loc:
++ .space 8 @ two words, filled in by cpu_feroceon_proc_init
++
+ .text
++__cache_params:
++ .word __cache_params_loc @ address of the slot above, fetched with ldr
++
+ /*
+ * cpu_feroceon_proc_init()
+ */
+ ENTRY(cpu_feroceon_proc_init)
++ mrc p15, 0, r0, c0, c0, 1 @ read cache type register
++ ldr r1, __cache_params @ r1 = address of __cache_params_loc
++ mov r2, #(16 << 5) @ 512, scaled by the size order below
++ tst r0, #(1 << 16) @ get way
++ mov r0, r0, lsr #18 @ get cache size order
++ movne r3, #((4 - 1) << 30) @ 4-way
++ and r0, r0, #0xf @ keep the low 4 bits of the shifted value
++ moveq r3, #0 @ 1-way
++ mov r2, r2, lsl r0 @ actual cache size
++ movne r2, r2, lsr #2 @ turned into # of sets
++ sub r2, r2, #(1 << 5) @ r2 = start value of the set loop
++ stmia r1, {r2, r3} @ save set and way start values for __flush_whole_cache
+ mov pc, lr
+
+ /*
+@@ -117,11 +137,19 @@
+ */
+ ENTRY(feroceon_flush_kern_cache_all)
+ mov r2, #VM_EXEC
+- mov ip, #0
++
+ __flush_whole_cache:
+-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
+- bne 1b
++ ldr r1, __cache_params @ r1 = address of the saved parameters
++ ldmia r1, {r1, r3} @ r1 = set start value, r3 = way start value
++1: orr ip, r1, r3 @ current set combined with the way start value
++2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
++ subs ip, ip, #(1 << 30) @ next way
++ bcs 2b @ repeat until the subtraction borrows
++ subs r1, r1, #(1 << 5) @ next set
++ bcs 1b @ repeat until the subtraction borrows
++
+ tst r2, #VM_EXEC
++ mov ip, #0 @ mov keeps the tst flags; ip = 0 for the mcr ops
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c10, 4 @ drain WB
+ mov pc, lr
+@@ -138,7 +166,6 @@
+ */
+ .align 5
+ ENTRY(feroceon_flush_user_cache_range)
+- mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT
+ bgt __flush_whole_cache
+@@ -152,6 +179,7 @@
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
++ mov ip, #0
+ mcrne p15, 0, ip, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+@@ -306,16 +334,19 @@
+ .align 5
+ ENTRY(cpu_feroceon_switch_mm)
+ #ifdef CONFIG_MMU
+- mov ip, #0
+-@ && 'Clean & Invalidate whole DCache'
+-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
+- bne 1b
+- mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+- mcr p15, 0, ip, c7, c10, 4 @ drain WB
++ mov r2, lr @ abuse r2 to preserve lr
++ bl __flush_whole_cache @ overwrites lr; comes back with ip = 0
++ @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
++ tst r2, #VM_EXEC
++ mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
++ mcreq p15, 0, ip, c7, c10, 4 @ drain WB
++
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+-#endif
++ mov pc, r2 @ return through the lr copy kept in r2
++#else
+ mov pc, lr
++#endif
+
+ /*
+ * cpu_feroceon_set_pte_ext(ptep, pte, ext)