--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -375,6 +375,7 @@ config ARCH_CNS3XXX
 	select PCI_DOMAINS if PCI
 	select HAVE_ARM_TWD
 	select HAVE_SMP
+	select FIQ
 	help
 	  Support for Cavium Networks CNS3XXX platform.
 
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -49,6 +49,8 @@
 
 static unsigned long no_fiq_insn;
 
+unsigned int fiq_number[2] = {0, 0};
+
 /* Default reacquire function
  * - we always relinquish FIQ control
  * - we always reacquire FIQ control
@@ -70,9 +72,12 @@ static struct fiq_handler *current_fiq =
 
 int show_fiq_list(struct seq_file *p, int prec)
 {
-	if (current_fiq != &default_owner)
-		seq_printf(p, "%*s:              %s\n", prec, "FIQ",
-			current_fiq->name);
+	if (current_fiq != &default_owner) {
+		seq_printf(p, "%*s: ", prec, "FIQ");
+		seq_printf(p, "%10u ", fiq_number[0]);
+		seq_printf(p, "%10u ", fiq_number[1]);
+		seq_printf(p, "      %s\n", current_fiq->name);
+	}
 
 	return 0;
 }
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -400,13 +400,13 @@ void show_ipi_list(struct seq_file *p, i
 	unsigned int cpu, i;
 
 	for (i = 0; i < NR_IPI; i++) {
-		seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
+		seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
 
 		for_each_present_cpu(cpu)
 			seq_printf(p, "%10u ",
 				   __get_irq_stat(cpu, ipi_irqs[i]));
 
-		seq_printf(p, " %s\n", ipi_types[i]);
+		seq_printf(p, "      %s\n", ipi_types[i]);
 	}
 }
 
--- a/arch/arm/mach-cns3xxx/Makefile
+++ b/arch/arm/mach-cns3xxx/Makefile
@@ -2,6 +2,6 @@ obj-$(CONFIG_ARCH_CNS3XXX)		+= core.o pm
 obj-$(CONFIG_PCI)			+= pcie.o
 obj-$(CONFIG_MACH_CNS3420VB)		+= cns3420vb.o
 obj-$(CONFIG_MACH_GW2388)		+= laguna.o
-obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o
+obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o cns3xxx_fiq.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
 obj-$(CONFIG_LOCAL_TIMERS)		+= localtimer.o
--- /dev/null
+++ b/arch/arm/mach-cns3xxx/cns3xxx_fiq.S
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (C) 2012 Gateworks Corporation
+ *      Chris Lang <clang@gateworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+
+#define D_CACHE_LINE_SIZE 32
+
+	.text
+
+/*
+ * R8  - DMA Start Address
+ * R9  - DMA Length
+ * R10 - DMA Direction
+ * R11 - DMA type
+ * R12 - fiq_buffer Address
+ * R13 - DMA type Address
+*/
+
+	.global	cns3xxx_fiq_end
+ENTRY(cns3xxx_fiq_start)
+	mov r8, #0
+	str r8, [r13]
+
+	ldr r9, [r12]
+	ldr r8, [r9]
+	add r8, r8, #1
+	str r8, [r9]
+
+	ldmib r12, {r8, r9, r10}
+	and r11, r10, #0x3000000
+	and r10, r10, #0xff
+
+	teq r11, #0x1000000
+	beq cns3xxx_dma_map_area
+	teq r11, #0x2000000
+	beq cns3xxx_dma_unmap_area
+	b cns3xxx_dma_flush_range
+
+cns3xxx_fiq_exit:
+	mov r8, #0
+	str r8, [r12, #12]
+	mcr p15, 0, r8, c7, c10, 4    @ drain write buffer
+	subs pc, lr, #4
+
+cns3xxx_dma_map_area:
+	add r9, r9, r8
+	teq r10, #DMA_FROM_DEVICE
+	beq cns3xxx_dma_inv_range
+	b cns3xxx_dma_clean_range
+
+cns3xxx_dma_unmap_area:
+	add r9, r9, r8
+	teq r10, #DMA_TO_DEVICE
+	bne cns3xxx_dma_inv_range
+	b cns3xxx_fiq_exit
+
+cns3xxx_dma_flush_range:
+	bic r8, r8, #D_CACHE_LINE_SIZE - 1
+1:
+	mcr p15, 0, r8, c7, c14, 1   @ clean & invalidate D line
+	add r8, r8, #D_CACHE_LINE_SIZE
+	cmp r8, r9
+	blo 1b
+	b cns3xxx_fiq_exit
+
+cns3xxx_dma_clean_range:
+	bic r8, r8, #D_CACHE_LINE_SIZE - 1
+1:
+	mcr p15, 0, r8, c7, c10, 1    @ clean D line
+	add r8, r8, #D_CACHE_LINE_SIZE
+	cmp r8, r9
+	blo 1b
+	b cns3xxx_fiq_exit
+
+cns3xxx_dma_inv_range:
+	tst r8, #D_CACHE_LINE_SIZE - 1
+	bic r8, r8, #D_CACHE_LINE_SIZE - 1
+	mcrne p15, 0, r8, c7, c10, 1    @ clean D line
+	tst r9, #D_CACHE_LINE_SIZE - 1
+	bic r9, r9, #D_CACHE_LINE_SIZE - 1
+	mcrne p15, 0, r9, c7, c14, 1    @ clean & invalidate D line
+1:
+	mcr p15, 0, r8, c7, c6, 1   @ invalidate D line
+	add r8, r8, #D_CACHE_LINE_SIZE
+	cmp r8, r9
+	blo 1b
+	b cns3xxx_fiq_exit
+
+cns3xxx_fiq_end:
--- a/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h
+++ b/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h
@@ -294,6 +294,7 @@
 #define MISC_PCIE_INT_MASK(x)			MISC_MEM_MAP(0x978 + (x) * 0x100)
 #define MISC_PCIE_INT_STATUS(x)			MISC_MEM_MAP(0x97C + (x) * 0x100)
 
+#define MISC_FIQ_CPU(x)				MISC_MEM_MAP(0xA58 - (x) * 0x4)
 /*
  * Power management and clock control
  */
--- a/arch/arm/mach-cns3xxx/include/mach/irqs.h
+++ b/arch/arm/mach-cns3xxx/include/mach/irqs.h
@@ -14,6 +14,7 @@
 #define IRQ_LOCALTIMER		29
 #define IRQ_LOCALWDOG		30
 #define IRQ_TC11MP_GIC_START	32
+#define FIQ_START 0
 
 #include <mach/cns3xxx.h>
 
--- /dev/null
+++ b/arch/arm/mach-cns3xxx/include/mach/smp.h
@@ -0,0 +1,8 @@
+#ifndef __MACH_SMP_H
+#define __MACH_SMP_H
+
+extern void smp_dma_map_area(const void *, size_t, int);
+extern void smp_dma_unmap_area(const void *, size_t, int);
+extern void smp_dma_flush_range(const void *, const void *);
+
+#endif
--- a/arch/arm/mach-cns3xxx/platsmp.c
+++ b/arch/arm/mach-cns3xxx/platsmp.c
@@ -24,10 +24,27 @@
 #include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 #include <asm/unified.h>
-
+#include <asm/fiq.h>
+#include <mach/smp.h>
 #include <mach/cns3xxx.h>
 
+static struct fiq_handler fh = {
+	.name = "cns3xxx-fiq"
+};
+
+static unsigned int fiq_buffer[8];
+
+#define FIQ_ENABLED         0x80000000
+#define FIQ_GENERATE				0x00010000
+#define CNS3XXX_MAP_AREA    0x01000000
+#define CNS3XXX_UNMAP_AREA  0x02000000
+#define CNS3XXX_FLUSH_RANGE 0x03000000
+
 extern void cns3xxx_secondary_startup(void);
+extern unsigned char cns3xxx_fiq_start, cns3xxx_fiq_end;
+extern unsigned int fiq_number[2];
+extern struct cpu_cache_fns cpu_cache;
+struct cpu_cache_fns cpu_cache_save;
 
 #define SCU_CPU_STATUS 0x08
 static void __iomem *scu_base;
@@ -38,12 +55,50 @@ static void __iomem *scu_base;
  */
 volatile int __cpuinitdata pen_release = -1;
 
+static void __init cns3xxx_set_fiq_regs(void)
+{
+	struct pt_regs FIQ_regs;
+	unsigned int cpu = smp_processor_id();
+
+	if (cpu) {
+		FIQ_regs.ARM_ip = (unsigned int)&fiq_buffer[4];
+		FIQ_regs.ARM_sp = (unsigned int)MISC_FIQ_CPU(0);
+	} else {
+		FIQ_regs.ARM_ip = (unsigned int)&fiq_buffer[0];
+		FIQ_regs.ARM_sp = (unsigned int)MISC_FIQ_CPU(1);
+	}
+	set_fiq_regs(&FIQ_regs);
+}
+
+static void __init cns3xxx_init_fiq(void)
+{
+	void *fiqhandler_start;
+	unsigned int fiqhandler_length;
+	int ret;
+
+	fiqhandler_start = &cns3xxx_fiq_start;
+	fiqhandler_length = &cns3xxx_fiq_end - &cns3xxx_fiq_start;
+
+	ret = claim_fiq(&fh);
+
+	if (ret) {
+		return;
+	}
+
+	set_fiq_handler(fiqhandler_start, fiqhandler_length);
+	fiq_buffer[0] = (unsigned int)&fiq_number[0];
+	fiq_buffer[3] = 0;
+	fiq_buffer[4] = (unsigned int)&fiq_number[1];
+	fiq_buffer[7] = 0;
+}
+
+
 /*
  * Write pen_release in a way that is guaranteed to be visible to all
  * observers, irrespective of whether they're taking part in coherency
  * or not.  This is necessary for the hotplug code to work reliably.
  */
-static void write_pen_release(int val)
+static void __cpuinit write_pen_release(int val)
 {
 	pen_release = val;
 	smp_wmb();
@@ -63,12 +118,25 @@ void __cpuinit platform_secondary_init(u
 	gic_secondary_init(0);
 
 	/*
+	 * Setup Secondary Core FIQ regs
+	 */
+	cns3xxx_set_fiq_regs();
+
+	/*
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
 	write_pen_release(-1);
 
 	/*
+	 * Fixup DMA Operations
+	 *
+	 */
+	cpu_cache.dma_map_area = (void *)smp_dma_map_area;
+	cpu_cache.dma_unmap_area = (void *)smp_dma_unmap_area;
+	cpu_cache.dma_flush_range = (void *)smp_dma_flush_range;
+
+	/*
 	 * Synchronise with the boot thread.
 	 */
 	spin_lock(&boot_lock);
@@ -171,4 +239,112 @@ void __init platform_smp_prepare_cpus(un
 	 */
 	__raw_writel(virt_to_phys(cns3xxx_secondary_startup),
 			(void __iomem *)(CNS3XXX_MISC_BASE_VIRT + 0x0600));
+
+	/*
+	 * Setup FIQ's for main cpu
+	 */
+	cns3xxx_init_fiq();
+	cns3xxx_set_fiq_regs();
+	memcpy((void *)&cpu_cache_save, (void *)&cpu_cache, sizeof(struct cpu_cache_fns));
+}
+
+
+static inline unsigned long cns3xxx_cpu_id(void)
+{
+	unsigned long cpu;
+
+	asm volatile(
+		" mrc p15, 0, %0, c0, c0, 5  @ cns3xxx_cpu_id\n"
+		: "=r" (cpu) : : "memory", "cc");
+	return (cpu & 0xf);
+}
+
+void smp_dma_map_area(const void *addr, size_t size, int dir)
+{
+	unsigned int cpu;
+	unsigned long flags;
+	raw_local_irq_save(flags);
+	cpu = cns3xxx_cpu_id();
+	if (cpu) {
+		fiq_buffer[1] = (unsigned int)addr;
+		fiq_buffer[2] = size;
+		fiq_buffer[3] = dir | CNS3XXX_MAP_AREA | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+		cpu_cache_save.dma_map_area(addr, size, dir);
+		while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); }
+	} else {
+
+		fiq_buffer[5] = (unsigned int)addr;
+		fiq_buffer[6] = size;
+		fiq_buffer[7] = dir | CNS3XXX_MAP_AREA | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+		cpu_cache_save.dma_map_area(addr, size, dir);
+		while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); }
+	}
+	raw_local_irq_restore(flags);
+}
+
+void smp_dma_unmap_area(const void *addr, size_t size, int dir)
+{
+	unsigned int cpu;
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	cpu = cns3xxx_cpu_id();
+	if (cpu) {
+
+		fiq_buffer[1] = (unsigned int)addr;
+		fiq_buffer[2] = size;
+		fiq_buffer[3] = dir | CNS3XXX_UNMAP_AREA | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+		cpu_cache_save.dma_unmap_area(addr, size, dir);
+		while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); }
+	} else {
+
+		fiq_buffer[5] = (unsigned int)addr;
+		fiq_buffer[6] = size;
+		fiq_buffer[7] = dir | CNS3XXX_UNMAP_AREA | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+		cpu_cache_save.dma_unmap_area(addr, size, dir);
+		while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); }
+	}
+	raw_local_irq_restore(flags);
+}
+
+void smp_dma_flush_range(const void *start, const void *end)
+{
+	unsigned int cpu;
+	unsigned long flags;
+	raw_local_irq_save(flags);
+	cpu = cns3xxx_cpu_id();
+	if (cpu) {
+
+		fiq_buffer[1] = (unsigned int)start;
+		fiq_buffer[2] = (unsigned int)end;
+		fiq_buffer[3] = CNS3XXX_FLUSH_RANGE | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+		cpu_cache_save.dma_flush_range(start, end);
+		while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); }
+	} else {
+
+		fiq_buffer[5] = (unsigned int)start;
+		fiq_buffer[6] = (unsigned int)end;
+		fiq_buffer[7] = CNS3XXX_FLUSH_RANGE | FIQ_ENABLED;
+		smp_mb();
+		__raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+		cpu_cache_save.dma_flush_range(start, end);
+		while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); }
+	}
+	raw_local_irq_restore(flags);
 }
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -793,7 +793,7 @@ config NEEDS_SYSCALL_FOR_CMPXCHG
 
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
-	depends on CPU_V6K && SMP
+	depends on CPU_V6K && SMP && !ARCH_CNS3XXX
 	default y
 	help
 	  The Snoop Control Unit on ARM11MPCore does not detect the