Teensy 3.1 DMA memcpy() 1092mbs 16-byte aligned DMA
Teensy 3.1 DMA memcpy, proof of concept
Sample output: 2048 bytes aligned 16; loop set 215.58 mbs 76 us; loop copy 910.22 mbs 18 us; memset 1365.33 mbs 12 us; memcpy 910.22 mbs 18 us; memcpy128 1092.27 mbs 15 us; DMA errs 0
Obviously, the ARMCC memcpy() is quite fast (unrolled assembler I presume).
You could add an IRQ handler if you wanted asynchronous operation.
main.cpp@0:d374f051a3ac, 2015-10-03 (annotated)
- Committer:
- manitou
- Date:
- Sat Oct 03 15:15:58 2015 +0000
- Revision:
- 0:d374f051a3ac
teensy 3.1 DMA memcpy
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
manitou | 0:d374f051a3ac | 1 | // teensy 3.1 mbed memcpy using DMA |
manitou | 0:d374f051a3ac | 2 | // could add IRQ handler for asynch operation |
manitou | 0:d374f051a3ac | 3 | #include "mbed.h" |
manitou | 0:d374f051a3ac | 4 | #include "USBSerial.h" |
manitou | 0:d374f051a3ac | 5 | |
manitou | 0:d374f051a3ac | 6 | #define PRREG(x) pc.printf(#x" 0x%0x\n",x) |
manitou | 0:d374f051a3ac | 7 | |
manitou | 0:d374f051a3ac | 8 | USBSerial pc; // Virtual serial port over USB |
manitou | 0:d374f051a3ac | 9 | Timer tmr; |
manitou | 0:d374f051a3ac | 10 | |
manitou | 0:d374f051a3ac | 11 | #define CHNL 1 |
manitou | 0:d374f051a3ac | 12 | void dma_init() { |
manitou | 0:d374f051a3ac | 13 | SIM->SCGC7 |= SIM_SCGC7_DMA_MASK; // DMA clock |
manitou | 0:d374f051a3ac | 14 | // SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK; // Enable clock to DMA mux |
manitou | 0:d374f051a3ac | 15 | // DMAMUX->CHCFG[CHNL] = 0; // IO to DMA map |
manitou | 0:d374f051a3ac | 16 | } |
manitou | 0:d374f051a3ac | 17 | |
manitou | 0:d374f051a3ac | 18 | void memcpy32(void *dest, void *src, unsigned int bytes) |
manitou | 0:d374f051a3ac | 19 | { |
manitou | 0:d374f051a3ac | 20 | DMA0->TCD[CHNL].SADDR = (uint32_t)src; |
manitou | 0:d374f051a3ac | 21 | DMA0->TCD[CHNL].SOFF = 4; |
manitou | 0:d374f051a3ac | 22 | DMA0->TCD[CHNL].ATTR = DMA_ATTR_SSIZE(2) | DMA_ATTR_DSIZE(2); //32-bit |
manitou | 0:d374f051a3ac | 23 | DMA0->TCD[CHNL].NBYTES_MLNO = bytes; |
manitou | 0:d374f051a3ac | 24 | DMA0->TCD[CHNL].SLAST = 0; |
manitou | 0:d374f051a3ac | 25 | DMA0->TCD[CHNL].DADDR = (uint32_t)dest; |
manitou | 0:d374f051a3ac | 26 | DMA0->TCD[CHNL].DOFF = 4; |
manitou | 0:d374f051a3ac | 27 | DMA0->TCD[CHNL].CITER_ELINKNO = 1; |
manitou | 0:d374f051a3ac | 28 | DMA0->TCD[CHNL].DLAST_SGA = 0; |
manitou | 0:d374f051a3ac | 29 | DMA0->TCD[CHNL].BITER_ELINKNO = 1; |
manitou | 0:d374f051a3ac | 30 | DMA0->TCD[CHNL].CSR = DMA_CSR_START_MASK; |
manitou | 0:d374f051a3ac | 31 | |
manitou | 0:d374f051a3ac | 32 | while (!(DMA0->TCD[CHNL].CSR & DMA_CSR_DONE_MASK)) /* wait */ ; |
manitou | 0:d374f051a3ac | 33 | } |
manitou | 0:d374f051a3ac | 34 | |
manitou | 0:d374f051a3ac | 35 | void memcpy128(void *dest, void *src, unsigned int bytes) |
manitou | 0:d374f051a3ac | 36 | { |
manitou | 0:d374f051a3ac | 37 | DMA0->TCD[CHNL].SADDR = (uint32_t)src; |
manitou | 0:d374f051a3ac | 38 | DMA0->TCD[CHNL].SOFF = 16; |
manitou | 0:d374f051a3ac | 39 | DMA0->TCD[CHNL].ATTR = DMA_ATTR_SSIZE(4) | DMA_ATTR_DSIZE(4); |
manitou | 0:d374f051a3ac | 40 | DMA0->TCD[CHNL].NBYTES_MLNO = bytes; |
manitou | 0:d374f051a3ac | 41 | DMA0->TCD[CHNL].SLAST = 0; |
manitou | 0:d374f051a3ac | 42 | DMA0->TCD[CHNL].DADDR = (uint32_t)dest; |
manitou | 0:d374f051a3ac | 43 | DMA0->TCD[CHNL].DOFF = 16; |
manitou | 0:d374f051a3ac | 44 | DMA0->TCD[CHNL].CITER_ELINKNO = 1; |
manitou | 0:d374f051a3ac | 45 | DMA0->TCD[CHNL].DLAST_SGA = 0; |
manitou | 0:d374f051a3ac | 46 | DMA0->TCD[CHNL].BITER_ELINKNO = 1; |
manitou | 0:d374f051a3ac | 47 | DMA0->TCD[CHNL].CSR = DMA_CSR_START_MASK; |
manitou | 0:d374f051a3ac | 48 | |
manitou | 0:d374f051a3ac | 49 | while (!(DMA0->TCD[CHNL].CSR & DMA_CSR_DONE_MASK)) /* wait */ ; |
manitou | 0:d374f051a3ac | 50 | } |
manitou | 0:d374f051a3ac | 51 | |
// Size in bytes of each benchmark buffer.
#define BYTES 2048

// Source and destination buffers for the copy benchmarks, both placed on a
// 16-byte boundary.
uint8_t src[BYTES] __attribute__((aligned(16)));
uint8_t dst[BYTES] __attribute__((aligned(16)));
manitou | 0:d374f051a3ac | 56 | |
manitou | 0:d374f051a3ac | 57 | |
manitou | 0:d374f051a3ac | 58 | void memperf(){ |
manitou | 0:d374f051a3ac | 59 | int i; |
manitou | 0:d374f051a3ac | 60 | uint32_t us; |
manitou | 0:d374f051a3ac | 61 | |
manitou | 0:d374f051a3ac | 62 | pc.printf("\n%d bytes aligned 16\n",BYTES); |
manitou | 0:d374f051a3ac | 63 | us = tmr.read_us(); |
manitou | 0:d374f051a3ac | 64 | for (i=0;i<BYTES;i++) src[i] = i; |
manitou | 0:d374f051a3ac | 65 | us = tmr.read_us() - us; |
manitou | 0:d374f051a3ac | 66 | pc.printf("loop set %.2f mbs %d us\n",8*BYTES/(float)us,us); |
manitou | 0:d374f051a3ac | 67 | us = tmr.read_us(); |
manitou | 0:d374f051a3ac | 68 | for (i=0;i<BYTES;i++) dst[i] = src[i]; |
manitou | 0:d374f051a3ac | 69 | us = tmr.read_us() - us; |
manitou | 0:d374f051a3ac | 70 | pc.printf("loop copy %.2f mbs %d us\n",8*BYTES/(float)us,us); |
manitou | 0:d374f051a3ac | 71 | us = tmr.read_us(); |
manitou | 0:d374f051a3ac | 72 | memset(dst,0,BYTES); |
manitou | 0:d374f051a3ac | 73 | us = tmr.read_us() - us; |
manitou | 0:d374f051a3ac | 74 | pc.printf("memset %.2f mbs %d us\n",8*BYTES/(float)us,us); |
manitou | 0:d374f051a3ac | 75 | us = tmr.read_us(); |
manitou | 0:d374f051a3ac | 76 | memcpy(dst,src,BYTES); |
manitou | 0:d374f051a3ac | 77 | us = tmr.read_us() - us; |
manitou | 0:d374f051a3ac | 78 | pc.printf("memcpy %.2f mbs %d us\n",8*BYTES/(float)us,us); |
manitou | 0:d374f051a3ac | 79 | |
manitou | 0:d374f051a3ac | 80 | memset(dst,0,BYTES); // for validation |
manitou | 0:d374f051a3ac | 81 | us = tmr.read_us(); |
manitou | 0:d374f051a3ac | 82 | memcpy128(dst,src,BYTES); |
manitou | 0:d374f051a3ac | 83 | us = tmr.read_us() - us; |
manitou | 0:d374f051a3ac | 84 | pc.printf("memcpy128 %.2f mbs %d us\n",8*BYTES/(float)us,us); |
manitou | 0:d374f051a3ac | 85 | int errs=0; |
manitou | 0:d374f051a3ac | 86 | for ( i=0;i<BYTES;i++) if (src[i] != dst[i]) errs++; |
manitou | 0:d374f051a3ac | 87 | pc.printf("errs %d\n",errs); |
manitou | 0:d374f051a3ac | 88 | } |
manitou | 0:d374f051a3ac | 89 | |
manitou | 0:d374f051a3ac | 90 | int main() { |
manitou | 0:d374f051a3ac | 91 | wait(2.0); |
manitou | 0:d374f051a3ac | 92 | pc.printf("SystemCoreClock %d %s %s\n",SystemCoreClock,__TIME__,__DATE__); |
manitou | 0:d374f051a3ac | 93 | tmr.start(); |
manitou | 0:d374f051a3ac | 94 | dma_init(); |
manitou | 0:d374f051a3ac | 95 | while(1) { |
manitou | 0:d374f051a3ac | 96 | memperf(); |
manitou | 0:d374f051a3ac | 97 | wait(3.0); |
manitou | 0:d374f051a3ac | 98 | } |
manitou | 0:d374f051a3ac | 99 | |
manitou | 0:d374f051a3ac | 100 | } |