lwip operating system abstraction layer implementation based on CMSIS-RTOS
Dependents: LwIPNetworking NetServicesMin EthernetInterface EthernetInterface_RSF ... more
Revision 10:09b0951b1899, committed 2013-09-10
- Committer:
- bogdanm
- Date:
- Tue Sep 10 15:14:42 2013 +0300
- Parent:
- 9:d7ad3f3ee934
- Child:
- 11:faba3c7e1122
- Commit message:
- Sync with git revision 171dda705c947bf910926a0b73d6a4797802554d
Changed in this revision
--- a/arch/cc.h Mon Aug 19 18:38:06 2013 +0300
+++ b/arch/cc.h Tue Sep 10 15:14:42 2013 +0300
@@ -82,8 +82,21 @@
#define ALIGNED(n) __attribute__((aligned (n)))
#endif
-/* Used with IP headers only */
-#define LWIP_CHKSUM_ALGORITHM 1
+/* Map lwIP's MEMCPY and LWIP_CHKSUM hooks to Thumb-2 routines under GCC */
+#if defined(TOOLCHAIN_GCC) && defined(__thumb2__)
+ #define MEMCPY(dst,src,len) thumb2_memcpy(dst,src,len)
+ #define LWIP_CHKSUM thumb2_checksum
+ /* Set the algorithm to 0 so that the unused lwip_standard_chksum function
+ doesn't generate a compiler warning */
+ #define LWIP_CHKSUM_ALGORITHM 0
+
+ void* thumb2_memcpy(void* pDest, const void* pSource, size_t length);
+ u16_t thumb2_checksum(void* pData, int length);
+#else
+ /* Used with IP headers only */
+ #define LWIP_CHKSUM_ALGORITHM 1
+#endif
+
#ifdef LWIP_DEBUG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/checksum.c Tue Sep 10 15:14:42 2013 +0300
@@ -0,0 +1,126 @@
+/* Copyright (C) 2013 - Adam Green (https://github.com/adamgreen)
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+#if defined(TOOLCHAIN_GCC) && defined(__thumb2__)
+
+
+/* This is a hand-written Thumb-2 assembly language version of the
+ algorithm 3 version of lwip_standard_chksum in lwIP's inet_chksum.c. It
+ sums the data 32 bits at a time and unrolls the main loop to perform two
+ of these 32-bit adds per iteration.
+
+ Parameters: pData (r0) points to the data, length (r1) is its byte count.
+ Returns: the 16-bit 1's complement sum in r0 (not inverted).
+
+ NOTE: This function does return a uint16_t from the assembly language code
+ but is declared void so that GCC doesn't issue a warning, because it
+ cannot see this low-level return.
+*/
+__attribute__((naked)) void /*uint16_t*/ thumb2_checksum(const void* pData, int length)
+{
+ __asm (
+ ".syntax unified\n"
+ ".thumb\n"
+
+ // Save r4, the only non-volatile register used. Push the link register
+ // too, keeping the stack 8-byte aligned and letting one pop also return.
+ " push {r4, lr}\n"
+ // Initialize the running sum, r2, to 0.
+ " movs r2, #0\n"
+ // Remember in r3 whether pData was at an odd address. This is used later
+ // to know if the result needs its bytes swapped, since the summation is
+ // then done at an offset of 1, rather than 0.
+ " ands r3, r0, #1\n"
+ // Already 2-byte aligned? If so skip ahead.
+ " beq 1$\n"
+ // Odd address: return the zero sum right away if there are no bytes.
+ " cbz r1, 9$\n"
+
+ // 2-byte align.
+ // Place the first data byte in the odd (upper) summation location since
+ // the result is byte swapped later. It's ok to overwrite r2 here as it
+ // only held 0 until now. Advance r0 pointer and decrement r1 length.
+ " ldrb r2, [r0], #1\n"
+ " lsls r2, r2, #8\n"
+ " subs r1, r1, #1\n"
+
+ // Already 4-byte aligned? If so skip ahead to the main loop.
+ "1$:\n"
+ " ands r4, r0, #3\n"
+ " beq 2$\n"
+ // Need at least 2 bytes left to align. If not, skip ahead to take care
+ // of a possible trailing byte.
+ " cmp r1, #2\n"
+ " blt 7$\n"
+
+ // 4-byte align by summing one half-word.
+ " ldrh r4, [r0], #2\n"
+ " adds r2, r2, r4\n"
+ " subs r1, r1, #2\n"
+
+ // Main summing loop which sums up data 2 words at a time.
+ // Loop only while at least 8 bytes are left to sum.
+ "2$:\n"
+ " cmp r1, #8\n"
+ " blt 3$\n"
+ // Sum the next two words. The adc adds the carry out of each 32-bit
+ // add back into the sum (end-around carry).
+ " ldr r4, [r0], #4\n"
+ " adds r2, r4\n"
+ " adc r2, r2, #0\n"
+ " ldr r4, [r0], #4\n"
+ " adds r2, r4\n"
+ " adc r2, r2, #0\n"
+ " subs r1, r1, #8\n"
+ " b 2$\n"
+
+ // Sum up any remaining half-words.
+ "3$:\n"
+ // Loop only while at least 2 bytes are left to sum.
+ " cmp r1, #2\n"
+ " blt 7$\n"
+ // Sum up the next half-word, continuing to add the carry back in.
+ " ldrh r4, [r0], #2\n"
+ " adds r2, r4\n"
+ " adc r2, r2, #0\n"
+ " subs r1, r1, #2\n"
+ " b 3$\n"
+
+ // Handle the trailing byte, if it exists.
+ "7$:\n"
+ " cbz r1, 8$\n"
+ " ldrb r4, [r0]\n"
+ " adds r2, r4\n"
+ " adc r2, r2, #0\n"
+
+ // Fold the 32-bit sum into 16 bits; twice, as the first fold can carry.
+ "8$:\n"
+ " ubfx r4, r2, #16, #16\n"
+ " ubfx r2, r2, #0, #16\n"
+ " adds r2, r4\n"
+ " ubfx r4, r2, #16, #16\n"
+ " ubfx r2, r2, #0, #16\n"
+ " adds r2, r4\n"
+
+ // Swap the result bytes if the data started at an odd address.
+ " cbz r3, 9$\n"
+ " rev16 r2, r2\n"
+
+ // Return the final sum in r0, restoring r4.
+ "9$: mov r0, r2\n"
+ " pop {r4, pc}\n"
+ );
+}
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/memcpy.c Tue Sep 10 15:14:42 2013 +0300
@@ -0,0 +1,59 @@
+/* Copyright (C) 2013 - Adam Green (https://github.com/adamgreen)
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+#if defined(TOOLCHAIN_GCC) && defined(__thumb2__)
+
+#include <stdio.h>
+
+
+/* Hand-written Thumb-2 assembly version of the standard C memcpy() for
+ lwIP: copies 16 bytes per loop iteration (four 4-byte ldr/str pairs), then
+ the remaining 0-15 bytes one at a time. NOTE(review): r0 is advanced and
+ never restored, so the dest pointer is not returned - check callers.
+*/
+__attribute__((naked)) void thumb2_memcpy(void* pDest, const void* pSource, size_t length)
+{
+ __asm (
+ ".syntax unified\n"
+ ".thumb\n"
+
+ // Copy 16 bytes at a time first (r3 = length / 16); no alignment is done.
+ " lsrs r3, r2, #4\n"
+ " beq.n 2$\n"
+ "1$: ldr r12, [r1], #4\n"
+ " str r12, [r0], #4\n"
+ " ldr r12, [r1], #4\n"
+ " str r12, [r0], #4\n"
+ " ldr r12, [r1], #4\n"
+ " str r12, [r0], #4\n"
+ " ldr r12, [r1], #4\n"
+ " str r12, [r0], #4\n"
+ " subs r3, #1\n"
+ " bne 1$\n"
+
+ // Copy the remaining length % 16 bytes one at a time.
+ "2$:\n"
+ " ands r3, r2, #0xf\n"
+ " beq.n 4$\n"
+ "3$: ldrb r12, [r1], #1\n"
+ " strb r12, [r0], #1\n"
+ " subs r3, #1\n"
+ " bne 3$\n"
+
+ // Return to caller; only r12 and argument registers were used.
+ "4$: bx lr\n"
+ );
+}
+
+#endif
mbed official


