Forked from STM32F7 internet for nucleo F746ZG
Dependents: Nucleo_F746ZG_Ethernet_MQTT_Ultrasound
Fork of F7_Ethernet by
lwip-sys/arch/checksum.c@0:d26c1b55cfca, 2016-06-19 (annotated)
- Committer:
- DieterGraef
- Date:
- Sun Jun 19 16:23:40 2016 +0000
- Revision:
- 0:d26c1b55cfca
Ethernet library for the Nucleo STM32F746ZG and Disco STM32F746NG; works under the ARM and GCC environments.
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
DieterGraef | 0:d26c1b55cfca | 1 | /* Copyright (C) 2013 - Adam Green (https://github.com/adamgreen) |
DieterGraef | 0:d26c1b55cfca | 2 | |
DieterGraef | 0:d26c1b55cfca | 3 | Licensed under the Apache License, Version 2.0 (the "License"); |
DieterGraef | 0:d26c1b55cfca | 4 | you may not use this file except in compliance with the License. |
DieterGraef | 0:d26c1b55cfca | 5 | You may obtain a copy of the License at |
DieterGraef | 0:d26c1b55cfca | 6 | |
DieterGraef | 0:d26c1b55cfca | 7 | http://www.apache.org/licenses/LICENSE-2.0 |
DieterGraef | 0:d26c1b55cfca | 8 | |
DieterGraef | 0:d26c1b55cfca | 9 | Unless required by applicable law or agreed to in writing, software |
DieterGraef | 0:d26c1b55cfca | 10 | distributed under the License is distributed on an "AS IS" BASIS, |
DieterGraef | 0:d26c1b55cfca | 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
DieterGraef | 0:d26c1b55cfca | 12 | See the License for the specific language governing permissions and |
DieterGraef | 0:d26c1b55cfca | 13 | limitations under the License. |
DieterGraef | 0:d26c1b55cfca | 14 | */ |
DieterGraef | 0:d26c1b55cfca | 15 | #if defined(TOOLCHAIN_GCC) && defined(__thumb2__) |
DieterGraef | 0:d26c1b55cfca | 16 | |
DieterGraef | 0:d26c1b55cfca | 17 | |
DieterGraef | 0:d26c1b55cfca | 18 | /* Hand-written Thumb-2 assembly language version of the algorithm 3 version
DieterGraef | 0:d26c1b55cfca | 19 | of lwip_standard_chksum in lwIP's inet_chksum.c. The main loop is unrolled to
DieterGraef | 0:d26c1b55cfca | 20 | sum two 32-bit words per iteration, feeding each carry back into the sum.
DieterGraef | 0:d26c1b55cfca | 21 |
DieterGraef | 0:d26c1b55cfca | 22 | pData - first byte to sum (used from r0); odd and 2-byte-aligned starts are handled.
DieterGraef | 0:d26c1b55cfca | 23 | length - number of bytes to sum (used from r1).
DieterGraef | 0:d26c1b55cfca | 24 | Returns - 16-bit 1's complement summation (not inversed), left in r0.
DieterGraef | 0:d26c1b55cfca | 25 |
DieterGraef | 0:d26c1b55cfca | 26 | NOTE: The assembly language code does return a uint16_t in r0, but the function
DieterGraef | 0:d26c1b55cfca | 27 | is marked as void so that GCC doesn't warn about a return it cannot see.
DieterGraef | 0:d26c1b55cfca | 28 | NOTE(review): length is only tested against zero before the trailing-byte load, so a negative length would still read at least one byte - confirm callers never pass one.
DieterGraef | 0:d26c1b55cfca | 29 | */ |
DieterGraef | 0:d26c1b55cfca | 30 | __attribute__((naked)) void /*uint16_t*/ thumb2_checksum(const void* pData, int length) |
DieterGraef | 0:d26c1b55cfca | 31 | { |
DieterGraef | 0:d26c1b55cfca | 32 | __asm ( |
DieterGraef | 0:d26c1b55cfca | 33 | ".syntax unified\n" |
DieterGraef | 0:d26c1b55cfca | 34 | ".thumb\n" |
DieterGraef | 0:d26c1b55cfca | 35 | |
DieterGraef | 0:d26c1b55cfca | 36 | // Save r4, the only callee-saved register used here. lr is pushed as well to |
DieterGraef | 0:d26c1b55cfca | 37 | // keep the stack 8-byte aligned and so one pop can both restore r4 and return. |
DieterGraef | 0:d26c1b55cfca | 38 | " push {r4, lr}\n" |
DieterGraef | 0:d26c1b55cfca | 39 | // r2 holds the running 32-bit sum; start it at 0. |
DieterGraef | 0:d26c1b55cfca | 40 | " movs r2, #0\n" |
DieterGraef | 0:d26c1b55cfca | 41 | // r3 = 1 when pData starts at an odd address, 0 otherwise. It is tested again |
DieterGraef | 0:d26c1b55cfca | 42 | // at the end: with an odd start the summation is done at an offset of 1 byte, |
DieterGraef | 0:d26c1b55cfca | 43 | // so the folded result has to be byte-swapped. |
DieterGraef | 0:d26c1b55cfca | 44 | " ands r3, r0, #1\n" |
DieterGraef | 0:d26c1b55cfca | 45 | // Z is set when pData is already 2-byte aligned; skip the single-byte step. |
DieterGraef | 0:d26c1b55cfca | 46 | " beq 1$\n" |
DieterGraef | 0:d26c1b55cfca | 47 | // Odd start with nothing to sum: return straight away (r2 is still 0). |
DieterGraef | 0:d26c1b55cfca | 48 | " cbz r1, 9$\n" |
DieterGraef | 0:d26c1b55cfca | 49 | |
DieterGraef | 0:d26c1b55cfca | 50 | // 2-byte align by consuming the first byte. |
DieterGraef | 0:d26c1b55cfca | 51 | // The byte is shifted into bits 8-15 (the odd summation location) because the |
DieterGraef | 0:d26c1b55cfca | 52 | // whole result is swapped at the end. Overwriting r2 is safe since it was only |
DieterGraef | 0:d26c1b55cfca | 53 | // 0 so far. The post-indexed load advances r0; r1 is decremented to match. |
DieterGraef | 0:d26c1b55cfca | 54 | " ldrb r2, [r0], #1\n" |
DieterGraef | 0:d26c1b55cfca | 55 | " lsls r2, r2, #8\n" |
DieterGraef | 0:d26c1b55cfca | 56 | " subs r1, r1, #1\n" |
DieterGraef | 0:d26c1b55cfca | 57 | |
DieterGraef | 0:d26c1b55cfca | 58 | // Need to 4-byte align? If r0 is already a multiple of 4, skip ahead. |
DieterGraef | 0:d26c1b55cfca | 59 | "1$:\n" |
DieterGraef | 0:d26c1b55cfca | 60 | " ands r4, r0, #3\n" |
DieterGraef | 0:d26c1b55cfca | 61 | " beq 2$\n" |
DieterGraef | 0:d26c1b55cfca | 62 | // Fewer than 2 bytes left? Then there is no half-word to align with; go deal |
DieterGraef | 0:d26c1b55cfca | 63 | // with a possible trailing byte instead. |
DieterGraef | 0:d26c1b55cfca | 64 | " cmp r1, #2\n" |
DieterGraef | 0:d26c1b55cfca | 65 | " blt 7$\n" |
DieterGraef | 0:d26c1b55cfca | 66 | |
DieterGraef | 0:d26c1b55cfca | 67 | // 4-byte align with one half-word. r2 is at most 0xFF00 here, so no carry can occur. |
DieterGraef | 0:d26c1b55cfca | 68 | " ldrh r4, [r0], #2\n" |
DieterGraef | 0:d26c1b55cfca | 69 | " adds r2, r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 70 | " subs r1, r1, #2\n" |
DieterGraef | 0:d26c1b55cfca | 71 | |
DieterGraef | 0:d26c1b55cfca | 72 | // Main summing loop: consumes two 32-bit words (8 bytes) per iteration. |
DieterGraef | 0:d26c1b55cfca | 73 | // Leave the loop once fewer than 8 bytes remain. |
DieterGraef | 0:d26c1b55cfca | 74 | "2$:\n" |
DieterGraef | 0:d26c1b55cfca | 75 | " cmp r1, #8\n" |
DieterGraef | 0:d26c1b55cfca | 76 | " blt 3$\n" |
DieterGraef | 0:d26c1b55cfca | 77 | // Add the next two words. After each add, adc feeds the carry out of bit 31 |
DieterGraef | 0:d26c1b55cfca | 78 | // back into the sum (end-around carry). |
DieterGraef | 0:d26c1b55cfca | 79 | " ldr r4, [r0], #4\n" |
DieterGraef | 0:d26c1b55cfca | 80 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 81 | " adc r2, r2, #0\n" |
DieterGraef | 0:d26c1b55cfca | 82 | " ldr r4, [r0], #4\n" |
DieterGraef | 0:d26c1b55cfca | 83 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 84 | " adc r2, r2, #0\n" |
DieterGraef | 0:d26c1b55cfca | 85 | " subs r1, r1, #8\n" |
DieterGraef | 0:d26c1b55cfca | 86 | " b 2$\n" |
DieterGraef | 0:d26c1b55cfca | 87 | |
DieterGraef | 0:d26c1b55cfca | 88 | // Sum up any remaining half-words. |
DieterGraef | 0:d26c1b55cfca | 89 | "3$:\n" |
DieterGraef | 0:d26c1b55cfca | 90 | // Stop once fewer than 2 bytes remain. |
DieterGraef | 0:d26c1b55cfca | 91 | " cmp r1, #2\n" |
DieterGraef | 0:d26c1b55cfca | 92 | " blt 7$\n" |
DieterGraef | 0:d26c1b55cfca | 93 | // Add the next half-word, again feeding the carry back in. |
DieterGraef | 0:d26c1b55cfca | 94 | " ldrh r4, [r0], #2\n" |
DieterGraef | 0:d26c1b55cfca | 95 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 96 | " adc r2, r2, #0\n" |
DieterGraef | 0:d26c1b55cfca | 97 | " subs r1, r1, #2\n" |
DieterGraef | 0:d26c1b55cfca | 98 | " b 3$\n" |
DieterGraef | 0:d26c1b55cfca | 99 | |
DieterGraef | 0:d26c1b55cfca | 100 | // Handle the trailing byte, if there is one (r1 != 0). |
DieterGraef | 0:d26c1b55cfca | 101 | "7$:\n" |
DieterGraef | 0:d26c1b55cfca | 102 | " cbz r1, 8$\n" |
DieterGraef | 0:d26c1b55cfca | 103 | " ldrb r4, [r0]\n" |
DieterGraef | 0:d26c1b55cfca | 104 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 105 | " adc r2, r2, #0\n" |
DieterGraef | 0:d26c1b55cfca | 106 | |
DieterGraef | 0:d26c1b55cfca | 107 | // Fold the 32-bit sum into 16 bits. Done twice, as the first fold can carry into bit 16. |
DieterGraef | 0:d26c1b55cfca | 108 | "8$:\n" |
DieterGraef | 0:d26c1b55cfca | 109 | " ubfx r4, r2, #16, #16\n" |
DieterGraef | 0:d26c1b55cfca | 110 | " ubfx r2, r2, #0, #16\n" |
DieterGraef | 0:d26c1b55cfca | 111 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 112 | " ubfx r4, r2, #16, #16\n" |
DieterGraef | 0:d26c1b55cfca | 113 | " ubfx r2, r2, #0, #16\n" |
DieterGraef | 0:d26c1b55cfca | 114 | " adds r2, r4\n" |
DieterGraef | 0:d26c1b55cfca | 115 | |
DieterGraef | 0:d26c1b55cfca | 116 | // Swap the two result bytes if pData started at an odd address (r3 != 0). |
DieterGraef | 0:d26c1b55cfca | 117 | " cbz r3, 9$\n" |
DieterGraef | 0:d26c1b55cfca | 118 | " rev16 r2, r2\n" |
DieterGraef | 0:d26c1b55cfca | 119 | |
DieterGraef | 0:d26c1b55cfca | 120 | // Return the final sum in r0. |
DieterGraef | 0:d26c1b55cfca | 121 | "9$: mov r0, r2\n" |
DieterGraef | 0:d26c1b55cfca | 122 | " pop {r4, pc}\n" |
DieterGraef | 0:d26c1b55cfca | 123 | ); |
DieterGraef | 0:d26c1b55cfca | 124 | } |
DieterGraef | 0:d26c1b55cfca | 125 | |
DieterGraef | 0:d26c1b55cfca | 126 | #endif /* TOOLCHAIN_GCC && __thumb2__ */ |