Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
c-utility/src/utf8_checker.c@0:f7f1f0d76dd6, 2018-08-23 (annotated)
- Committer:
- XinZhangMS
- Date:
- Thu Aug 23 06:52:14 2018 +0000
- Revision:
- 0:f7f1f0d76dd6
azure-c-sdk for mbed os supporting NUCLEO_F767ZI
Who changed what in which revision?
| User | Revision | Line number | New contents of line |
|---|---|---|---|
| XinZhangMS | 0:f7f1f0d76dd6 | 1 | // Copyright (c) Microsoft. All rights reserved. |
| XinZhangMS | 0:f7f1f0d76dd6 | 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. |
| XinZhangMS | 0:f7f1f0d76dd6 | 3 | |
| XinZhangMS | 0:f7f1f0d76dd6 | 4 | #ifdef __cplusplus |
| XinZhangMS | 0:f7f1f0d76dd6 | 5 | #include <cstdlib> |
| XinZhangMS | 0:f7f1f0d76dd6 | 6 | #include <cstddef> |
| XinZhangMS | 0:f7f1f0d76dd6 | 7 | #include <cstdint> |
| XinZhangMS | 0:f7f1f0d76dd6 | 8 | #else |
| XinZhangMS | 0:f7f1f0d76dd6 | 9 | #include <stdlib.h> |
| XinZhangMS | 0:f7f1f0d76dd6 | 10 | #include <stdbool.h> |
| XinZhangMS | 0:f7f1f0d76dd6 | 11 | #include <stddef.h> |
| XinZhangMS | 0:f7f1f0d76dd6 | 12 | #include <stdint.h> |
| XinZhangMS | 0:f7f1f0d76dd6 | 13 | #endif |
| XinZhangMS | 0:f7f1f0d76dd6 | 14 | |
| XinZhangMS | 0:f7f1f0d76dd6 | 15 | #include "azure_c_shared_utility/utf8_checker.h" |
| XinZhangMS | 0:f7f1f0d76dd6 | 16 | |
/* Returns true when `byte` has the bit pattern 10xxxxxx, i.e. it is a UTF-8 continuation byte. */
static bool utf8_checker_is_continuation_byte(unsigned char byte)
{
    return (byte >> 6) == 0x02;
}

/*
 * Checks whether the `length` bytes starting at `utf8_str` form a well-formed UTF-8 byte sequence.
 *
 * utf8_str - bytes to validate; the buffer does not need to be NUL terminated.
 * length   - number of bytes of `utf8_str` to examine.
 *
 * Returns true when every byte belongs to a complete, shortest-form encoding of a Unicode scalar
 * value (U+0000..U+D7FF or U+E000..U+10FFFF), and also when `length` is 0.
 * Returns false when `utf8_str` is NULL, when a lead byte is not one of the 1/2/3/4 byte patterns,
 * when a continuation byte is missing or malformed, when an encoding is overlong, when it encodes
 * a UTF-16 surrogate (U+D800..U+DFFF) or when it encodes a value above U+10FFFF.
 */
bool utf8_checker_is_valid_utf8(const unsigned char* utf8_str, size_t length)
{
    bool result;

    if (utf8_str == NULL)
    {
        /* Codes_SRS_UTF8_CHECKER_01_002: [ If `utf8_checker_is_valid_utf8` is called with NULL `utf8_str` it shall return false. ]*/
        result = false;
    }
    else
    {
        size_t pos = 0;

        /* Codes_SRS_UTF8_CHECKER_01_003: [ If `length` is 0, `utf8_checker_is_valid_utf8` shall consider `utf8_str` to be valid UTF-8 and return true. ]*/
        result = true;

        /* Codes_SRS_UTF8_CHECKER_01_001: [ `utf8_checker_is_valid_utf8` shall verify that the sequence of chars pointed to by `utf8_str` represent UTF-8 encoded codepoints. ]*/
        while (result && (pos < length))
        {
            const unsigned char lead_byte = utf8_str[pos];
            size_t continuation_count = 0; /* number of 10xxxxxx bytes that must follow the lead byte */
            uint32_t code_point = 0;       /* payload bits accumulated so far */
            uint32_t min_code_point = 0;   /* smallest value that legitimately needs this many bytes */

            if ((lead_byte >> 7) == 0x00)
            {
                /* 1 byte */
                /* Codes_SRS_UTF8_CHECKER_01_006: [ 00000000 0xxxxxxx 0xxxxxxx ]*/
                code_point = lead_byte;
            }
            else if ((lead_byte >> 5) == 0x06)
            {
                /* 2 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_007: [ 00000yyy yyxxxxxx 110yyyyy 10xxxxxx ]*/
                code_point = (uint32_t)(lead_byte & 0x1F);
                continuation_count = 1;
                min_code_point = 0x80;
            }
            else if ((lead_byte >> 4) == 0x0E)
            {
                /* 3 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_008: [ zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx ]*/
                code_point = (uint32_t)(lead_byte & 0x0F);
                continuation_count = 2;
                min_code_point = 0x800;
            }
            else if ((lead_byte >> 3) == 0x1E)
            {
                /* 4 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_009: [ 000uuuuu zzzzyyyy yyxxxxxx 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]*/
                code_point = (uint32_t)(lead_byte & 0x07);
                continuation_count = 3;
                min_code_point = 0x10000;
            }
            else
            {
                /* error: stray continuation byte (10xxxxxx) or a lead byte of the form 11111xxx */
                result = false;
            }

            if (result)
            {
                size_t remaining;

                pos++;

                /* Fold in 6 payload bits per continuation byte; stop at the first byte that is
                   missing (input ends early) or that is not of the form 10xxxxxx. */
                for (remaining = continuation_count; result && (remaining > 0); remaining--)
                {
                    if ((pos < length) && utf8_checker_is_continuation_byte(utf8_str[pos]))
                    {
                        code_point = (code_point << 6) | (uint32_t)(utf8_str[pos] & 0x3F);
                        pos++;
                    }
                    else
                    {
                        result = false;
                    }
                }

                if (result)
                {
                    if (code_point < min_code_point)
                    {
                        /* overlong encoding: the value fits in a shorter sequence */
                        result = false;
                    }
                    else if ((code_point >= 0xD800) && (code_point <= 0xDFFF))
                    {
                        /* UTF-16 surrogates are not Unicode scalar values and must not appear in UTF-8 */
                        result = false;
                    }
                    else if (code_point > 0x10FFFF)
                    {
                        /* beyond the last Unicode code point */
                        result = false;
                    }
                    else
                    {
                        /* Codes_SRS_UTF8_CHECKER_01_005: [ On success it shall return true. ]*/
                        result = true;
                    }
                }
            }
        }
    }

    return result;
}