Xin Zhang / azure-iot-c-sdk-f767zi

Dependents:   samplemqtt

c-utility/src/utf8_checker.c

Committer:
XinZhangMS
Date:
2018-08-23
Revision:
0:f7f1f0d76dd6

File content as of revision 0:f7f1f0d76dd6:

 // Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

#ifdef __cplusplus
#include <cstdlib>
#include <cstddef>
#include <cstdint>
#else
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#endif

#include "azure_c_shared_utility/utf8_checker.h"

/*
 * Checks whether a byte buffer is well-formed UTF-8.
 *
 * utf8_str - bytes to validate; does not need to be NUL-terminated.
 * length   - number of bytes of utf8_str to examine.
 *
 * Returns true when every byte in [0, length) belongs to a well-formed
 * UTF-8 sequence, false otherwise. A NULL utf8_str yields false; a zero
 * length (with non-NULL utf8_str) yields true.
 *
 * A sequence is rejected when:
 *   - the lead byte is a continuation byte (10xxxxxx) or 11111xxx,
 *   - a continuation byte is missing (buffer ends) or is not 10xxxxxx,
 *   - the encoding is overlong (code point fits in a shorter sequence),
 *   - the code point is a UTF-16 surrogate (U+D800..U+DFFF),
 *   - the code point is above U+10FFFF.
 */
bool utf8_checker_is_valid_utf8(const unsigned char* utf8_str, size_t length)
{
    bool result;

    if (utf8_str == NULL)
    {
        /* Codes_SRS_UTF8_CHECKER_01_002: [ If `utf8_checker_is_valid_utf8` is called with NULL `utf8_str` it shall return false. ]*/
        result = false;
    }
    else
    {
        size_t pos = 0;

        /* Codes_SRS_UTF8_CHECKER_01_003: [ If `length` is 0, `utf8_checker_is_valid_utf8` shall consider `utf8_str` to be valid UTF-8 and return true. ]*/
        /* Codes_SRS_UTF8_CHECKER_01_005: [ On success it shall return true. ]*/
        result = true;

        /* Codes_SRS_UTF8_CHECKER_01_001: [ `utf8_checker_is_valid_utf8` shall verify that the sequence of chars pointed to by `utf8_str` represent UTF-8 encoded codepoints. ]*/
        while ((result == true) &&
               (pos < length))
        {
            const unsigned char lead = utf8_str[pos];
            size_t remaining = 0;         /* continuation bytes still expected */
            uint32_t code_point = 0;      /* value decoded so far */
            uint32_t min_code_point = 0;  /* smallest value that legitimately needs this sequence length */

            if ((lead >> 7) == 0x00)
            {
                /* 1 byte */
                /* Codes_SRS_UTF8_CHECKER_01_006: [ 00000000 0xxxxxxx 0xxxxxxx ]*/
                remaining = 0;
                code_point = lead;
                min_code_point = 0x00;
            }
            else if ((lead >> 5) == 0x06)
            {
                /* 2 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_007: [ 00000yyy yyxxxxxx 110yyyyy 10xxxxxx ]*/
                remaining = 1;
                code_point = (uint32_t)(lead & 0x1F);
                min_code_point = 0x80;
            }
            else if ((lead >> 4) == 0x0E)
            {
                /* 3 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_008: [ zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx ]*/
                remaining = 2;
                code_point = (uint32_t)(lead & 0x0F);
                min_code_point = 0x800;
            }
            else if ((lead >> 3) == 0x1E)
            {
                /* 4 bytes */
                /* Codes_SRS_UTF8_CHECKER_01_009: [ 000uuuuu zzzzyyyy yyxxxxxx 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]*/
                remaining = 3;
                code_point = (uint32_t)(lead & 0x07);
                min_code_point = 0x10000;
            }
            else
            {
                /* error: stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx) */
                result = false;
            }

            if (result == true)
            {
                pos++;

                /* Consume the continuation bytes; each must exist and look like 10xxxxxx. */
                while ((result == true) &&
                       (remaining > 0))
                {
                    if ((pos < length) &&
                        ((utf8_str[pos] >> 6) == 0x02))
                    {
                        code_point = (code_point << 6) | (uint32_t)(utf8_str[pos] & 0x3F);
                        pos++;
                        remaining--;
                    }
                    else
                    {
                        /* truncated sequence or malformed continuation byte */
                        result = false;
                    }
                }

                if (result == true)
                {
                    if (code_point < min_code_point)
                    {
                        /* overlong encoding: the value fits in a shorter sequence */
                        result = false;
                    }
                    else if ((code_point >= 0xD800) &&
                             (code_point <= 0xDFFF))
                    {
                        /* UTF-16 surrogates are not valid scalar values in UTF-8 (RFC 3629) */
                        result = false;
                    }
                    else if (code_point > 0x10FFFF)
                    {
                        /* beyond the last Unicode code point (RFC 3629) */
                        result = false;
                    }
                    else
                    {
                        /* well-formed sequence; carry on with the next one */
                    }
                }
            }
        }
    }

    return result;
}