UTF-8 to SJIS conversion

Dependencies:   mbed

This example program converts a UTF-8 encoded string to Shift-JIS (SJIS) encoding.

main.cpp

Committer:
MACRUM
Date:
2013-08-25
Revision:
2:43bc6ceb9ae5
Parent:
1:9bd53bc5ff51

File content as of revision 2:43bc6ceb9ae5:

#include "mbed.h"
#include <locale.h>
#include <cwchar>

#pragma import __use_all_ctype

// Destination buffer: main() passes it to utf8tosjis() to receive the
// converted Shift-JIS bytes, then dumps it in hex.
char  buf_s[256];
// Sample UTF-8 input. The initializer holds six 3-byte sequences
// (U+65E5 U+672C U+8A9E U+6587 U+5B57 U+5217) followed by the ASCII text
// "_ABCD_0123" and a terminating 0x00; the rest of the array is zero-filled.
const char  buf_u[256] = {0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E,0xE6,0x96,0x87,0xE5,0xAD,0x97,0xE5,0x88,0x97,0x5F,0x41,0x42,0x43,0x44,0x5F,0x30,0x31,0x32,0x33,0x00};
// Intermediate wide-character string: utf8tosjis() fills it while decoding
// the UTF-8 input and then feeds it to wcstombs().
wchar_t wstr[256];

/**
 * Convert a UTF-8 byte string to Shift-JIS.
 *
 * The input is first decoded into the file-level wide-character buffer
 * `wstr` under the "UTF-8" LC_CTYPE locale, then re-encoded into
 * `sjisBuffer` under the "SJIS" locale. Decoding stops at the first NUL
 * byte or after `utfBufLen` bytes, whichever comes first.
 *
 * Side effects: overwrites `wstr` and leaves the process LC_CTYPE locale
 * changed (to "SJIS" on success). The locale names are the ones the
 * original code used; presumably they are specific to the target C
 * library -- confirm against the toolchain documentation.
 *
 * @param utfBuffer   UTF-8 input bytes.
 * @param utfBufLen   Number of input bytes available at `utfBuffer`.
 * @param sjisBuffer  Destination for the Shift-JIS bytes.
 * @param sjisBufLen  Capacity of `sjisBuffer` in bytes.
 * @return Number of bytes stored in `sjisBuffer` (as reported by
 *         wcstombs(), terminator not counted);
 *         0 if either locale could not be selected;
 *         -1 on invalid arguments, on an invalid UTF-8 sequence, when the
 *         input has more characters than `wstr` can hold, or when
 *         wcstombs() cannot convert a character;
 *         -2 if the input ends in the middle of a multibyte sequence.
 */
static int utf8tosjis(const char* utfBuffer, int utfBufLen, char* sjisBuffer, int sjisBufLen)
{
    /* Number of wchar_t slots in wstr, including the one for the terminator. */
    const int   wstrCapacity = (int)(sizeof(wstr) / sizeof(wstr[0]));
    int         i = 0;
    int         wi = 0;
    wchar_t     wc;
    mbstate_t   state = {0};
    size_t      ret;

    /* A negative length would turn into a huge size_t further down. */
    if (utfBuffer == NULL || utfBufLen < 0 || sjisBufLen < 0)
        return -1;

    if (setlocale(LC_CTYPE, "UTF-8") == NULL)
        return 0;

    while (i < utfBufLen) {
        /* Offer every remaining byte (and no more) to the decoder: this
         * keeps the read inside the caller's buffer and lets sequences
         * longer than three bytes be decoded. */
        ret = mbrtowc(&wc, utfBuffer + i, (size_t)(utfBufLen - i), &state);
        if (ret == (size_t)-2 || ret == (size_t)-1) {
            printf("\nThere was a problem decoding the multibyte string.\n");
            return (int)ret;
        } else if (ret == 0) {
            break;          /* we hit \0, end of string */
        }

        /* Always keep one slot free for the terminating L'\0'. */
        if (wi >= wstrCapacity - 1)
            return -1;

        i += (int)ret;
        wstr[wi++] = wc;
    }
    wstr[wi] = L'\0';

    if (setlocale(LC_CTYPE, "SJIS") == NULL)
        return 0;

    ret = wcstombs(sjisBuffer, wstr, (size_t)sjisBufLen);

    return (int)ret;
}

/**
 * Demo entry point: prints the sample UTF-8 bytes in hex, converts them
 * with utf8tosjis(), and prints the resulting Shift-JIS bytes in hex.
 * Nothing is printed after the "SJIS:" heading when the conversion
 * returns -1.
 */
int main() {

    // The sample input is NUL-terminated, so strlen() gives its byte length.
    const size_t sz = strlen(buf_u);

    printf("\nUTF-8:\n");
    for (size_t i = 0; i < sz; i++) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // (and printed as 0xffffffXX) where plain char is signed.
        printf("0x%02x ", (unsigned char)buf_u[i]);
    }

    int len = utf8tosjis(buf_u, (int)sz, buf_s, (int)sizeof(buf_s));

    printf("\nSJIS:\n");
    if (len != -1) {
        for (int i = 0; i < len; i++) {
            printf("0x%02x ", (unsigned char)buf_s[i]);
        }
        printf("\n");
    }

    return 0;
}