UTF-8 to SJIS conversion

Dependencies:   mbed

This example program converts UTF-8 encoded text to Shift-JIS (SJIS) encoded text.

Committer:
MACRUM
Date:
Sun Aug 25 00:04:41 2013 +0000
Revision:
2:43bc6ceb9ae5
Parent:
1:9bd53bc5ff51
Fixed code conversion problem

Who changed what in which revision?

User | Revision | Line number | New contents of line
MACRUM 0:1215a9611188 1 #include "mbed.h"
MACRUM 0:1215a9611188 2 #include <locale.h>
MACRUM 0:1215a9611188 3 #include <cwchar>
MACRUM 0:1215a9611188 4
MACRUM 0:1215a9611188 5 #pragma import __use_all_ctype
MACRUM 0:1215a9611188 6
// Destination buffer for the Shift-JIS bytes produced by the conversion.
char buf_s[256];

// UTF-8 sample input: six 3-byte characters followed by "_ABCD_0123".
// Written as a string literal with hex escapes rather than a brace list of
// integers: a list such as {0xE6, ...} is a narrowing conversion (ill-formed
// in C++11) on platforms where plain char is signed. The stored bytes are
// the same, and the remainder of the array is zero-filled, so the data is
// NUL-terminated.
const char buf_u[256] =
    "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x96\x87\xE5\xAD\x97\xE5\x88\x97"
    "_ABCD_0123";

// Intermediate wide-character buffer shared by the conversion steps.
wchar_t wstr[256];
MACRUM 0:1215a9611188 10
MACRUM 0:1215a9611188 11 static int utf8tosjis(const char* utfBuffer, int utfBufLen, char* sjisBuffer, int sjisBufLen)
MACRUM 0:1215a9611188 12 {
MACRUM 0:1215a9611188 13 int i, wi;
MACRUM 0:1215a9611188 14 wchar_t wc;
MACRUM 0:1215a9611188 15 mbstate_t state = {0};
MACRUM 0:1215a9611188 16 size_t ret;
MACRUM 1:9bd53bc5ff51 17 char * current_locale;
MACRUM 1:9bd53bc5ff51 18 i = wi = 0;
MACRUM 0:1215a9611188 19
MACRUM 1:9bd53bc5ff51 20 current_locale = setlocale(LC_CTYPE, "UTF-8");
MACRUM 2:43bc6ceb9ae5 21 if (current_locale == NULL)
MACRUM 2:43bc6ceb9ae5 22 return 0;
MACRUM 0:1215a9611188 23
MACRUM 0:1215a9611188 24 while (1) {
MACRUM 0:1215a9611188 25 ret = mbrtowc(&wc, utfBuffer+i, 3, &state);
MACRUM 0:1215a9611188 26 if (ret == (size_t)-2 || ret == (size_t)-1) {
MACRUM 0:1215a9611188 27 printf("\nThere was a problem decoding the multibyte string.\n");
MACRUM 2:43bc6ceb9ae5 28 return ret;
MACRUM 0:1215a9611188 29 } else if (ret == 0) {
MACRUM 2:43bc6ceb9ae5 30 break; /* we hit \0, end of string */
MACRUM 0:1215a9611188 31 } else {
MACRUM 0:1215a9611188 32 i += ret;
MACRUM 0:1215a9611188 33 wstr[wi++] = wc;
MACRUM 0:1215a9611188 34 }
MACRUM 0:1215a9611188 35 }
MACRUM 0:1215a9611188 36 wstr[wi] = L'\0';
MACRUM 2:43bc6ceb9ae5 37
MACRUM 2:43bc6ceb9ae5 38 current_locale = setlocale(LC_CTYPE, "SJIS");
MACRUM 2:43bc6ceb9ae5 39 if (current_locale == NULL)
MACRUM 2:43bc6ceb9ae5 40 return 0;
MACRUM 2:43bc6ceb9ae5 41
MACRUM 2:43bc6ceb9ae5 42 ret = wcstombs(sjisBuffer, wstr, sjisBufLen);
MACRUM 2:43bc6ceb9ae5 43
MACRUM 0:1215a9611188 44 return ret;
MACRUM 0:1215a9611188 45 }
MACRUM 0:1215a9611188 46
MACRUM 0:1215a9611188 47 int main() {
MACRUM 2:43bc6ceb9ae5 48
MACRUM 2:43bc6ceb9ae5 49 size_t sz;
MACRUM 2:43bc6ceb9ae5 50
MACRUM 2:43bc6ceb9ae5 51 // assuming UTF-8 is NULL terminated
MACRUM 2:43bc6ceb9ae5 52 sz = strlen(buf_u);
MACRUM 2:43bc6ceb9ae5 53
MACRUM 2:43bc6ceb9ae5 54 printf("\nUTF-8:\n");
MACRUM 2:43bc6ceb9ae5 55 for(int i=0; i<sz; i++) {
MACRUM 2:43bc6ceb9ae5 56 printf("0x%02x ", buf_u[i]);
MACRUM 2:43bc6ceb9ae5 57 }
MACRUM 2:43bc6ceb9ae5 58
MACRUM 2:43bc6ceb9ae5 59 int len = utf8tosjis(buf_u, sz, buf_s, sizeof(buf_s));
MACRUM 2:43bc6ceb9ae5 60
MACRUM 2:43bc6ceb9ae5 61 printf("\nSJIS:\n");
MACRUM 2:43bc6ceb9ae5 62 if (len != -1) {
MACRUM 2:43bc6ceb9ae5 63 for(int i=0; i<len; i++) {
MACRUM 2:43bc6ceb9ae5 64 printf("0x%02x ", buf_s[i]);
MACRUM 2:43bc6ceb9ae5 65 }
MACRUM 2:43bc6ceb9ae5 66 printf("\n");
MACRUM 2:43bc6ceb9ae5 67 }
MACRUM 2:43bc6ceb9ae5 68
MACRUM 0:1215a9611188 69 return 0;
MACRUM 0:1215a9611188 70 }