ZBar bar code reader. http://zbar.sourceforge.net/ ZBar is licensed under the GNU LGPL 2.1 to enable development of both open source and commercial projects.
Dependents: GR-PEACH_Camera_in_barcode levkov_ov7670
qrdectxt.c
00001 /*Copyright (C) 2008-2009 Timothy B. Terriberry (tterribe@xiph.org) 00002 You can redistribute this library and/or modify it under the terms of the 00003 GNU Lesser General Public License as published by the Free Software 00004 Foundation; either version 2.1 of the License, or (at your option) any later 00005 version.*/ 00006 #include <stdio.h> 00007 #include <stdlib.h> 00008 #include <string.h> 00009 #if (1) /* does not convert character code */ 00010 typedef void *iconv_t; 00011 #else 00012 #include <iconv.h> 00013 #endif 00014 #include "qrcode.h" 00015 #include "qrdec.h" 00016 #include "util.h" 00017 #include "image.h" 00018 #include "error.h" 00019 #include "img_scanner.h" 00020 00021 static int text_is_ascii(const unsigned char *_text,int _len){ 00022 int i; 00023 for(i=0;i<_len;i++)if(_text[i]>=0x80)return 0; 00024 return 1; 00025 } 00026 00027 static int text_is_latin1(const unsigned char *_text,int _len){ 00028 int i; 00029 for(i=0;i<_len;i++){ 00030 /*The following line fails to compile correctly with gcc 3.4.4 on ARM with 00031 any optimizations enabled.*/ 00032 if(_text[i]>=0x80&&_text[i]<0xA0)return 0; 00033 } 00034 return 1; 00035 } 00036 00037 static void enc_list_mtf(iconv_t _enc_list[3],iconv_t _enc){ 00038 int i; 00039 for(i=0;i<3;i++)if(_enc_list[i]==_enc){ 00040 int j; 00041 for(j=i;j-->0;)_enc_list[j+1]=_enc_list[j]; 00042 _enc_list[0]=_enc; 00043 break; 00044 } 00045 } 00046 00047 int qr_code_data_list_extract_text(const qr_code_data_list *_qrlist, 00048 zbar_image_scanner_t *iscn, 00049 zbar_image_t *img) 00050 { 00051 iconv_t sjis_cd; 00052 iconv_t utf8_cd; 00053 iconv_t latin1_cd; 00054 const qr_code_data *qrdata; 00055 int nqrdata; 00056 unsigned char *mark; 00057 char **text; 00058 int ntext; 00059 int i; 00060 qrdata=_qrlist->qrdata; 00061 nqrdata=_qrlist->nqrdata; 00062 text=(char **)malloc(nqrdata*sizeof(*text)); 00063 mark=(unsigned char *)calloc(nqrdata,sizeof(*mark)); 00064 ntext=0; 00065 /*This is the encoding the standard 
says is the default.*/ 00066 #if (1) /* does not convert character code */ 00067 latin1_cd="ISO8859-1"; // dummy address 00068 #else 00069 latin1_cd=iconv_open("UTF-8","ISO8859-1"); 00070 #endif 00071 /*But this one is often used, as well.*/ 00072 #if (1) /* does not convert character code */ 00073 sjis_cd="SJIS"; // dummy address 00074 #else 00075 sjis_cd=iconv_open("UTF-8","SJIS"); 00076 #endif 00077 /*This is a trivial conversion just to check validity without extra code.*/ 00078 #if (1) /* does not convert character code */ 00079 utf8_cd="UTF-8"; // dummy address 00080 #else 00081 utf8_cd=iconv_open("UTF-8","UTF-8"); 00082 #endif 00083 for(i=0;i<nqrdata;i++)if(!mark[i]){ 00084 const qr_code_data *qrdataj; 00085 const qr_code_data_entry *entry; 00086 iconv_t enc_list[3]; 00087 iconv_t eci_cd; 00088 int sa[16]; 00089 int sa_size; 00090 char *sa_text; 00091 size_t sa_ntext; 00092 size_t sa_ctext; 00093 int fnc1; 00094 int eci; 00095 int err; 00096 int j; 00097 int k; 00098 /*Step 0: Collect the other QR codes belonging to this S-A group.*/ 00099 if(qrdata[i].sa_size){ 00100 unsigned sa_parity; 00101 sa_size=qrdata[i].sa_size; 00102 sa_parity=qrdata[i].sa_parity; 00103 for(j=0;j<sa_size;j++)sa[j]=-1; 00104 for(j=i;j<nqrdata;j++)if(!mark[j]){ 00105 /*TODO: We could also match version, ECC level, etc. 
if size and 00106 parity alone are too ambiguous.*/ 00107 if(qrdata[j].sa_size==sa_size&&qrdata[j].sa_parity==sa_parity&& 00108 sa[qrdata[j].sa_index]<0){ 00109 sa[qrdata[j].sa_index]=j; 00110 mark[j]=1; 00111 } 00112 } 00113 /*TODO: If the S-A group is complete, check the parity.*/ 00114 } 00115 else{ 00116 sa[0]=i; 00117 sa_size=1; 00118 } 00119 00120 sa_ctext=0; 00121 fnc1=0; 00122 /*Step 1: Detect FNC1 markers and estimate the required buffer size.*/ 00123 for(j=0;j<sa_size;j++)if(sa[j]>=0){ 00124 qrdataj=qrdata+sa[j]; 00125 for(k=0;k<qrdataj->nentries;k++){ 00126 int shift; 00127 entry=qrdataj->entries+k; 00128 shift=0; 00129 switch(entry->mode){ 00130 /*FNC1 applies to the entire code and ignores subsequent markers.*/ 00131 case QR_MODE_FNC1_1ST: 00132 case QR_MODE_FNC1_2ND:fnc1=1;break; 00133 /*2 SJIS bytes will be at most 4 UTF-8 bytes.*/ 00134 case QR_MODE_KANJI:shift++; 00135 /*We assume at most 4 UTF-8 bytes per input byte. 00136 I believe this is true for all the encodings we actually use.*/ 00137 case QR_MODE_BYTE:shift++; 00138 default:{ 00139 /*The remaining two modes are already valid UTF-8.*/ 00140 if(QR_MODE_HAS_DATA(entry->mode)){ 00141 sa_ctext+=entry->payload.data.len<<shift; 00142 } 00143 }break; 00144 } 00145 } 00146 } 00147 00148 /*Step 2: Convert the entries.*/ 00149 sa_text=(char *)malloc((sa_ctext+1)*sizeof(*sa_text)); 00150 sa_ntext=0; 00151 eci=-1; 00152 enc_list[0]=sjis_cd; 00153 enc_list[1]=latin1_cd; 00154 enc_list[2]=utf8_cd; 00155 eci_cd=(iconv_t)-1; 00156 err=0; 00157 zbar_symbol_t *syms = NULL, **sym = &syms; 00158 for(j = 0; j < sa_size && !err; j++, sym = &(*sym)->next) { 00159 *sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0); 00160 (*sym)->datalen = sa_ntext; 00161 if(sa[j]<0){ 00162 /* generic placeholder for unfinished results */ 00163 (*sym)->type = ZBAR_PARTIAL; 00164 00165 /*Skip all contiguous missing segments.*/ 00166 for(j++;j<sa_size&&sa[j]<0;j++); 00167 /*If there aren't any more, stop.*/ 00168 
if(j>=sa_size)break; 00169 00170 /* mark break in data */ 00171 sa_text[sa_ntext++]='\0'; 00172 (*sym)->datalen = sa_ntext; 00173 00174 /* advance to next symbol */ 00175 sym = &(*sym)->next; 00176 *sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0); 00177 } 00178 00179 qrdataj=qrdata+sa[j]; 00180 /* expose bounding box */ 00181 sym_add_point(*sym, qrdataj->bbox[0][0], qrdataj->bbox[0][1]); 00182 sym_add_point(*sym, qrdataj->bbox[2][0], qrdataj->bbox[2][1]); 00183 sym_add_point(*sym, qrdataj->bbox[3][0], qrdataj->bbox[3][1]); 00184 sym_add_point(*sym, qrdataj->bbox[1][0], qrdataj->bbox[1][1]); 00185 00186 for(k=0;k<qrdataj->nentries&&!err;k++){ 00187 size_t inleft; 00188 size_t outleft; 00189 char *in; 00190 char *out; 00191 entry=qrdataj->entries+k; 00192 switch(entry->mode){ 00193 case QR_MODE_NUM:{ 00194 if(sa_ctext-sa_ntext>=(size_t)entry->payload.data.len){ 00195 memcpy(sa_text+sa_ntext,entry->payload.data.buf, 00196 entry->payload.data.len*sizeof(*sa_text)); 00197 sa_ntext+=entry->payload.data.len; 00198 } 00199 else err=1; 00200 }break; 00201 case QR_MODE_ALNUM:{ 00202 char *p; 00203 in=(char *)entry->payload.data.buf; 00204 inleft=entry->payload.data.len; 00205 /*FNC1 uses '%' as an escape character.*/ 00206 if(fnc1)for(;;){ 00207 size_t plen; 00208 char c; 00209 p=memchr(in,'%',inleft*sizeof(*in)); 00210 if(p==NULL)break; 00211 plen=p-in; 00212 if(sa_ctext-sa_ntext<plen+1)break; 00213 memcpy(sa_text+sa_ntext,in,plen*sizeof(*in)); 00214 sa_ntext+=plen; 00215 /*Two '%'s is a literal '%'*/ 00216 if(plen+1<inleft&&p[1]=='%'){ 00217 c='%'; 00218 plen++; 00219 p++; 00220 } 00221 /*One '%' is the ASCII group separator.*/ 00222 else c=0x1D; 00223 sa_text[sa_ntext++]=c; 00224 inleft-=plen+1; 00225 in=p+1; 00226 } 00227 else p=NULL; 00228 if(p!=NULL||sa_ctext-sa_ntext<inleft)err=1; 00229 else{ 00230 memcpy(sa_text+sa_ntext,in,inleft*sizeof(*sa_text)); 00231 sa_ntext+=inleft; 00232 } 00233 }break; 00234 /*TODO: This will not handle a multi-byte sequence split 
between 00235 multiple data blocks. 00236 Does such a thing occur? 00237 Is it allowed? 00238 It requires copying buffers around to handle correctly.*/ 00239 case QR_MODE_BYTE:{ 00240 in=(char *)entry->payload.data.buf; 00241 inleft=entry->payload.data.len; 00242 out=sa_text+sa_ntext; 00243 outleft=sa_ctext-sa_ntext; 00244 /*If we have no specified encoding, attempt to auto-detect it.*/ 00245 if(eci<0){ 00246 int ei; 00247 /*First check for the UTF-8 BOM.*/ 00248 if(inleft>=3&& 00249 in[0]==(char)0xEF&&in[1]==(char)0xBB&&in[2]==(char)0xBF){ 00250 in+=3; 00251 inleft-=3; 00252 /*Actually try converting (to check validity).*/ 00253 #if (1) /* does not convert character code */ 00254 err=(in == NULL) || ( out == NULL ) || inleft > outleft; 00255 if (inleft > outleft) inleft = outleft; 00256 memcpy(out, in, inleft); 00257 #else 00258 err=utf8_cd==(iconv_t)-1|| 00259 iconv(utf8_cd,&in,&inleft,&out,&outleft)==(size_t)-1; 00260 #endif 00261 if(!err){ 00262 sa_ntext=out-sa_text; 00263 enc_list_mtf(enc_list,utf8_cd); 00264 continue; 00265 } 00266 in=(char *)entry->payload.data.buf; 00267 inleft=entry->payload.data.len; 00268 out=sa_text+sa_ntext; 00269 outleft=sa_ctext-sa_ntext; 00270 } 00271 /*If the text is 8-bit clean, prefer UTF-8 over SJIS, since SJIS 00272 will corrupt the backslashes used for DoCoMo formats.*/ 00273 else if(text_is_ascii((unsigned char *)in,inleft)){ 00274 enc_list_mtf(enc_list,utf8_cd); 00275 } 00276 /*Try our list of encodings.*/ 00277 for(ei=0;ei<3;ei++)if(enc_list[ei]!=(iconv_t)-1){ 00278 /*According to the standard, ISO/IEC 8859-1 (one hyphen) is 00279 supposed to be used, but reality is not always so. 00280 It's got an invalid range that is used often with SJIS 00281 and UTF-8, though, which makes detection easier. 00282 However, iconv() does not properly reject characters in 00283 those ranges, since ISO-8859-1 (two hyphens) defines a 00284 number of seldom-used control code characters there. 
00285 So if we see any of those characters, move this 00286 conversion to the end of the list.*/ 00287 if(ei<2&&enc_list[ei]==latin1_cd&& 00288 !text_is_latin1((unsigned char *)in,inleft)){ 00289 int ej; 00290 for(ej=ei+1;ej<3;ej++)enc_list[ej-1]=enc_list[ej]; 00291 enc_list[2]=latin1_cd; 00292 } 00293 #if (1) /* does not convert character code */ 00294 err=(in == NULL) || ( out == NULL ) || inleft > outleft; 00295 if (inleft > outleft) inleft = outleft; 00296 memcpy(out, in, inleft); 00297 #else 00298 err=iconv(enc_list[ei],&in,&inleft,&out,&outleft)==(size_t)-1; 00299 #endif 00300 if(!err){ 00301 sa_ntext=out-sa_text; 00302 enc_list_mtf(enc_list,enc_list[ei]); 00303 break; 00304 } 00305 in=(char *)entry->payload.data.buf; 00306 inleft=entry->payload.data.len; 00307 out=sa_text+sa_ntext; 00308 outleft=sa_ctext-sa_ntext; 00309 } 00310 } 00311 /*We were actually given a character set; use it.*/ 00312 else{ 00313 #if (1) /* does not convert character code */ 00314 err=(in == NULL) || ( out == NULL ) || inleft > outleft; 00315 if (inleft > outleft) inleft = outleft; 00316 memcpy(out, in, inleft); 00317 #else 00318 err=eci_cd==(iconv_t)-1|| 00319 iconv(eci_cd,&in,&inleft,&out,&outleft)==(size_t)-1; 00320 #endif 00321 if(!err)sa_ntext=out-sa_text; 00322 } 00323 }break; 00324 /*Kanji mode always uses SJIS.*/ 00325 case QR_MODE_KANJI:{ 00326 in=(char *)entry->payload.data.buf; 00327 inleft=entry->payload.data.len; 00328 out=sa_text+sa_ntext; 00329 outleft=sa_ctext-sa_ntext; 00330 #if (1) /* does not convert character code */ 00331 err=(in == NULL) || ( out == NULL ) || inleft > outleft; 00332 if (inleft > outleft) inleft = outleft; 00333 memcpy(out, in, inleft); 00334 #else 00335 err=sjis_cd==(iconv_t)-1|| 00336 iconv(sjis_cd,&in,&inleft,&out,&outleft)==(size_t)-1; 00337 #endif 00338 if(!err)sa_ntext=out-sa_text; 00339 }break; 00340 /*Check to see if a character set was specified.*/ 00341 case QR_MODE_ECI:{ 00342 const char *enc; 00343 char buf[16]; 00344 unsigned 
cur_eci; 00345 cur_eci=entry->payload.eci; 00346 if(cur_eci<=QR_ECI_ISO8859_16&&cur_eci!=14){ 00347 if(cur_eci!=QR_ECI_GLI0&&cur_eci!=QR_ECI_CP437){ 00348 sprintf(buf,"ISO8859-%i",QR_MAXI(cur_eci,3)-2); 00349 enc=buf; 00350 } 00351 /*Note that CP437 requires an iconv compiled with 00352 --enable-extra-encodings, and thus may not be available.*/ 00353 else enc="CP437"; 00354 } 00355 else if(cur_eci==QR_ECI_SJIS)enc="SJIS"; 00356 /*Don't know what this ECI code specifies, but not an encoding that 00357 we recognize.*/ 00358 else continue; 00359 eci=cur_eci; 00360 #if (1) /* does not convert character code */ 00361 eci_cd=enc; 00362 #else 00363 eci_cd=iconv_open("UTF-8",enc); 00364 #endif 00365 }break; 00366 /*Silence stupid compiler warnings.*/ 00367 default:break; 00368 } 00369 } 00370 /*If eci should be reset between codes, do so.*/ 00371 if(eci<=QR_ECI_GLI1){ 00372 eci=-1; 00373 #if (1) /* does not convert character code */ 00374 #else 00375 if(eci_cd!=(iconv_t)-1)iconv_close(eci_cd); 00376 #endif 00377 00378 } 00379 } 00380 #if (1) /* does not convert character code */ 00381 #else 00382 if(eci_cd!=(iconv_t)-1)iconv_close(eci_cd); 00383 #endif 00384 if(!err){ 00385 sa_text[sa_ntext++]='\0'; 00386 if(sa_ctext+1>sa_ntext){ 00387 sa_text=(char *)realloc(sa_text,sa_ntext*sizeof(*sa_text)); 00388 } 00389 00390 zbar_symbol_t *sa_sym; 00391 if(sa_size == 1) 00392 sa_sym = syms; 00393 else { 00394 /* create "virtual" container symbol for composite result */ 00395 sa_sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0); 00396 sa_sym->syms = _zbar_symbol_set_create(); 00397 sa_sym->syms->head = syms; 00398 00399 /* cheap out w/axis aligned bbox for now */ 00400 int xmin = img->width, xmax = -2; 00401 int ymin = img->height, ymax = -2; 00402 00403 /* fixup data references */ 00404 for(; syms; syms = syms->next) { 00405 _zbar_symbol_refcnt(syms, 1); 00406 if(syms->type == ZBAR_PARTIAL) 00407 sa_sym->type = ZBAR_PARTIAL; 00408 else 00409 for(j = 0; j < syms->npts; j++) { 
00410 int u = syms->pts[j].x; 00411 if(xmin >= u) xmin = u - 1; 00412 if(xmax <= u) xmax = u + 1; 00413 u = syms->pts[j].y; 00414 if(ymin >= u) ymin = u - 1; 00415 if(ymax <= u) ymax = u + 1; 00416 } 00417 syms->data = sa_text + syms->datalen; 00418 int next = (syms->next) ? syms->next->datalen : sa_ntext; 00419 assert(next > syms->datalen); 00420 syms->datalen = next - syms->datalen - 1; 00421 } 00422 if(xmax >= -1) { 00423 sym_add_point(sa_sym, xmin, ymin); 00424 sym_add_point(sa_sym, xmin, ymax); 00425 sym_add_point(sa_sym, xmax, ymax); 00426 sym_add_point(sa_sym, xmax, ymin); 00427 } 00428 } 00429 sa_sym->data = sa_text; 00430 sa_sym->data_alloc = sa_ntext; 00431 sa_sym->datalen = sa_ntext - 1; 00432 00433 _zbar_image_scanner_add_sym(iscn, sa_sym); 00434 } 00435 else { 00436 _zbar_image_scanner_recycle_syms(iscn, syms); 00437 free(sa_text); 00438 } 00439 } 00440 #if (1) /* does not convert character code */ 00441 #else 00442 if(utf8_cd!=(iconv_t)-1)iconv_close(utf8_cd); 00443 if(sjis_cd!=(iconv_t)-1)iconv_close(sjis_cd); 00444 if(latin1_cd!=(iconv_t)-1)iconv_close(latin1_cd); 00445 #endif 00446 free(mark); 00447 return ntext; 00448 } 00449
Generated on Tue Jul 12 2022 18:54:12 by 1.7.2