ZBar bar code reader. http://zbar.sourceforge.net/ ZBar is licensed under the GNU LGPL 2.1 to enable development of both open source and commercial projects.

Dependents:   GR-PEACH_Camera_in_barcode levkov_ov7670

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers qrdectxt.c Source File

qrdectxt.c

00001 /*Copyright (C) 2008-2009  Timothy B. Terriberry (tterribe@xiph.org)
00002   You can redistribute this library and/or modify it under the terms of the
00003    GNU Lesser General Public License as published by the Free Software
00004    Foundation; either version 2.1 of the License, or (at your option) any later
00005    version.*/
00006 #include <stdio.h>
00007 #include <stdlib.h>
00008 #include <string.h>
00009 #if (1) /* does not convert character code */
00010 typedef void *iconv_t;
00011 #else
00012 #include <iconv.h>
00013 #endif
00014 #include "qrcode.h"
00015 #include "qrdec.h"
00016 #include "util.h"
00017 #include "image.h"
00018 #include "error.h"
00019 #include "img_scanner.h"
00020 
/*Reports whether a byte string is pure 7-bit ASCII.
  _text: The bytes to inspect.
  _len:  The number of bytes in _text; values <=0 inspect nothing.
  Return: 1 if no inspected byte has its high bit set, 0 otherwise.*/
static int text_is_ascii(const unsigned char *_text,int _len){
  int i;
  for(i=0;i<_len;i++){
    if(_text[i]>=0x80)return 0;
  }
  return 1;
}
00026 
/*Reports whether a byte string avoids the 0x80...0x9F range, which the caller
   treats as a sign that the text is not ISO/IEC 8859-1.
  _text: The bytes to inspect.
  _len:  The number of bytes in _text; values <=0 inspect nothing.
  Return: 0 if any inspected byte lies in [0x80,0xA0), 1 otherwise.*/
static int text_is_latin1(const unsigned char *_text,int _len){
  int i;
  for(i=0;i<_len;i++){
    unsigned char c;
    c=_text[i];
    /*The original authors note that this comparison fails to compile
       correctly with gcc 3.4.4 on ARM with any optimizations enabled.*/
    if(c>=0x80&&c<0xA0)return 0;
  }
  return 1;
}
00036 
/*Moves an encoding to the front of a 3-entry preference list.
  The entries that preceded it each shift back by one slot, so their relative
   order is preserved.
  _enc_list: The list to reorder in place.
  _enc:      The entry to promote; if it is not in the list, nothing changes.*/
static void enc_list_mtf(iconv_t _enc_list[3],iconv_t _enc){
  int pos;
  for(pos=0;pos<3&&_enc_list[pos]!=_enc;pos++);
  /*Not found, or already at the front: nothing to move.*/
  if(pos>=3||pos==0)return;
  /*Source and destination overlap, hence memmove() rather than memcpy().*/
  memmove(_enc_list+1,_enc_list,pos*sizeof(*_enc_list));
  _enc_list[0]=_enc;
}
00046 
00047 int qr_code_data_list_extract_text(const qr_code_data_list *_qrlist,
00048                                    zbar_image_scanner_t *iscn,
00049                                    zbar_image_t *img)
00050 {
00051   iconv_t              sjis_cd;
00052   iconv_t              utf8_cd;
00053   iconv_t              latin1_cd;
00054   const qr_code_data  *qrdata;
00055   int                  nqrdata;
00056   unsigned char       *mark;
00057   char               **text;
00058   int                  ntext;
00059   int                  i;
00060   qrdata=_qrlist->qrdata;
00061   nqrdata=_qrlist->nqrdata;
00062   text=(char **)malloc(nqrdata*sizeof(*text));
00063   mark=(unsigned char *)calloc(nqrdata,sizeof(*mark));
00064   ntext=0;
00065   /*This is the encoding the standard says is the default.*/
00066 #if (1) /* does not convert character code */
00067   latin1_cd="ISO8859-1";    //  dummy address
00068 #else
00069   latin1_cd=iconv_open("UTF-8","ISO8859-1");
00070 #endif
00071   /*But this one is often used, as well.*/
00072 #if (1) /* does not convert character code */
00073   sjis_cd="SJIS";    //  dummy address
00074 #else
00075   sjis_cd=iconv_open("UTF-8","SJIS");
00076 #endif
00077   /*This is a trivial conversion just to check validity without extra code.*/
00078 #if (1) /* does not convert character code */
00079   utf8_cd="UTF-8";   //  dummy address
00080 #else
00081   utf8_cd=iconv_open("UTF-8","UTF-8");
00082 #endif
00083   for(i=0;i<nqrdata;i++)if(!mark[i]){
00084     const qr_code_data       *qrdataj;
00085     const qr_code_data_entry *entry;
00086     iconv_t                   enc_list[3];
00087     iconv_t                   eci_cd;
00088     int                       sa[16];
00089     int                       sa_size;
00090     char                     *sa_text;
00091     size_t                    sa_ntext;
00092     size_t                    sa_ctext;
00093     int                       fnc1;
00094     int                       eci;
00095     int                       err;
00096     int                       j;
00097     int                       k;
00098     /*Step 0: Collect the other QR codes belonging to this S-A group.*/
00099     if(qrdata[i].sa_size){
00100       unsigned sa_parity;
00101       sa_size=qrdata[i].sa_size;
00102       sa_parity=qrdata[i].sa_parity;
00103       for(j=0;j<sa_size;j++)sa[j]=-1;
00104       for(j=i;j<nqrdata;j++)if(!mark[j]){
00105         /*TODO: We could also match version, ECC level, etc. if size and
00106            parity alone are too ambiguous.*/
00107         if(qrdata[j].sa_size==sa_size&&qrdata[j].sa_parity==sa_parity&&
00108          sa[qrdata[j].sa_index]<0){
00109           sa[qrdata[j].sa_index]=j;
00110           mark[j]=1;
00111         }
00112       }
00113       /*TODO: If the S-A group is complete, check the parity.*/
00114     }
00115     else{
00116       sa[0]=i;
00117       sa_size=1;
00118     }
00119 
00120     sa_ctext=0;
00121     fnc1=0;
00122     /*Step 1: Detect FNC1 markers and estimate the required buffer size.*/
00123     for(j=0;j<sa_size;j++)if(sa[j]>=0){
00124       qrdataj=qrdata+sa[j];
00125       for(k=0;k<qrdataj->nentries;k++){
00126         int shift;
00127         entry=qrdataj->entries+k;
00128         shift=0;
00129         switch(entry->mode){
00130           /*FNC1 applies to the entire code and ignores subsequent markers.*/
00131           case QR_MODE_FNC1_1ST:
00132           case QR_MODE_FNC1_2ND:fnc1=1;break;
00133           /*2 SJIS bytes will be at most 4 UTF-8 bytes.*/
00134           case QR_MODE_KANJI:shift++;
00135           /*We assume at most 4 UTF-8 bytes per input byte.
00136             I believe this is true for all the encodings we actually use.*/
00137           case QR_MODE_BYTE:shift++;
00138           default:{
00139             /*The remaining two modes are already valid UTF-8.*/
00140             if(QR_MODE_HAS_DATA(entry->mode)){
00141               sa_ctext+=entry->payload.data.len<<shift;
00142             }
00143           }break;
00144         }
00145       }
00146     }
00147 
00148     /*Step 2: Convert the entries.*/
00149     sa_text=(char *)malloc((sa_ctext+1)*sizeof(*sa_text));
00150     sa_ntext=0;
00151     eci=-1;
00152     enc_list[0]=sjis_cd;
00153     enc_list[1]=latin1_cd;
00154     enc_list[2]=utf8_cd;
00155     eci_cd=(iconv_t)-1;
00156     err=0;
00157     zbar_symbol_t *syms = NULL, **sym = &syms;
00158     for(j = 0; j < sa_size && !err; j++, sym = &(*sym)->next) {
00159       *sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0);
00160       (*sym)->datalen = sa_ntext;
00161       if(sa[j]<0){
00162         /* generic placeholder for unfinished results */
00163         (*sym)->type = ZBAR_PARTIAL;
00164 
00165         /*Skip all contiguous missing segments.*/
00166         for(j++;j<sa_size&&sa[j]<0;j++);
00167         /*If there aren't any more, stop.*/
00168         if(j>=sa_size)break;
00169 
00170         /* mark break in data */
00171         sa_text[sa_ntext++]='\0';
00172         (*sym)->datalen = sa_ntext;
00173 
00174         /* advance to next symbol */
00175         sym = &(*sym)->next;
00176         *sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0);
00177       }
00178 
00179       qrdataj=qrdata+sa[j];
00180       /* expose bounding box */
00181       sym_add_point(*sym, qrdataj->bbox[0][0], qrdataj->bbox[0][1]);
00182       sym_add_point(*sym, qrdataj->bbox[2][0], qrdataj->bbox[2][1]);
00183       sym_add_point(*sym, qrdataj->bbox[3][0], qrdataj->bbox[3][1]);
00184       sym_add_point(*sym, qrdataj->bbox[1][0], qrdataj->bbox[1][1]);
00185 
00186       for(k=0;k<qrdataj->nentries&&!err;k++){
00187         size_t              inleft;
00188         size_t              outleft;
00189         char               *in;
00190         char               *out;
00191         entry=qrdataj->entries+k;
00192         switch(entry->mode){
00193           case QR_MODE_NUM:{
00194             if(sa_ctext-sa_ntext>=(size_t)entry->payload.data.len){
00195               memcpy(sa_text+sa_ntext,entry->payload.data.buf,
00196                entry->payload.data.len*sizeof(*sa_text));
00197               sa_ntext+=entry->payload.data.len;
00198             }
00199             else err=1;
00200           }break;
00201           case QR_MODE_ALNUM:{
00202             char *p;
00203             in=(char *)entry->payload.data.buf;
00204             inleft=entry->payload.data.len;
00205             /*FNC1 uses '%' as an escape character.*/
00206             if(fnc1)for(;;){
00207               size_t plen;
00208               char   c;
00209               p=memchr(in,'%',inleft*sizeof(*in));
00210               if(p==NULL)break;
00211               plen=p-in;
00212               if(sa_ctext-sa_ntext<plen+1)break;
00213               memcpy(sa_text+sa_ntext,in,plen*sizeof(*in));
00214               sa_ntext+=plen;
00215               /*Two '%'s is a literal '%'*/
00216               if(plen+1<inleft&&p[1]=='%'){
00217                 c='%';
00218                 plen++;
00219                 p++;
00220               }
00221               /*One '%' is the ASCII group separator.*/
00222               else c=0x1D;
00223               sa_text[sa_ntext++]=c;
00224               inleft-=plen+1;
00225               in=p+1;
00226             }
00227             else p=NULL;
00228             if(p!=NULL||sa_ctext-sa_ntext<inleft)err=1;
00229             else{
00230               memcpy(sa_text+sa_ntext,in,inleft*sizeof(*sa_text));
00231               sa_ntext+=inleft;
00232             }
00233           }break;
00234           /*TODO: This will not handle a multi-byte sequence split between
00235              multiple data blocks.
00236             Does such a thing occur?
00237             Is it allowed?
00238             It requires copying buffers around to handle correctly.*/
00239           case QR_MODE_BYTE:{
00240             in=(char *)entry->payload.data.buf;
00241             inleft=entry->payload.data.len;
00242             out=sa_text+sa_ntext;
00243             outleft=sa_ctext-sa_ntext;
00244             /*If we have no specified encoding, attempt to auto-detect it.*/
00245             if(eci<0){
00246               int ei;
00247               /*First check for the UTF-8 BOM.*/
00248               if(inleft>=3&&
00249                in[0]==(char)0xEF&&in[1]==(char)0xBB&&in[2]==(char)0xBF){
00250                 in+=3;
00251                 inleft-=3;
00252                 /*Actually try converting (to check validity).*/
00253 #if (1) /* does not convert character code */
00254                 err=(in == NULL) || ( out == NULL ) || inleft > outleft;
00255                 if (inleft > outleft) inleft = outleft;
00256                 memcpy(out, in, inleft);
00257 #else
00258                 err=utf8_cd==(iconv_t)-1||
00259                  iconv(utf8_cd,&in,&inleft,&out,&outleft)==(size_t)-1;
00260 #endif
00261                 if(!err){
00262                   sa_ntext=out-sa_text;
00263                   enc_list_mtf(enc_list,utf8_cd);
00264                   continue;
00265                 }
00266                 in=(char *)entry->payload.data.buf;
00267                 inleft=entry->payload.data.len;
00268                 out=sa_text+sa_ntext;
00269                 outleft=sa_ctext-sa_ntext;
00270               }
00271               /*If the text is 8-bit clean, prefer UTF-8 over SJIS, since SJIS
00272                  will corrupt the backslashes used for DoCoMo formats.*/
00273               else if(text_is_ascii((unsigned char *)in,inleft)){
00274                 enc_list_mtf(enc_list,utf8_cd);
00275               }
00276               /*Try our list of encodings.*/
00277               for(ei=0;ei<3;ei++)if(enc_list[ei]!=(iconv_t)-1){
00278                 /*According to the standard, ISO/IEC 8859-1 (one hyphen) is
00279                    supposed to be used, but reality is not always so.
00280                   It's got an invalid range that is used often with SJIS
00281                    and UTF-8, though, which makes detection easier.
00282                   However, iconv() does not properly reject characters in
00283                    those ranges, since ISO-8859-1 (two hyphens) defines a
00284                    number of seldom-used control code characters there.
00285                   So if we see any of those characters, move this
00286                    conversion to the end of the list.*/
00287                 if(ei<2&&enc_list[ei]==latin1_cd&&
00288                  !text_is_latin1((unsigned char *)in,inleft)){
00289                   int ej;
00290                   for(ej=ei+1;ej<3;ej++)enc_list[ej-1]=enc_list[ej];
00291                   enc_list[2]=latin1_cd;
00292                 }
00293 #if (1) /* does not convert character code */
00294                 err=(in == NULL) || ( out == NULL ) || inleft > outleft;
00295                 if (inleft > outleft) inleft = outleft;
00296                 memcpy(out, in, inleft);
00297 #else
00298                 err=iconv(enc_list[ei],&in,&inleft,&out,&outleft)==(size_t)-1;
00299 #endif
00300                 if(!err){
00301                   sa_ntext=out-sa_text;
00302                   enc_list_mtf(enc_list,enc_list[ei]);
00303                   break;
00304                 }
00305                 in=(char *)entry->payload.data.buf;
00306                 inleft=entry->payload.data.len;
00307                 out=sa_text+sa_ntext;
00308                 outleft=sa_ctext-sa_ntext;
00309               }
00310             }
00311             /*We were actually given a character set; use it.*/
00312             else{
00313 #if (1) /* does not convert character code */
00314               err=(in == NULL) || ( out == NULL ) || inleft > outleft;
00315               if (inleft > outleft) inleft = outleft;
00316               memcpy(out, in, inleft);
00317 #else
00318               err=eci_cd==(iconv_t)-1||
00319                iconv(eci_cd,&in,&inleft,&out,&outleft)==(size_t)-1;
00320 #endif
00321               if(!err)sa_ntext=out-sa_text;
00322             }
00323           }break;
00324           /*Kanji mode always uses SJIS.*/
00325           case QR_MODE_KANJI:{
00326             in=(char *)entry->payload.data.buf;
00327             inleft=entry->payload.data.len;
00328             out=sa_text+sa_ntext;
00329             outleft=sa_ctext-sa_ntext;
00330 #if (1) /* does not convert character code */
00331             err=(in == NULL) || ( out == NULL ) || inleft > outleft;
00332             if (inleft > outleft) inleft = outleft;
00333             memcpy(out, in, inleft);
00334 #else
00335             err=sjis_cd==(iconv_t)-1||
00336              iconv(sjis_cd,&in,&inleft,&out,&outleft)==(size_t)-1;
00337 #endif
00338             if(!err)sa_ntext=out-sa_text;
00339           }break;
00340           /*Check to see if a character set was specified.*/
00341           case QR_MODE_ECI:{
00342             const char *enc;
00343             char        buf[16];
00344             unsigned    cur_eci;
00345             cur_eci=entry->payload.eci;
00346             if(cur_eci<=QR_ECI_ISO8859_16&&cur_eci!=14){
00347               if(cur_eci!=QR_ECI_GLI0&&cur_eci!=QR_ECI_CP437){
00348                 sprintf(buf,"ISO8859-%i",QR_MAXI(cur_eci,3)-2);
00349                 enc=buf;
00350               }
00351               /*Note that CP437 requires an iconv compiled with
00352                  --enable-extra-encodings, and thus may not be available.*/
00353               else enc="CP437";
00354             }
00355             else if(cur_eci==QR_ECI_SJIS)enc="SJIS";
00356             /*Don't know what this ECI code specifies, but not an encoding that
00357                we recognize.*/
00358             else continue;
00359             eci=cur_eci;
00360 #if (1) /* does not convert character code */
00361             eci_cd=enc;
00362 #else
00363             eci_cd=iconv_open("UTF-8",enc);
00364 #endif
00365           }break;
00366           /*Silence stupid compiler warnings.*/
00367           default:break;
00368         }
00369       }
00370       /*If eci should be reset between codes, do so.*/
00371       if(eci<=QR_ECI_GLI1){
00372         eci=-1;
00373 #if (1) /* does not convert character code */
00374 #else
00375         if(eci_cd!=(iconv_t)-1)iconv_close(eci_cd);
00376 #endif
00377 
00378       }
00379     }
00380 #if (1) /* does not convert character code */
00381 #else
00382     if(eci_cd!=(iconv_t)-1)iconv_close(eci_cd);
00383 #endif
00384     if(!err){
00385       sa_text[sa_ntext++]='\0';
00386       if(sa_ctext+1>sa_ntext){
00387         sa_text=(char *)realloc(sa_text,sa_ntext*sizeof(*sa_text));
00388       }
00389 
00390       zbar_symbol_t *sa_sym;
00391       if(sa_size == 1)
00392           sa_sym = syms;
00393       else {
00394           /* create "virtual" container symbol for composite result */
00395           sa_sym = _zbar_image_scanner_alloc_sym(iscn, ZBAR_QRCODE, 0);
00396           sa_sym->syms = _zbar_symbol_set_create();
00397           sa_sym->syms->head = syms;
00398 
00399           /* cheap out w/axis aligned bbox for now */
00400           int xmin = img->width, xmax = -2;
00401           int ymin = img->height, ymax = -2;
00402 
00403           /* fixup data references */
00404           for(; syms; syms = syms->next) {
00405               _zbar_symbol_refcnt(syms, 1);
00406               if(syms->type == ZBAR_PARTIAL)
00407                   sa_sym->type = ZBAR_PARTIAL;
00408               else
00409                   for(j = 0; j < syms->npts; j++) {
00410                       int u = syms->pts[j].x;
00411                       if(xmin >= u) xmin = u - 1;
00412                       if(xmax <= u) xmax = u + 1;
00413                       u = syms->pts[j].y;
00414                       if(ymin >= u) ymin = u - 1;
00415                       if(ymax <= u) ymax = u + 1;
00416                   }
00417               syms->data = sa_text + syms->datalen;
00418               int next = (syms->next) ? syms->next->datalen : sa_ntext;
00419               assert(next > syms->datalen);
00420               syms->datalen = next - syms->datalen - 1;
00421           }
00422           if(xmax >= -1) {
00423               sym_add_point(sa_sym, xmin, ymin);
00424               sym_add_point(sa_sym, xmin, ymax);
00425               sym_add_point(sa_sym, xmax, ymax);
00426               sym_add_point(sa_sym, xmax, ymin);
00427           }
00428       }
00429       sa_sym->data = sa_text;
00430       sa_sym->data_alloc = sa_ntext;
00431       sa_sym->datalen = sa_ntext - 1;
00432 
00433       _zbar_image_scanner_add_sym(iscn, sa_sym);
00434     }
00435     else {
00436         _zbar_image_scanner_recycle_syms(iscn, syms);
00437         free(sa_text);
00438     }
00439   }
00440 #if (1) /* does not convert character code */
00441 #else
00442   if(utf8_cd!=(iconv_t)-1)iconv_close(utf8_cd);
00443   if(sjis_cd!=(iconv_t)-1)iconv_close(sjis_cd);
00444   if(latin1_cd!=(iconv_t)-1)iconv_close(latin1_cd);
00445 #endif
00446   free(mark);
00447   return ntext;
00448 }
00449