SC_FUNC int scan_utf8(FILE *fp,const char *filename) { static void *resetpos=NULL; int utf8=TRUE; int firstchar=TRUE,bom_found=FALSE; const unsigned char *ptr; resetpos=pc_getpossrc(fp,resetpos); while (utf8 && pc_readsrc(fp,srcline,sLINEMAX)!=NULL) { ptr=srcline; // if (firstchar) { // /* check whether the very first character on the very first line // * starts with a byte order mark (BOM) // */ // cell c=get_utf8_char(ptr,&ptr); // bom_found= (c==0xfeff); // utf8= (c>=0); // firstchar=FALSE; // } /* if */ while (utf8 && *ptr!='\0') utf8= (get_utf8_char(ptr,&ptr)>=0); } /* while */ pc_resetsrc(fp,resetpos); if (bom_found) { unsigned char bom[3]; pc_readsrc(fp,bom,3); /* read the BOM again to strip it from the file */ assert(bom[0]==0xef && bom[1]==0xbb && bom[2]==0xbf); if (!utf8) error(77,filename); /* malformed UTF-8 encoding */ } /* if */ return utf8; }
SC_FUNC int scan_utf8(FILE *fp,const char *filename) { #if defined NO_UTF8 return 0; #else void *resetpos=pc_getpossrc(fp); int utf8=TRUE; int firstchar=TRUE,bom_found=FALSE; const unsigned char *ptr; while (utf8 && pc_readsrc(fp,pline,sLINEMAX)!=NULL) { ptr=pline; if (firstchar) { /* check whether the very first character on the very first line * starts with a BYTE order mark */ cell c=get_utf8_char(ptr,&ptr); bom_found= (c==0xfeff); utf8= (c>=0); firstchar=FALSE; } /* if */ while (utf8 && *ptr!='\0') utf8= (get_utf8_char(ptr,&ptr)>=0); } /* while */ pc_resetsrc(fp,resetpos); if (bom_found) { unsigned char bom[3]; if (!utf8) error(77,filename); /* malformed UTF-8 encoding */ pc_readsrc(fp,bom,3); assert(bom[0]==0xef && bom[1]==0xbb && bom[2]==0xbf); } /* if */ return utf8; #endif /* NO_UTF8 */ }