/*
 * Guess the character set of buf with ENCA.
 *
 * log      - destination for error messages.
 * buf      - data to analyse (buf.start / buf.len are handed to ENCA).
 * language - ENCA language name; NULL or "" selects "__" (neutral language).
 *
 * Returns the charset name produced by enca_charset_name() in iconv naming
 * style, or NULL if ENCA reports an unknown charset or does not know the
 * requested language. In the latter case the list of languages ENCA
 * supports is written to the log as errors.
 */
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    if (!language || !language[0])
        language = "__"; // neutral language

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (!analyser) {
        mp_err(log, "ENCA doesn't know language '%s'\n", language);

        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // size_t index: langcnt is a size_t, avoid a signed/unsigned compare.
        for (size_t i = 0; i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        free(languages);
        return NULL;
    }

    enca_set_termination_strictness(analyser, 0);
    EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
    const char *name = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
    enca_analyser_free(analyser);

    // Only report a charset when ENCA actually recognised one.
    if (name && enc.charset != ENCA_CS_UNKNOWN)
        return name;
    return NULL;
}
/*
 * Guess the character set of buf.
 *
 * The buffer is first checked with bstr_validate_utf8(); any result above -8
 * is reported as "UTF-8" without consulting ENCA. Otherwise ENCA is asked,
 * using the given language (NULL or "" selects "__", the neutral language).
 *
 * Returns "UTF-8", the iconv-style name returned by enca_charset_name(), or
 * NULL when ENCA reports an unknown charset or does not know the language
 * (the supported languages are then logged as errors).
 */
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong sometimes
    // (suggested by divVerent). Explicitly allow cut-off UTF-8.
    if (bstr_validate_utf8(buf) > -8)
        return "UTF-8";

    if (!language || !language[0])
        language = "__"; // neutral language

    const char *detected_cp = NULL;

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (analyser) {
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
        // Ignore the name ENCA hands back for an unrecognised charset.
        if (tmp && enc.charset != ENCA_CS_UNKNOWN)
            detected_cp = tmp;
        enca_analyser_free(analyser);
    } else {
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // The count is a size_t, so the index is one as well.
        for (size_t i = 0; i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        free(languages);
    }

    return detected_cp;
}
/* process options and do some other initializations, then go through the file list and process files one by one at the end, exit and return 0 on succes, 1 on failure, 2 on troubles */ int main(int argc, char *argv[]) { char **pp_file, **flist; /* filename list pointer */ long int err; /* nonzero if process_file() ever returned nonzero */ EncaAnalyser an; /* Process command line arguments. */ pp_file = flist = process_opt(argc, argv); /* Initialization. */ if (options.verbosity_level > 2) fprintf(stderr, "Initializing language %s\n", options.language); an = enca_analyser_alloc(options.language); if (!an) { fprintf(stderr, "%s: Language `%s' is unknown or not supported.\n" "Run `%s --list languages' to get list " "of supported languages.\n" "Run `%s -L none' to test only language independent, " "multibyte encodings.\n", program_name, options.language, program_name, program_name); exit(EXIT_TROUBLE); } enca_set_threshold(an, 1.38); enca_set_multibyte(an, 1); enca_set_ambiguity(an, 1); enca_set_garbage_test(an, 1); /* Any files specified on command line? */ if (pp_file == NULL) { /* No => read stdin. */ err = process_file(an, NULL); } else { /* Process file list, cumultate the worst error in err. */ err = 0; while (*pp_file != NULL) { err |= process_file(an, *pp_file); enca_free(*pp_file); pp_file++; } } process_file(NULL, NULL); enca_analyser_free(an); enca_free(options.language); enca_free(options.target_enc_str); enca_free(flist); if (err & EXIT_TROUBLE) err = EXIT_TROUBLE; return err; }
/**
 * Guess the character set of a text buffer with ENCA.
 *
 * \param library            library handle, used for logging only
 * \param buffer             data to analyse
 * \param buflen             number of bytes in buffer
 * \param preferred_language ENCA language name to analyse with; compared
 *                           case-insensitively against ENCA's language list.
 *                           NULL skips detection.
 * \param fallback           charset name used when detection fails; may be
 *                           NULL
 * \return newly allocated (strdup) charset name that the caller must free;
 *         NULL if detection failed and no fallback was given, or if the
 *         copy could not be allocated
 */
void *ass_guess_buffer_cp(ASS_Library *library, unsigned char *buffer,
                          int buflen, char *preferred_language,
                          char *fallback)
{
    const char **languages;
    size_t langcnt = 0;
    size_t i;
    char *detected_sub_cp = NULL;

    languages = enca_get_languages(&langcnt);
    ass_msg(library, MSGL_V, "ENCA supported languages");
    for (i = 0; i < langcnt; i++)
        ass_msg(library, MSGL_V, "lang %s", languages[i]);

    /* strcasecmp() on a NULL string is undefined, so skip the search then. */
    for (i = 0; preferred_language && i < langcnt; i++) {
        EncaAnalyser analyser;
        EncaEncoding encoding;
        const char *tmp;

        if (strcasecmp(languages[i], preferred_language) != 0)
            continue;

        analyser = enca_analyser_alloc(languages[i]);
        if (analyser) {
            encoding = enca_analyse_const(analyser, buffer, buflen);
            tmp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
            if (tmp && encoding.charset != ENCA_CS_UNKNOWN) {
                detected_sub_cp = strdup(tmp);
                ass_msg(library, MSGL_INFO, "ENCA detected charset: %s", tmp);
            }
            enca_analyser_free(analyser);
        }
        /* The requested language was found; no other entry can match. */
        break;
    }

    free(languages);

    if (!detected_sub_cp) {
        if (fallback) {
            detected_sub_cp = strdup(fallback);
            ass_msg(library, MSGL_INFO,
                    "ENCA detection failed: fallback to %s", fallback);
        } else {
            ass_msg(library, MSGL_INFO,
                    "ENCA detection failed: no fallback charset given");
        }
    }

    return detected_sub_cp;
}
int cepgdata2xmltv::Process(int argc, char *argv[]) { FILE *f=fopen("/var/lib/epgsources/epgdata2xmltv","r"); if (!f) { esyslog("failed to open epgdata2xmltv config"); return 1; } char *line=NULL,*lptr=NULL; size_t size; if (getline(&line,&size,f)==(ssize_t) -1) { fclose(f); esyslog("failed to read epgdata2xmltv config"); return 1; } if (getline(&line,&size,f)==(ssize_t) -1) { fclose(f); if (line) free(line); esyslog("failed to read epgdata2xmltv config"); return 1; } char *sc=strchr(line,';'); if (sc) { *sc=0; sc++; } else { sc=line; } int daysmax=atoi(sc); if (daysmax<0) daysmax=1; int daysinadvance=atoi(argv[1]); if (daysinadvance<0) daysinadvance=1; if (daysinadvance>daysmax) daysinadvance=daysmax; bool head=false; char *xmlmem=NULL; time_t t=time(NULL); int carg=3; if (!strcmp(argv[3],"1") || !strcmp(argv[3],"0")) carg++; for (int day=0; day<=daysinadvance; day++) { time_t td=t+(day*86400); struct tm *tm; tm=localtime(&td); char vgl[10]; sprintf(vgl,"%04i%02i%02i",tm->tm_year+1900,tm->tm_mon+1,tm->tm_mday); char *dest=NULL; if (asprintf(&dest,"/tmp/%s_epgdata.zip",vgl)==-1) { esyslog("failed to allocate string"); continue; } bool ok=false; do { bool offline=true; struct stat statbuf; if (stat(dest,&statbuf)==-1) { if (Fetch(dest,argv[2],day)) { ok=true; break; } offline=false; } struct zip *zip=zip_open(dest,0,NULL); if (!zip) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to open %s",dest); ok=true; break; } int i=zip_name_locate(zip,"qy.dtd",ZIP_FL_NOCASE); if (i==-1) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed read qy.dtd in %s",dest); ok=true; break; } struct zip_file *zfile=zip_fopen_index(zip,i,0); if (!zfile) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to read qy.dtd from %s",dest); ok=true; break; } struct zip_stat sb; 
memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) { if (offline) { if (unlink(dest)==-1) { zip_fclose(zfile); esyslog("cannot unlink %s",dest); ok=true; break; } continue; } zip_fclose(zfile); esyslog("failed to stat qy.dtd in %s",dest); ok=true; break; } if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure if (dtdmem) { free(dtdmem); dtdmem=NULL; } dtdmem=(char *) malloc(sb.size+1); int size=zip_fread(zfile,dtdmem,sb.size); if (size!=sb.size) { zip_fclose(zfile); esyslog("failed to read qy.dtd from %s",dest); ok=true; break; } dtdmem[size]=0; zip_fclose(zfile); int entries=zip_get_num_files(zip); for (int i=0; i<entries; i++) { const char *name=zip_get_name(zip,i,0); if (strstr(name,"xml")) { // check date of xml if (strstr(name,vgl)) { struct zip_file *zfile=zip_fopen_index(zip,i,0); if (!zfile) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to read %s from %s",name,dest); ok=true; break; } struct zip_stat sb; memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to stat %s in %s",name,dest); ok=true; break; } if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure xmlmem=(char *) malloc(sb.size+1); int size=zip_fread(zfile,xmlmem,sb.size); if (size!=sb.size) { zip_fclose(zfile); free(xmlmem); xmlmem=NULL; esyslog("failed to read %s from %s",name,dest); ok=true; break; } xmlmem[size]=0; xmlmem=strreplace(xmlmem,"iso-8859-1","Windows-1252"); zip_fclose(zfile); ok=true; break; } } } if (!strcmp(argv[3],"1")) { int entries=zip_get_num_files(zip); for (int i=0; i<entries; i++) { const char *name=zip_get_name(zip,i,0); if (strstr(name,"jpg")) { char *destjpg; if (asprintf(&destjpg,"/var/lib/epgsources/epgdata2xmltv-img/%s",name)!=-1) { struct stat statbuf; if (stat(destjpg,&statbuf)==-1) { struct zip_file 
*zfile=zip_fopen_index(zip,i,0); if (zfile) { struct zip_stat sb; memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)!=-1) { if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure char *jpg=(char *) malloc(sb.size+1); if (jpg) { int size=zip_fread(zfile,jpg,sb.size); if (size==sb.size) { FILE *j=fopen(destjpg,"w+"); if (j) { fwrite(jpg,size,1,j); fclose(j); } } } } zip_fclose(zfile); } } free(destjpg); } } } } zip_close(zip); if (!ok) { if (offline) { if (unlink(dest)==-1) { ok=true; break; } continue; } else { esyslog("found no valid data in %s",dest); if (xmlmem) free(xmlmem); xmlmem=NULL; ok=true; break; } } } while (ok==false); free(dest); if (!line) { line=(char *) malloc(81); size=80; } if (!xmlmem) continue; long offset=ftell(f); xmlDocPtr pxmlDoc; if (!pxsltStylesheet) LoadXSLT(); int xmlsize=strlen(xmlmem); if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) { EncaAnalyser analyser=enca_analyser_alloc("__"); if (analyser) { EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize); const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV); if (cs) { if (!strcmp(cs,"UTF-8")) { xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8"); } else { esyslog("enca returned %s, please report!",cs); } } enca_analyser_free(analyser); } string s = xmlmem; int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s); if (reps) { xmlmem = (char *)realloc(xmlmem, s.size()+1); xmlsize = s.size(); strcpy(xmlmem,s.c_str()); } if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) { esyslog("failed parsing xml"); free(xmlmem); xmlmem=NULL; continue; } } for (;;) { lptr=line+1; line[0]=' '; if (getline(&lptr,&size,f)==-1) break; char *channel=line; char *sc=strchr(channel,';'); if (sc) *sc=0; bool use=false; for (int i=carg; i<argc; i++) { if (!strcasecmp(lptr,argv[i])) { use=true; break; } } if (use) { if (!head) { printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"); printf("<tv 
generator-info-name=\"epgdata2xmltv\">\n"); for (int i=carg; i<argc; i++) { printf("<channel id=\"%s\">\n",argv[i]); printf("<display-name lang=\"de\">%s</display-name>\n",argv[i]); printf("</channel>\n"); } head=true; } int num=atoi(sc+1); if (num>0) { char *channelnum=strdup(sc+1); char *lf=strchr(channelnum,10); if (lf) *lf=0; channel[0]='"'; *sc++='"'; *sc=0; const char *params[5] = { "channelid", channel, "channelnum",channelnum,NULL }; Translate(pxmlDoc,params); if (channelnum) free(channelnum); } } } xmlFreeDoc (pxmlDoc); fseek(f,offset,SEEK_SET); if (dtdmem) { free(dtdmem); dtdmem=NULL; } if (xmlmem) { free(xmlmem); xmlmem=NULL; } } if (line) free(line); fclose(f); if (head) printf("</tv>\n"); return head ? 0 : 1; }