/*
 * Ask ENCA to guess the character set of buf.
 *
 * language is the ENCA language code used for the analysis; NULL or an empty
 * string selects the neutral language "__".
 *
 * Returns the charset name in ENCA's iconv naming style, or NULL when the
 * charset is unknown or ENCA cannot allocate an analyser for the language
 * (in that case the list of supported languages is logged as an error).
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    if (!language || !language[0])
        language = "__"; // neutral language

    EncaAnalyser an = enca_analyser_alloc(language);
    if (!an) {
        // Unknown language: report it together with everything ENCA supports.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t count;
        const char **known = enca_get_languages(&count);
        mp_err(log, "ENCA supported languages:");
        for (int n = 0; n < count; n++)
            mp_err(log, " %s", known[n]);
        mp_err(log, "\n");
        // Only the array is released here, not the strings it points to.
        free(known);
        return NULL;
    }

    enca_set_termination_strictness(an, 0);
    EncaEncoding guess = enca_analyse_const(an, buf.start, buf.len);
    const char *name = enca_charset_name(guess.charset, ENCA_NAME_STYLE_ICONV);
    enca_analyser_free(an);

    if (!name || guess.charset == ENCA_CS_UNKNOWN)
        return NULL;
    return name;
}
/*
 * Guess the character set of buf, preferring our own UTF-8 check over ENCA.
 *
 * language is the ENCA language code used for the analysis; NULL or an empty
 * string selects the neutral language "__".
 *
 * Returns "UTF-8" if the buffer passes the UTF-8 validation, otherwise the
 * charset name reported by ENCA in its iconv naming style, or NULL when ENCA
 * reports an unknown charset or cannot allocate an analyser for the language.
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong
    // sometimes (suggested by divVerent). Explicitly allow cut-off UTF-8.
    if (bstr_validate_utf8(buf) > -8)
        return "UTF-8";

    const char *lang = (language && language[0]) ? language : "__"; // neutral
    const char *result = NULL;

    EncaAnalyser handle = enca_analyser_alloc(lang);
    if (handle) {
        enca_set_termination_strictness(handle, 0);
        EncaEncoding found = enca_analyse_const(handle, buf.start, buf.len);
        const char *cp = enca_charset_name(found.charset,
                                           ENCA_NAME_STYLE_ICONV);
        if (cp && found.charset != ENCA_CS_UNKNOWN)
            result = cp;
        enca_analyser_free(handle);
        return result;
    }

    // The analyser could not be created: log the language we asked for and
    // the full list ENCA offers, all on one line after the header.
    mp_err(log, "ENCA doesn't know language '%s'\n", lang);
    size_t total;
    const char **list = enca_get_languages(&total);
    mp_err(log, "ENCA supported languages:");
    int pos = 0;
    while (pos < total) {
        mp_err(log, " %s", list[pos]);
        pos++;
    }
    mp_err(log, "\n");
    free(list);
    return result;
}
/**
 * \brief Guess the character set of a buffer with ENCA.
 * \param library            library handle used for logging
 * \param buffer             data to analyse
 * \param buflen             number of bytes in buffer
 * \param preferred_language ENCA language code to analyse with; compared
 *                           case-insensitively against ENCA's language list
 * \param fallback           charset name returned when detection fails
 * \return newly allocated charset name that the caller must free(): the
 *         detected charset in ENCA's iconv naming style, or a copy of
 *         fallback. NULL if fallback is NULL or the copy cannot be allocated.
 */
void *ass_guess_buffer_cp(ASS_Library *library, unsigned char *buffer,
                          int buflen, char *preferred_language,
                          char *fallback)
{
    const char **languages;
    size_t langcnt = 0;
    EncaAnalyser analyser;
    EncaEncoding encoding;
    char *detected_sub_cp = NULL;
    size_t i;
    int can_detect;

    languages = enca_get_languages(&langcnt);
    if (!languages)
        langcnt = 0;

    ass_msg(library, MSGL_V, "ENCA supported languages");
    for (i = 0; i < langcnt; i++) {
        ass_msg(library, MSGL_V, "lang %s", languages[i]);
    }

    /* Without a language to match or any data to look at, go straight to
     * the fallback instead of handing NULL or a negative length to
     * strcasecmp() / ENCA. */
    can_detect = preferred_language && buffer && buflen > 0;

    for (i = 0; can_detect && i < langcnt; i++) {
        const char *tmp;

        if (strcasecmp(languages[i], preferred_language) != 0)
            continue;

        analyser = enca_analyser_alloc(languages[i]);
        if (!analyser)
            continue;   /* allocation failed; nothing to analyse with */

        encoding = enca_analyse_const(analyser, buffer, buflen);
        tmp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
        if (tmp && encoding.charset != ENCA_CS_UNKNOWN) {
            /* Keep "last match wins", but do not leak an earlier result. */
            free(detected_sub_cp);
            detected_sub_cp = strdup(tmp);
            ass_msg(library, MSGL_INFO, "ENCA detected charset: %s", tmp);
        }
        enca_analyser_free(analyser);
    }

    /* Only the array is released here, not the strings it points to. */
    free(languages);

    if (!detected_sub_cp) {
        if (fallback)
            detected_sub_cp = strdup(fallback);
        ass_msg(library, MSGL_INFO,
                "ENCA detection failed: fallback to %s",
                fallback ? fallback : "(null)");
    }

    return detected_sub_cp;
}
// Convert downloaded epgdata archives into XMLTV output on stdout.
//
// Arguments as used by this function:
//   argv[1]        number of days in advance to process (clamped to the
//                  maximum read from the config file)
//   argv[2]        passed through to Fetch()
//   argv[3]        "1" or "0"; when it is either of these the channel list
//                  starts at argv[4], otherwise at argv[3]; "1" additionally
//                  extracts jpg entries from the archive
//   argv[carg..]   channel names to emit
//
// The config file /var/lib/epgsources/epgdata2xmltv is read as follows: the
// first line is discarded, the number after the first ';' on the second line
// (or the whole line if it has no ';') is the maximum number of days, and the
// remaining lines are scanned once per day as "name;number" channel entries.
//
// Returns 0 if the XMLTV header was printed (at least one config entry
// matched a requested channel), 1 otherwise or on early config errors.
//
// NOTE(review): argv[1], argv[2] and argv[3] are read without checking argc.
int cepgdata2xmltv::Process(int argc, char *argv[])
{
    FILE *f=fopen("/var/lib/epgsources/epgdata2xmltv","r");
    if (!f) {
        esyslog("failed to open epgdata2xmltv config");
        return 1;
    }
    char *line=NULL,*lptr=NULL;
    size_t size;
    // First config line: read and ignored (overwritten by the next getline).
    // NOTE(review): line is not freed on this failure path, unlike the next.
    if (getline(&line,&size,f)==(ssize_t) -1) {
        fclose(f);
        esyslog("failed to read epgdata2xmltv config");
        return 1;
    }
    // Second config line: carries the maximum number of days.
    if (getline(&line,&size,f)==(ssize_t) -1) {
        fclose(f);
        if (line) free(line);
        esyslog("failed to read epgdata2xmltv config");
        return 1;
    }
    char *sc=strchr(line,';');
    if (sc) {
        *sc=0;
        sc++;
    } else {
        sc=line;
    }
    int daysmax=atoi(sc);
    if (daysmax<0) daysmax=1;
    int daysinadvance=atoi(argv[1]);
    if (daysinadvance<0) daysinadvance=1;
    if (daysinadvance>daysmax) daysinadvance=daysmax;
    bool head=false;        // true once the XMLTV header has been printed
    char *xmlmem=NULL;      // NUL-terminated xml entry of the current day
    time_t t=time(NULL);
    int carg=3;             // index of the first channel argument
    if (!strcmp(argv[3],"1") || !strcmp(argv[3],"0")) carg++;
    // One pass per day, today (day 0) through daysinadvance inclusive.
    for (int day=0; day<=daysinadvance; day++) {
        time_t td=t+(day*86400);
        struct tm *tm;
        tm=localtime(&td);
        // YYYYMMDD stamp; used for the archive file name and to pick the
        // xml entry inside the archive.
        char vgl[10];
        sprintf(vgl,"%04i%02i%02i",tm->tm_year+1900,tm->tm_mon+1,tm->tm_mday);
        char *dest=NULL;
        if (asprintf(&dest,"/tmp/%s_epgdata.zip",vgl)==-1) {
            esyslog("failed to allocate string");
            continue;
        }
        // Retry loop: ok stays false only on the `continue` paths below,
        // which run after an already-present ("offline") archive was
        // unlinked, so the next round goes through Fetch().
        bool ok=false;
        do {
            bool offline=true;      // archive was already on disk
            struct stat statbuf;
            if (stat(dest,&statbuf)==-1) {
                // Not on disk yet. A nonzero result from Fetch() ends the
                // attempt for this day (presumably a failure code - confirm
                // against Fetch()).
                if (Fetch(dest,argv[2],day)) {
                    ok=true;
                    break;
                }
                offline=false;
            }
            struct zip *zip=zip_open(dest,0,NULL);
            if (!zip) {
                if (offline) {
                    if (unlink(dest)==-1) {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed to open %s",dest);
                ok=true;
                break;
            }
            // NOTE(review): from here on the early `break`/`continue` paths
            // leave the do-body without calling zip_close(zip).
            int i=zip_name_locate(zip,"qy.dtd",ZIP_FL_NOCASE);
            if (i==-1) {
                if (offline) {
                    if (unlink(dest)==-1) {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed read qy.dtd in %s",dest);
                ok=true;
                break;
            }
            struct zip_file *zfile=zip_fopen_index(zip,i,0);
            if (!zfile) {
                if (offline) {
                    if (unlink(dest)==-1) {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed to read qy.dtd from %s",dest);
                ok=true;
                break;
            }
            struct zip_stat sb;
            memset(&sb,0,sizeof(sb));
            if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) {
                if (offline) {
                    // NOTE(review): zfile is closed only when unlink fails;
                    // the `continue` path leaves it open.
                    if (unlink(dest)==-1) {
                        zip_fclose(zfile);
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                zip_fclose(zfile);
                esyslog("failed to stat qy.dtd in %s",dest);
                ok=true;
                break;
            }
            // NOTE(review): sizeof is applied to the comparison
            // `sb.size>4`, which is always nonzero, so the size is always
            // masked to 24 bits; `sizeof(sb.size)>4` may have been intended.
            if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
            // Replace any previously loaded DTD with this archive's qy.dtd.
            if (dtdmem) {
                free(dtdmem);
                dtdmem=NULL;
            }
            // NOTE(review): malloc result is used without a NULL check.
            dtdmem=(char *) malloc(sb.size+1);
            // This `size` shadows the outer size_t `size` used for getline.
            int size=zip_fread(zfile,dtdmem,sb.size);
            if (size!=sb.size) {
                zip_fclose(zfile);
                esyslog("failed to read qy.dtd from %s",dest);
                ok=true;
                break;
            }
            dtdmem[size]=0;
            zip_fclose(zfile);
            // Look for the xml entry whose name contains this day's stamp.
            // NOTE(review): inside this for loop `continue`/`break` bind to
            // the for loop over entries, not to the do/while retry loop as
            // in the checks above, and the failure paths leave zfile open.
            int entries=zip_get_num_files(zip);
            for (int i=0; i<entries; i++) {
                const char *name=zip_get_name(zip,i,0);
                if (strstr(name,"xml")) {
                    // check date of xml
                    if (strstr(name,vgl)) {
                        struct zip_file *zfile=zip_fopen_index(zip,i,0);
                        if (!zfile) {
                            if (offline) {
                                if (unlink(dest)==-1) {
                                    esyslog("cannot unlink %s",dest);
                                    ok=true;
                                    break;
                                }
                                continue;
                            }
                            esyslog("failed to read %s from %s",name,dest);
                            ok=true;
                            break;
                        }
                        struct zip_stat sb;
                        memset(&sb,0,sizeof(sb));
                        if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) {
                            if (offline) {
                                if (unlink(dest)==-1) {
                                    esyslog("cannot unlink %s",dest);
                                    ok=true;
                                    break;
                                }
                                continue;
                            }
                            esyslog("failed to stat %s in %s",name,dest);
                            ok=true;
                            break;
                        }
                        // Same always-true sizeof expression as above.
                        if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
                        // NOTE(review): malloc result is not checked.
                        xmlmem=(char *) malloc(sb.size+1);
                        int size=zip_fread(zfile,xmlmem,sb.size);
                        if (size!=sb.size) {
                            zip_fclose(zfile);
                            free(xmlmem);
                            xmlmem=NULL;
                            esyslog("failed to read %s from %s",name,dest);
                            ok=true;
                            break;
                        }
                        xmlmem[size]=0;
                        // Rewrites the declared encoding name in the text;
                        // strreplace() returns the buffer to keep using.
                        xmlmem=strreplace(xmlmem,"iso-8859-1","Windows-1252");
                        zip_fclose(zfile);
                        ok=true;
                        break;
                    }
                }
            }
            // With argv[3] == "1", copy every jpg entry that does not exist
            // yet into the image directory. Failures here are silent.
            if (!strcmp(argv[3],"1")) {
                int entries=zip_get_num_files(zip);
                for (int i=0; i<entries; i++) {
                    const char *name=zip_get_name(zip,i,0);
                    if (strstr(name,"jpg")) {
                        char *destjpg;
                        if (asprintf(&destjpg,"/var/lib/epgsources/epgdata2xmltv-img/%s",name)!=-1) {
                            struct stat statbuf;
                            if (stat(destjpg,&statbuf)==-1) {
                                struct zip_file *zfile=zip_fopen_index(zip,i,0);
                                if (zfile) {
                                    struct zip_stat sb;
                                    memset(&sb,0,sizeof(sb));
                                    if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)!=-1) {
                                        if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
                                        // NOTE(review): jpg is never freed.
                                        char *jpg=(char *) malloc(sb.size+1);
                                        if (jpg) {
                                            int size=zip_fread(zfile,jpg,sb.size);
                                            if (size==sb.size) {
                                                FILE *j=fopen(destjpg,"w+");
                                                if (j) {
                                                    fwrite(jpg,size,1,j);
                                                    fclose(j);
                                                }
                                            }
                                        }
                                    }
                                    zip_fclose(zfile);
                                }
                            }
                            free(destjpg);
                        }
                    }
                }
            }
            zip_close(zip);
            // No usable xml entry was found in this archive.
            if (!ok) {
                if (offline) {
                    // Drop the stale archive and retry (next round fetches).
                    if (unlink(dest)==-1) {
                        ok=true;
                        break;
                    }
                    continue;
                } else {
                    esyslog("found no valid data in %s",dest);
                    if (xmlmem) free(xmlmem);
                    xmlmem=NULL;
                    ok=true;
                    break;
                }
            }
        } while (ok==false);
        free(dest);
        if (!line) {
            line=(char *) malloc(81);
            size=80;
        }
        if (!xmlmem) continue;
        // Remember where the channel entries start so the config file can be
        // rewound for the next day.
        long offset=ftell(f);
        xmlDocPtr pxmlDoc;
        if (!pxsltStylesheet) LoadXSLT();
        int xmlsize=strlen(xmlmem);
        // If the first parse fails, try to repair the document and parse
        // once more.
        if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) {
            // Let ENCA (neutral language "__") guess the encoding; if it
            // reports UTF-8, rewrite the encoding name in the text again.
            EncaAnalyser analyser=enca_analyser_alloc("__");
            if (analyser) {
                EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize);
                const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
                if (cs) {
                    if (!strcmp(cs,"UTF-8")) {
                        xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8");
                    } else {
                        esyslog("enca returned %s, please report!",cs);
                    }
                }
                enca_analyser_free(analyser);
            }
            // Rewrite every '&' that is not followed by 1-8 letters and ';'.
            // NOTE(review): the replacement text is "%amp;", not "&amp;" -
            // confirm whether this is intentional.
            string s = xmlmem;
            int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s);
            if (reps) {
                // NOTE(review): realloc result is not checked before use.
                xmlmem = (char *)realloc(xmlmem, s.size()+1);
                xmlsize = s.size();
                strcpy(xmlmem,s.c_str());
            }
            if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) {
                esyslog("failed parsing xml");
                free(xmlmem);
                xmlmem=NULL;
                continue;
            }
        }
        // Walk the remaining config lines ("name;number").
        for (;;) {
            // Each entry is read one byte into the buffer so that line[0]
            // is free to receive the opening quote further down.
            // NOTE(review): getline() is handed line+1, which is not the
            // start of the allocation, together with the full `size`.
            lptr=line+1;
            line[0]=' ';
            if (getline(&lptr,&size,f)==-1) break;
            char *channel=line;
            char *sc=strchr(channel,';');
            if (sc) *sc=0;
            // Is this entry one of the requested channels (case-insensitive)?
            bool use=false;
            for (int i=carg; i<argc; i++) {
                if (!strcasecmp(lptr,argv[i])) {
                    use=true;
                    break;
                }
            }
            if (use) {
                // Emit the XMLTV header and channel list once, on first match.
                if (!head) {
                    printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
                    printf("<tv generator-info-name=\"epgdata2xmltv\">\n");
                    for (int i=carg; i<argc; i++) {
                        printf("<channel id=\"%s\">\n",argv[i]);
                        printf("<display-name lang=\"de\">%s</display-name>\n",argv[i]);
                        printf("</channel>\n");
                    }
                    head=true;
                }
                // NOTE(review): sc is NULL here if the entry had no ';'.
                int num=atoi(sc+1);
                if (num>0) {
                    // Copy the number (without the trailing newline) before
                    // the line buffer is modified below.
                    char *channelnum=strdup(sc+1);
                    char *lf=strchr(channelnum,10);
                    if (lf) *lf=0;
                    // Wrap the channel name in double quotes in place:
                    // line[0] takes the opening quote, the former ';' the
                    // closing one.
                    channel[0]='"';
                    *sc++='"';
                    *sc=0;
                    const char *params[5] = { "channelid", channel, "channelnum",channelnum,NULL };
                    Translate(pxmlDoc,params);
                    if (channelnum) free(channelnum);
                }
            }
        }
        xmlFreeDoc (pxmlDoc);
        // Rewind to the first channel entry for the next day.
        fseek(f,offset,SEEK_SET);
        if (dtdmem) {
            free(dtdmem);
            dtdmem=NULL;
        }
        if (xmlmem) {
            free(xmlmem);
            xmlmem=NULL;
        }
    }
    if (line) free(line);
    fclose(f);
    if (head) printf("</tv>\n");
    return head ? 0 : 1;
}
/* process file named fname
   this is the `boss' function
   returns 0 on success, 1 on failure, 2 on troubles

   calling it with an == NULL only releases the persistent i/o buffer and
   returns 0; fname is not used in that case */
static int
process_file(EncaAnalyser an,
             const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);
  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  if (!an) {
    buffer_free(buffer);
    /* Forget the freed buffer so that a later call allocates a fresh one
       instead of reusing a dangling pointer. */
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.  Conversion needs the file open for update, and it stays
     open until file_free(); otherwise it is closed once the sample is read. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.  The _const variant is used when converting, the plain
     one otherwise. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      /* An empty sample is not reported as an error. */
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                program_name,
                ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);
    /* Failure: the encoding is unknown (and the input was not merely empty)
       or the converter reported ERR_CANNOT. */
    if ((err == ERR_OK
         && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}