/*
 * Open an iconv conversion descriptor for recoding from from_enc to to_enc.
 *
 * The descriptor is stored into *icd.
 *
 * Returns 0 when the descriptor was opened and ERR_CANNOT when iconv_open()
 * fails with EINVAL (the conversion is not possible).  Any other failure is
 * treated as fatal: the errno message is printed and the program exits with
 * EXIT_TROUBLE.
 */
static int
do_iconv_open(EncaEncoding from_enc,
              EncaEncoding to_enc,
              iconv_t *icd)
{
  /* An unknown target charset falls back to the raw string from options. */
  const char *target_name = enca_charset_is_known(to_enc.charset)
                            ? enca_charset_name(to_enc.charset,
                                                ENCA_NAME_STYLE_ICONV)
                            : options.target_enc_str;
  const char *source_name = enca_charset_name(from_enc.charset,
                                              ENCA_NAME_STYLE_ICONV);

  assert(source_name != NULL);
  assert(target_name != NULL);

  /* iconv_open() takes (target, source), the reverse of our own order. */
  *icd = iconv_open(target_name, source_name);
  if (*icd == (iconv_t)-1) {
    /* EINVAL means this conversion is not possible. */
    if (errno == EINVAL)
      return ERR_CANNOT;

    /* Anything else means we ran out of memory or file descriptors. */
    fprintf(stderr, "%s: Aborting: %s\n", program_name, strerror(errno));
    exit(EXIT_TROUBLE);
  }

  return 0;
}
/*
 * Convert a file in place using the UNIX98 iconv functions.
 *
 * The source encoding is from_enc, the target is options.target_enc.
 *
 * Returns ERR_CANNOT when the conversion is refused or iconv cannot open it,
 * ERR_IOFAIL when preparing the temporary copy or the output file fails, and
 * otherwise whatever iconv_one_step() returns.
 */
int
convert_iconv(File *file,
              EncaEncoding from_enc)
{
  static int ascii = ENCA_CS_UNKNOWN;
  File *tempfile = NULL;
  int err = ERR_IOFAIL;
  iconv_t icd;

  /* Look the ASCII charset id up once and cache it. */
  if (!enca_charset_is_known(ascii)) {
    ascii = enca_name_to_charset("ascii");
    assert(enca_charset_is_known(ascii));
  }

  /* iconv cannot convert from an encoding it has no name for. */
  if (!enca_charset_name(from_enc.charset, ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;

  /* A known target charset must have an iconv name as well. */
  if (enca_charset_is_known(options.target_enc.charset)
      && !enca_charset_name(options.target_enc.charset,
                            ENCA_NAME_STYLE_ICONV))
    return ERR_CANNOT;

  /* Conversion to ASCII is not attempted; it can only damage the files. */
  if (options.target_enc.charset == ascii)
    return ERR_CANNOT;

  /* Fail early on really silly surfaces. */
  if (!acceptable_surface(from_enc))
    return ERR_CANNOT;
  if (!acceptable_surface(options.target_enc))
    return ERR_CANNOT;

  /* Is the conversion possible? */
  if (do_iconv_open(from_enc, options.target_enc, &icd) != 0)
    return ERR_CANNOT;

  /*
   * iconv does not recode files in place, so the contents go to a temporary
   * file first: the already-read buffer is written out, then the rest of the
   * input is copied.  The steps run strictly in this order and the first
   * failure leaves err at ERR_IOFAIL.
   */
  tempfile = file_temporary(file->buffer, 1);
  if (tempfile == NULL)
    goto cleanup;
  if (file_write(tempfile) == -1)
    goto cleanup;
  if (copy_and_convert(file, tempfile, NULL) != 0)
    goto cleanup;

  /* A named file is rewound here and truncated below. */
  if (file->name && file_seek(file, 0, SEEK_SET) != 0)
    goto cleanup;
  if (file_seek(tempfile, 0, SEEK_SET) != 0)
    goto cleanup;
  if (file->name && file_truncate(file, 0) != 0)
    goto cleanup;

  /* A nameless file (stdin) is fake-reopened for writing to stdout. */
  if (!file->name
      && (file_close(file) != 0 || file_open(file, "wb") != 0))
    goto cleanup;

  /* Create the second buffer lazily; the system default size suffices. */
  if (!buffer_iconv)
    buffer_iconv = buffer_new(0);
  tempfile->buffer = buffer_iconv;

  err = iconv_one_step(tempfile, file, icd);

cleanup:
  file_free(tempfile);
  do_iconv_close(icd);

  return err;
}
/*
 * Guess the charset of buf with ENCA.
 *
 * language is the ENCA language hint; NULL or "" selects the neutral
 * language "__".  Returns the iconv-style charset name, or NULL when the
 * charset is unknown or ENCA rejects the language (in which case the list of
 * supported languages is logged as an error).
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    const char *lang = (language && language[0]) ? language : "__";

    EncaAnalyser analyser = enca_analyser_alloc(lang);
    if (!analyser) {
        mp_err(log, "ENCA doesn't know language '%s'\n", lang);

        size_t langcnt;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        int idx = 0;
        while (idx < langcnt) {
            mp_err(log, " %s", languages[idx]);
            idx++;
        }
        mp_err(log, "\n");
        free(languages);
        return NULL;
    }

    enca_set_termination_strictness(analyser, 0);
    EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
    const char *name = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);

    /* Only report a charset that ENCA both recognised and can name. */
    const char *detected_cp = NULL;
    if (name && enc.charset != ENCA_CS_UNKNOWN)
        detected_cp = name;

    enca_analyser_free(analyser);
    return detected_cp;
}
/**
 * Checks the sample for doubly-encoded UTF-8 and, when it looks so, prints
 * one line naming the candidate source charset(s).
 *
 * Nothing is checked or printed unless the output type is OTYPE_DETAILS or
 * OTYPE_HUMAN.
 **/
static void
double_utf8_chk(EncaAnalyser an,
                const unsigned char *sample,
                size_t size)
{
  size_t count, k;
  int *cands;

  if (!(options.output_type == OTYPE_DETAILS
        || options.output_type == OTYPE_HUMAN))
    return;

  count = enca_double_utf8_check(an, sample, size);
  if (count == 0)
    return;

  cands = enca_double_utf8_get_candidates(an);

  /* Singular or plural heading depending on the number of candidates. */
  fputs(count == 1 ? "  Doubly-encoded to UTF-8 from"
                   : "  Doubly-encoded to UTF-8 from one of:",
        stdout);

  k = 0;
  while (k < count) {
    printf(" %s", enca_charset_name(cands[k], ENCA_NAME_STYLE_ENCA));
    k++;
  }
  putchar('\n');

  enca_free(cands);
}
/*
 * Guess the charset of buf, preferring our own UTF-8 check over ENCA.
 *
 * language is the ENCA language hint; NULL or "" selects the neutral
 * language "__".  Returns "UTF-8" when the buffer passes the UTF-8 check,
 * otherwise the iconv-style name ENCA reports, or NULL when the charset is
 * unknown or ENCA rejects the language (the supported languages are then
 * logged as an error).
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong
    // sometimes (suggested by divVerent). Explicitly allow cut-off UTF-8.
    int utf8_status = bstr_validate_utf8(buf);
    if (utf8_status > -8)
        return "UTF-8";

    const char *lang = language;
    if (!lang || !lang[0])
        lang = "__"; // neutral language

    EncaAnalyser analyser = enca_analyser_alloc(lang);
    if (analyser) {
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *name = enca_charset_name(enc.charset,
                                             ENCA_NAME_STYLE_ICONV);
        // Only report a charset that ENCA both recognised and can name.
        const char *result =
            (name && enc.charset != ENCA_CS_UNKNOWN) ? name : NULL;
        enca_analyser_free(analyser);
        return result;
    }

    // The analyser could not be allocated: report what ENCA does support.
    mp_err(log, "ENCA doesn't know language '%s'\n", lang);
    size_t langcnt;
    const char **languages = enca_get_languages(&langcnt);
    mp_err(log, "ENCA supported languages:");
    for (int n = 0; n < langcnt; n++)
        mp_err(log, " %s", languages[n]);
    mp_err(log, "\n");
    free(languages);
    return NULL;
}
/**
 * Guess the charset of a buffer with ENCA.
 *
 * \param library            library handle, used for logging only
 * \param buffer             data to analyse
 * \param buflen             number of bytes in buffer
 * \param preferred_language ENCA language name, matched case-insensitively
 *                           against the languages ENCA reports
 * \param fallback           charset name returned when detection fails
 * \return newly allocated (strdup) charset name that the caller must free():
 *         the detected iconv-style name, or a copy of fallback when detection
 *         fails.  NULL is returned when detection fails and fallback is NULL,
 *         or when the copy cannot be allocated.
 *
 * Detection is skipped (and the fallback used) when preferred_language or
 * buffer is NULL, when buflen is not positive, or when ENCA cannot allocate
 * an analyser for the language.
 */
void *ass_guess_buffer_cp(ASS_Library *library, unsigned char *buffer,
                          int buflen, char *preferred_language,
                          char *fallback)
{
    const char **languages;
    size_t langcnt = 0;
    size_t i;
    EncaAnalyser analyser;
    EncaEncoding encoding;
    char *detected_sub_cp = NULL;
    int can_detect = preferred_language && buffer && buflen > 0;

    languages = enca_get_languages(&langcnt);
    if (!languages)
        langcnt = 0;

    ass_msg(library, MSGL_V, "ENCA supported languages");
    for (i = 0; i < langcnt; i++)
        ass_msg(library, MSGL_V, "lang %s", languages[i]);

    for (i = 0; can_detect && i < langcnt; i++) {
        const char *tmp;

        if (strcasecmp(languages[i], preferred_language) != 0)
            continue;

        analyser = enca_analyser_alloc(languages[i]);
        if (analyser) {
            encoding = enca_analyse_const(analyser, buffer, (size_t) buflen);
            tmp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
            if (tmp && encoding.charset != ENCA_CS_UNKNOWN) {
                detected_sub_cp = strdup(tmp);
                ass_msg(library, MSGL_INFO, "ENCA detected charset: %s", tmp);
            }
            enca_analyser_free(analyser);
        }

        /* Only one language is analysed; stopping here also guarantees the
         * strdup() result above can never be overwritten and leaked. */
        break;
    }

    free(languages);

    if (!detected_sub_cp && fallback) {
        detected_sub_cp = strdup(fallback);
        ass_msg(library, MSGL_INFO,
                "ENCA detection failed: fallback to %s", fallback);
    }

    return detected_sub_cp;
}
/*
 * Convert downloaded epgdata archives to XMLTV on stdout.
 *
 * Arguments used by this function:
 *   argv[1]  number of days to process in advance (atoi; negative becomes 1,
 *            values above the configured maximum are clamped to it)
 *   argv[2]  passed unchanged to Fetch() together with the zip path and day
 *   argv[3]  when "1" or "0" it is treated as a flag and the channel list
 *            starts at argv[4], otherwise the channel list starts at argv[3];
 *            "1" additionally extracts zip entries whose name contains "jpg"
 *            to /var/lib/epgsources/epgdata2xmltv-img/ unless already present
 *   argv[carg..argc-1]  channel names, compared case-insensitively
 *
 * Outline:
 *   1. Open /var/lib/epgsources/epgdata2xmltv and read two lines into the
 *      same getline() buffer, so only the second survives.  The text after
 *      its first ';' (or the whole line when there is none) is parsed with
 *      atoi() as the maximum number of days.
 *   2. For each day 0..daysinadvance (inclusive) build /tmp/YYYYMMDD_epgdata.zip
 *      from local time.  If the file does not exist, Fetch() is called; a
 *      nonzero result skips the day.  A zip that was already on disk
 *      ("offline") and turns out to be unusable is unlinked and the do/while
 *      loop runs again.
 *   3. Read "qy.dtd" from the zip into dtdmem, then read the first entry whose
 *      name contains both "xml" and the date string into xmlmem, replacing
 *      "iso-8859-1" with "Windows-1252".
 *   4. Parse xmlmem with xmlParseMemory().  On failure, ask enca for the
 *      charset (switching the declaration to UTF-8 when enca says so), rewrite
 *      '&' characters not followed by 1-8 letters and ';', and parse again.
 *   5. Read the remaining lines of the config file.  For every line whose
 *      part before ';' matches a requested channel, print the XMLTV header
 *      once and call Translate() with the quoted channel id and the channel
 *      number, provided the number after ';' is greater than zero.  The file
 *      position is restored afterwards so the lines are re-read for each day.
 *
 * Returns 0 when the XMLTV header was printed, 1 otherwise (including when
 * the config file cannot be opened or read).
 *
 * dtdmem and pxsltStylesheet are not declared in this function; presumably
 * they are class members - TODO confirm.
 *
 * NOTE(review): sizeof(sb.size>4) takes the size of a boolean expression and
 *   is therefore always nonzero, so the 0x00FFFFFF mask is always applied.
 * NOTE(review): the replacement text "%amp;" looks like it may have been
 *   meant to be "&amp;" - confirm against the stylesheet.
 * NOTE(review): argv[1] and argv[3] are read without checking argc.
 * NOTE(review): getline() is handed lptr = line+1, which is not the start of
 *   an allocation - confirm this is safe for long lines.
 * NOTE(review): several break paths after zip_open() succeeded appear to skip
 *   zip_close(); break/continue inside the inner for loop only affect that
 *   loop, not the surrounding do/while - confirm this is intended.
 * NOTE(review): atoi(sc+1) is evaluated even when the line had no ';'
 *   (sc == NULL) - confirm config lines always contain one.
 */
int cepgdata2xmltv::Process(int argc, char *argv[]) { FILE *f=fopen("/var/lib/epgsources/epgdata2xmltv","r"); if (!f) { esyslog("failed to open epgdata2xmltv config"); return 1; } char *line=NULL,*lptr=NULL; size_t size; if (getline(&line,&size,f)==(ssize_t) -1) { fclose(f); esyslog("failed to read epgdata2xmltv config"); return 1; } if (getline(&line,&size,f)==(ssize_t) -1) { fclose(f); if (line) free(line); esyslog("failed to read epgdata2xmltv config"); return 1; } char *sc=strchr(line,';'); if (sc) { *sc=0; sc++; } else { sc=line; } int daysmax=atoi(sc); if (daysmax<0) daysmax=1; int daysinadvance=atoi(argv[1]); if (daysinadvance<0) daysinadvance=1; if (daysinadvance>daysmax) daysinadvance=daysmax; bool head=false; char *xmlmem=NULL; time_t t=time(NULL); int carg=3; if (!strcmp(argv[3],"1") || !strcmp(argv[3],"0")) carg++; for (int day=0; day<=daysinadvance; day++) { time_t td=t+(day*86400); struct tm *tm; tm=localtime(&td); char vgl[10]; sprintf(vgl,"%04i%02i%02i",tm->tm_year+1900,tm->tm_mon+1,tm->tm_mday); char *dest=NULL; if (asprintf(&dest,"/tmp/%s_epgdata.zip",vgl)==-1) { esyslog("failed to allocate string"); continue; } bool ok=false; do { bool offline=true; struct stat statbuf; if (stat(dest,&statbuf)==-1) { if (Fetch(dest,argv[2],day)) { ok=true; break; } offline=false; } struct zip *zip=zip_open(dest,0,NULL); if (!zip) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to open %s",dest); ok=true; break; } int i=zip_name_locate(zip,"qy.dtd",ZIP_FL_NOCASE); if (i==-1) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed read qy.dtd in %s",dest); ok=true; break; } struct zip_file *zfile=zip_fopen_index(zip,i,0); if (!zfile) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to read qy.dtd from %s",dest); ok=true; break; } struct zip_stat sb; 
memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) { if (offline) { if (unlink(dest)==-1) { zip_fclose(zfile); esyslog("cannot unlink %s",dest); ok=true; break; } continue; } zip_fclose(zfile); esyslog("failed to stat qy.dtd in %s",dest); ok=true; break; } if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure if (dtdmem) { free(dtdmem); dtdmem=NULL; } dtdmem=(char *) malloc(sb.size+1); int size=zip_fread(zfile,dtdmem,sb.size); if (size!=sb.size) { zip_fclose(zfile); esyslog("failed to read qy.dtd from %s",dest); ok=true; break; } dtdmem[size]=0; zip_fclose(zfile); int entries=zip_get_num_files(zip); for (int i=0; i<entries; i++) { const char *name=zip_get_name(zip,i,0); if (strstr(name,"xml")) { // check date of xml if (strstr(name,vgl)) { struct zip_file *zfile=zip_fopen_index(zip,i,0); if (!zfile) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to read %s from %s",name,dest); ok=true; break; } struct zip_stat sb; memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1) { if (offline) { if (unlink(dest)==-1) { esyslog("cannot unlink %s",dest); ok=true; break; } continue; } esyslog("failed to stat %s in %s",name,dest); ok=true; break; } if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure xmlmem=(char *) malloc(sb.size+1); int size=zip_fread(zfile,xmlmem,sb.size); if (size!=sb.size) { zip_fclose(zfile); free(xmlmem); xmlmem=NULL; esyslog("failed to read %s from %s",name,dest); ok=true; break; } xmlmem[size]=0; xmlmem=strreplace(xmlmem,"iso-8859-1","Windows-1252"); zip_fclose(zfile); ok=true; break; } } } if (!strcmp(argv[3],"1")) { int entries=zip_get_num_files(zip); for (int i=0; i<entries; i++) { const char *name=zip_get_name(zip,i,0); if (strstr(name,"jpg")) { char *destjpg; if (asprintf(&destjpg,"/var/lib/epgsources/epgdata2xmltv-img/%s",name)!=-1) { struct stat statbuf; if (stat(destjpg,&statbuf)==-1) { struct zip_file 
*zfile=zip_fopen_index(zip,i,0); if (zfile) { struct zip_stat sb; memset(&sb,0,sizeof(sb)); if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)!=-1) { if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure char *jpg=(char *) malloc(sb.size+1); if (jpg) { int size=zip_fread(zfile,jpg,sb.size); if (size==sb.size) { FILE *j=fopen(destjpg,"w+"); if (j) { fwrite(jpg,size,1,j); fclose(j); } } } } zip_fclose(zfile); } } free(destjpg); } } } } zip_close(zip); if (!ok) { if (offline) { if (unlink(dest)==-1) { ok=true; break; } continue; } else { esyslog("found no valid data in %s",dest); if (xmlmem) free(xmlmem); xmlmem=NULL; ok=true; break; } } } while (ok==false); free(dest); if (!line) { line=(char *) malloc(81); size=80; } if (!xmlmem) continue; long offset=ftell(f); xmlDocPtr pxmlDoc; if (!pxsltStylesheet) LoadXSLT(); int xmlsize=strlen(xmlmem); if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) { EncaAnalyser analyser=enca_analyser_alloc("__"); if (analyser) { EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize); const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV); if (cs) { if (!strcmp(cs,"UTF-8")) { xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8"); } else { esyslog("enca returned %s, please report!",cs); } } enca_analyser_free(analyser); } string s = xmlmem; int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s); if (reps) { xmlmem = (char *)realloc(xmlmem, s.size()+1); xmlsize = s.size(); strcpy(xmlmem,s.c_str()); } if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL) { esyslog("failed parsing xml"); free(xmlmem); xmlmem=NULL; continue; } } for (;;) { lptr=line+1; line[0]=' '; if (getline(&lptr,&size,f)==-1) break; char *channel=line; char *sc=strchr(channel,';'); if (sc) *sc=0; bool use=false; for (int i=carg; i<argc; i++) { if (!strcasecmp(lptr,argv[i])) { use=true; break; } } if (use) { if (!head) { printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"); printf("<tv 
generator-info-name=\"epgdata2xmltv\">\n"); for (int i=carg; i<argc; i++) { printf("<channel id=\"%s\">\n",argv[i]); printf("<display-name lang=\"de\">%s</display-name>\n",argv[i]); printf("</channel>\n"); } head=true; } int num=atoi(sc+1); if (num>0) { char *channelnum=strdup(sc+1); char *lf=strchr(channelnum,10); if (lf) *lf=0; channel[0]='"'; *sc++='"'; *sc=0; const char *params[5] = { "channelid", channel, "channelnum",channelnum,NULL }; Translate(pxmlDoc,params); if (channelnum) free(channelnum); } } } xmlFreeDoc (pxmlDoc); fseek(f,offset,SEEK_SET); if (dtdmem) { free(dtdmem); dtdmem=NULL; } if (xmlmem) { free(xmlmem); xmlmem=NULL; } } if (line) free(line); fclose(f); if (head) printf("</tv>\n"); return head ? 0 : 1; }
/**
 * Prints the detection result for one file in the configured output format.
 *
 * @fname is printed as a prefix when options.prefix_filename is set.
 * Any surface bits beyond the charset's natural surface are appended for the
 * canonical, human and details formats.  The cstocs, iconv and MIME formats
 * print the name of ENCA_CS_UNKNOWN when the charset has no name in that
 * style.  A failure-reason line follows when @gerrno is nonzero and the
 * output type is OTYPE_DETAILS.  An unhandled output type aborts.
 **/
static void
print_results(const char *fname,
              EncaAnalyser an,
              EncaEncoding result,
              int gerrno)
{
  const EncaSurface extra_surface
    = result.surface & ~enca_charset_natural_surface(result.charset);
  const char *name;
  char *surface_text;

  if (options.prefix_filename)
    printf("%s: ", ffname_r(fname));

  switch (options.output_type) {
    case OTYPE_ALIASES:
    print_aliases(result.charset);
    break;

    case OTYPE_CANON:
    if (!extra_surface) {
      puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_ENCA));
      break;
    }
    /* Charset name immediately followed by the surface suffix. */
    surface_text = enca_get_surface_name(extra_surface, ENCA_NAME_STYLE_ENCA);
    fputs(enca_charset_name(result.charset, ENCA_NAME_STYLE_ENCA), stdout);
    puts(surface_text);
    enca_free(surface_text);
    break;

    case OTYPE_HUMAN:
    case OTYPE_DETAILS:
    if (!extra_surface) {
      puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_HUMAN));
      break;
    }
    /* Charset name on its own line, then the surface description. */
    surface_text = enca_get_surface_name(extra_surface, ENCA_NAME_STYLE_HUMAN);
    puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_HUMAN));
    indent_surface(surface_text);
    enca_free(surface_text);
    break;

    case OTYPE_RFC1345:
    puts(enca_charset_name(result.charset, ENCA_NAME_STYLE_RFC1345));
    break;

    case OTYPE_CS2CS:
    name = enca_charset_name(result.charset, ENCA_NAME_STYLE_CSTOCS);
    if (name == NULL)
      name = enca_charset_name(ENCA_CS_UNKNOWN, ENCA_NAME_STYLE_CSTOCS);
    puts(name);
    break;

    case OTYPE_ICONV:
    name = enca_charset_name(result.charset, ENCA_NAME_STYLE_ICONV);
    if (name == NULL)
      name = enca_charset_name(ENCA_CS_UNKNOWN, ENCA_NAME_STYLE_ICONV);
    puts(name);
    break;

    case OTYPE_MIME:
    name = enca_charset_name(result.charset, ENCA_NAME_STYLE_MIME);
    if (name == NULL)
      name = enca_charset_name(ENCA_CS_UNKNOWN, ENCA_NAME_STYLE_MIME);
    puts(name);
    break;

    default:
    abort();
    break;
  }

  if (gerrno != 0 && options.output_type == OTYPE_DETAILS)
    printf("  Failure reason: %s.\n", enca_strerror(an, gerrno));
}
/*
 * Convert a file by running the external convertor in a child process.
 *
 * The convertor is invoked as
 *   extern_convertor FROM TARGET FILENAME [-]
 * where FROM and TARGET are ENCA-style encoding names (charset plus surface).
 * TARGET is the raw options.target_enc_str when the target charset is not
 * known or its surface is unknown.
 *
 * When file->name is NULL (stdin), the input is first copied to a temporary
 * file, which is passed as FILENAME followed by the special argument "-"
 * telling the convertor to send its result to stdout.  The temporary file is
 * unlinked here once the child has exited.
 *
 * Returns ERR_OK on success, ERR_CANNOT when no convertor is defined or the
 * convertor exits with a nonzero status, ERR_EXEC when the child could not
 * execute the convertor, and ERR_IOFAIL when stdin cannot be copied to the
 * temporary file.  Failure to fork or wait, or a child that did not exit
 * normally, terminates the program with EXIT_TROUBLE.
 */
int
convert_external(File *file,
                 const EncaEncoding from_enc)
{
  /* special fourth parameter passed to external convertor to instruct it to
     send result to stdout */
  static const char *STDOUT_CONV = "-";

  pid_t pid;
  int status;
  File *tempfile = NULL;
  char *from_name, *target_name;
  char *from_surface, *target_surface;

  if (*extern_convertor == '\0') {
    fprintf(stderr, "%s: No external convertor defined!\n", program_name);
    return ERR_CANNOT;
  }

  if (options.verbosity_level > 2)
    fprintf(stderr, "    launching `%s' to convert `%s'\n",
                    extern_convertor, ffname_r(file->name));

  /* Is conversion of stdin requested? */
  if (file->name == NULL) {
    /* Then we have to copy it to a temporary file. */
    tempfile = file_temporary(file->buffer, 0);
    if (tempfile == NULL)
      return ERR_IOFAIL;

    if (copy_and_convert(file, tempfile, NULL) != 0) {
      file_unlink(tempfile->name);
      file_free(tempfile);
      return ERR_IOFAIL;
    }
  }

  /* Construct the charset names before fork().  The surface names are
     allocated strings owned by us, so release them once they have been
     copied into the concatenated name. */
  from_surface = enca_get_surface_name(from_enc.surface,
                                       ENCA_NAME_STYLE_ENCA);
  from_name = enca_strconcat(enca_charset_name(from_enc.charset,
                                               ENCA_NAME_STYLE_ENCA),
                             from_surface,
                             NULL);
  enca_free(from_surface);

  if (enca_charset_is_known(options.target_enc.charset)
      && (options.target_enc.surface & ENCA_SURFACE_UNKNOWN) == 0) {
    target_surface = enca_get_surface_name(options.target_enc.surface,
                                           ENCA_NAME_STYLE_ENCA);
    target_name
      = enca_strconcat(enca_charset_name(options.target_enc.charset,
                                         ENCA_NAME_STYLE_ENCA),
                       target_surface,
                       NULL);
    enca_free(target_surface);
  }
  else
    target_name = enca_strdup(options.target_enc_str);

  /* Fork. */
  pid = vfork();
  if (pid == 0) {
    /* Child. */
    if (tempfile)
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, tempfile->name,
             STDOUT_CONV, NULL);
    else
      execlp(extern_convertor, extern_convertor,
             from_name, target_name, file->name, NULL);

    /* execlp() failed.  A vfork() child must leave with _exit(): exit()
       would flush stdio buffers and run atexit handlers that belong to the
       parent. */
    _exit(ERR_EXEC);
  }

  /* Parent. */
  if (pid == -1) {
    fprintf(stderr, "%s: Cannot fork() to execute convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }

  /* Wait until the child returns. */
  if (waitpid(pid, &status, 0) == -1) {
    /* Error. */
    fprintf(stderr, "%s: wait_pid() error while waiting for convertor: %s\n",
                    program_name,
                    strerror(errno));
    exit(EXIT_TROUBLE);
  }
  if (!WIFEXITED(status)) {
    /* Child exited abnormally. */
    fprintf(stderr, "%s: Child convertor process has been murdered.\n",
                    program_name);
    exit(EXIT_TROUBLE);
  }

  enca_free(from_name);
  enca_free(target_name);

  if (tempfile) {
    unlink(tempfile->name);
    file_free(tempfile);
  }

  /* Child exited normally, test exit status. */
  if (WEXITSTATUS(status) != EXIT_SUCCESS) {
    /* This means child was unable to execute convertor or convertor failed. */
    fprintf(stderr, "%s: External convertor failed (error code %d)\n",
                    program_name,
                    WEXITSTATUS(status));
    if (WEXITSTATUS(status) == ERR_EXEC)
      return ERR_EXEC;
    else
      return ERR_CANNOT;
  }

  /* Success! */
  return ERR_OK;
}