示例#1
0
文件: charset_conv.c 项目: 0x0all/mpv
/*
 * Guess the character set of buf with ENCA.
 *
 *   log       receives error output when ENCA rejects the language
 *   buf       the bytes to analyse (buf.start / buf.len are handed to ENCA)
 *   language  language hint for ENCA; NULL or "" selects "__"
 *
 * Returns the charset name produced by enca_charset_name() in iconv style,
 * or NULL if the charset is unknown or no analyser could be allocated for
 * the language. The string is not allocated by this function.
 */
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    if (!language || !language[0])
        language = "__"; // neutral language

    const char *detected_cp = NULL;

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (analyser) {
        // NOTE(review): strictness 0 presumably lets the sample end inside a
        // multibyte sequence - confirm against the ENCA documentation.
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
        if (tmp && enc.charset != ENCA_CS_UNKNOWN)
            detected_cp = tmp;
        enca_analyser_free(analyser);
    } else {
        // No analyser for this language: tell the user what ENCA does accept.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // size_t index: langcnt is a size_t, avoid a signed/unsigned compare.
        for (size_t i = 0; languages && i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        free(languages); // the array (not the strings) is owned by the caller
    }

    return detected_cp;
}
示例#2
0
/*
 * Guess the character set of buf, preferring a built-in UTF-8 check over ENCA.
 *
 *   log       receives error output when ENCA rejects the language
 *   buf       the bytes to analyse
 *   language  language hint for ENCA; NULL or "" selects "__"
 *
 * Returns "UTF-8" when the built-in validation accepts the buffer, otherwise
 * the iconv-style charset name produced by ENCA, or NULL if the charset is
 * unknown or no analyser could be allocated for the language. The string is
 * not allocated by this function.
 */
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong sometimes
    // (suggested by divVerent). Explicitly allow cut-off UTF-8.
    // NOTE(review): the -8 threshold depends on bstr_validate_utf8()'s return
    // convention, which is not visible here - confirm before changing it.
    if (bstr_validate_utf8(buf) > -8)
        return "UTF-8";

    if (!language || !language[0])
        language = "__"; // neutral language

    const char *detected_cp = NULL;

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (analyser) {
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
        if (tmp && enc.charset != ENCA_CS_UNKNOWN)
            detected_cp = tmp;
        enca_analyser_free(analyser);
    } else {
        // No analyser for this language: tell the user what ENCA does accept.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // size_t index: langcnt is a size_t, avoid a signed/unsigned compare.
        for (size_t i = 0; languages && i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        free(languages); // the array (not the strings) is owned by the caller
    }

    return detected_cp;
}
示例#3
0
/*
 * Guess the character set of a buffer with ENCA.
 *
 *   library             used for log output only
 *   buffer, buflen      the bytes to analyse
 *   preferred_language  ENCA language name (compared case-insensitively
 *                       against ENCA's language list); NULL skips detection
 *   fallback            charset name returned when detection fails
 *
 * Returns a newly allocated (strdup) charset name that the caller must free:
 * the detected charset, or a copy of fallback when nothing was detected.
 * Returns NULL if detection failed and fallback is NULL, or if strdup fails.
 */
void *ass_guess_buffer_cp(ASS_Library *library, unsigned char *buffer,
                          int buflen, char *preferred_language,
                          char *fallback)
{
    const char **languages;
    size_t langcnt = 0;
    char *detected_sub_cp = NULL;
    size_t i;

    languages = enca_get_languages(&langcnt);
    ass_msg(library, MSGL_V, "ENCA supported languages");
    for (i = 0; languages && i < langcnt; i++) {
        ass_msg(library, MSGL_V, "lang %s", languages[i]);
    }

    /* Only run the analyser for the language the caller asked for. */
    for (i = 0; languages && preferred_language && i < langcnt; i++) {
        EncaAnalyser analyser;
        EncaEncoding encoding;
        const char *tmp;

        if (strcasecmp(languages[i], preferred_language) != 0)
            continue;
        analyser = enca_analyser_alloc(languages[i]);
        if (!analyser)
            continue;
        encoding = enca_analyse_const(analyser, buffer, buflen);
        tmp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
        if (tmp && encoding.charset != ENCA_CS_UNKNOWN) {
            /* Drop any earlier result so a second match cannot leak it. */
            free(detected_sub_cp);
            detected_sub_cp = strdup(tmp);
            ass_msg(library, MSGL_INFO, "ENCA detected charset: %s", tmp);
        }
        enca_analyser_free(analyser);
    }

    free(languages);

    if (!detected_sub_cp && fallback) {
        detected_sub_cp = strdup(fallback);
        ass_msg(library, MSGL_INFO,
            "ENCA detection failed: fallback to %s", fallback);
    }

    return detected_sub_cp;
}
int cepgdata2xmltv::Process(int argc, char *argv[])
{
    FILE *f=fopen("/var/lib/epgsources/epgdata2xmltv","r");
    if (!f)
    {
        esyslog("failed to open epgdata2xmltv config");
        return 1;
    }
    char *line=NULL,*lptr=NULL;
    size_t size;
    if (getline(&line,&size,f)==(ssize_t) -1)
    {
        fclose(f);
        esyslog("failed to read epgdata2xmltv config");
        return 1;
    }
    if (getline(&line,&size,f)==(ssize_t) -1)
    {
        fclose(f);
        if (line) free(line);
        esyslog("failed to read epgdata2xmltv config");
        return 1;
    }
    char *sc=strchr(line,';');
    if (sc)
    {
        *sc=0;
        sc++;
    }
    else
    {
        sc=line;
    }
    int daysmax=atoi(sc);
    if (daysmax<0) daysmax=1;
    int daysinadvance=atoi(argv[1]);
    if (daysinadvance<0) daysinadvance=1;
    if (daysinadvance>daysmax) daysinadvance=daysmax;

    bool head=false;
    char *xmlmem=NULL;

    time_t t=time(NULL);

    int carg=3;
    if (!strcmp(argv[3],"1") || !strcmp(argv[3],"0"))  carg++;

    for (int day=0; day<=daysinadvance; day++)
    {
        time_t td=t+(day*86400);
        struct tm *tm;
        tm=localtime(&td);
        char vgl[10];
        sprintf(vgl,"%04i%02i%02i",tm->tm_year+1900,tm->tm_mon+1,tm->tm_mday);

        char *dest=NULL;
        if (asprintf(&dest,"/tmp/%s_epgdata.zip",vgl)==-1)
        {
            esyslog("failed to allocate string");
            continue;
        }

        bool ok=false;
        do
        {
            bool offline=true;
            struct stat statbuf;
            if (stat(dest,&statbuf)==-1)
            {
                if (Fetch(dest,argv[2],day))
                {
                    ok=true;
                    break;
                }
                offline=false;
            }

            struct zip *zip=zip_open(dest,0,NULL);
            if (!zip)
            {
                if (offline)
                {
                    if (unlink(dest)==-1)
                    {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed to open %s",dest);
                ok=true;
                break;
            }

            int i=zip_name_locate(zip,"qy.dtd",ZIP_FL_NOCASE);
            if (i==-1)
            {
                if (offline)
                {
                    if (unlink(dest)==-1)
                    {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed read qy.dtd in %s",dest);
                ok=true;
                break;
            }

            struct zip_file *zfile=zip_fopen_index(zip,i,0);
            if (!zfile)
            {
                if (offline)
                {
                    if (unlink(dest)==-1)
                    {
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                esyslog("failed to read qy.dtd from %s",dest);
                ok=true;
                break;
            }
            struct zip_stat sb;
            memset(&sb,0,sizeof(sb));
            if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1)
            {
                if (offline)
                {
                    if (unlink(dest)==-1)
                    {
                        zip_fclose(zfile);
                        esyslog("cannot unlink %s",dest);
                        ok=true;
                        break;
                    }
                    continue;
                }
                zip_fclose(zfile);
                esyslog("failed to stat qy.dtd in %s",dest);
                ok=true;
                break;
            }
            if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
            if (dtdmem) {
                free(dtdmem);
                dtdmem=NULL;
            }
            dtdmem=(char *) malloc(sb.size+1);
            int size=zip_fread(zfile,dtdmem,sb.size);
            if (size!=sb.size)
            {
                zip_fclose(zfile);
                esyslog("failed to read qy.dtd from %s",dest);
                ok=true;
                break;
            }
            dtdmem[size]=0;
            zip_fclose(zfile);

            int entries=zip_get_num_files(zip);
            for (int i=0; i<entries; i++)
            {
                const char *name=zip_get_name(zip,i,0);
                if (strstr(name,"xml"))
                {
                    // check date of xml
                    if (strstr(name,vgl))
                    {
                        struct zip_file *zfile=zip_fopen_index(zip,i,0);
                        if (!zfile)
                        {
                            if (offline)
                            {
                                if (unlink(dest)==-1)
                                {
                                    esyslog("cannot unlink %s",dest);
                                    ok=true;
                                    break;
                                }
                                continue;
                            }
                            esyslog("failed to read %s from %s",name,dest);
                            ok=true;
                            break;
                        }

                        struct zip_stat sb;
                        memset(&sb,0,sizeof(sb));
                        if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)==-1)
                        {
                            if (offline)
                            {
                                if (unlink(dest)==-1)
                                {
                                    esyslog("cannot unlink %s",dest);
                                    ok=true;
                                    break;
                                }
                                continue;
                            }
                            esyslog("failed to stat %s in %s",name,dest);
                            ok=true;
                            break;
                        }
                        if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
                        xmlmem=(char *) malloc(sb.size+1);
                        int size=zip_fread(zfile,xmlmem,sb.size);
                        if (size!=sb.size)
                        {
                            zip_fclose(zfile);
                            free(xmlmem);
                            xmlmem=NULL;
                            esyslog("failed to read %s from %s",name,dest);
                            ok=true;
                            break;
                        }
                        xmlmem[size]=0;
                        xmlmem=strreplace(xmlmem,"iso-8859-1","Windows-1252");
                        zip_fclose(zfile);
                        ok=true;
                        break;
                    }
                }
            }

            if (!strcmp(argv[3],"1")) {
                int entries=zip_get_num_files(zip);
                for (int i=0; i<entries; i++)
                {
                    const char *name=zip_get_name(zip,i,0);
                    if (strstr(name,"jpg")) {

                        char *destjpg;
                        if (asprintf(&destjpg,"/var/lib/epgsources/epgdata2xmltv-img/%s",name)!=-1) {
                            struct stat statbuf;
                            if (stat(destjpg,&statbuf)==-1) {
                                struct zip_file *zfile=zip_fopen_index(zip,i,0);
                                if (zfile)
                                {
                                    struct zip_stat sb;
                                    memset(&sb,0,sizeof(sb));
                                    if (zip_stat_index(zip,i,ZIP_FL_UNCHANGED,&sb)!=-1) {
                                        if (sizeof(sb.size>4)) sb.size &= 0x00FFFFFF; // just to be sure
                                        char *jpg=(char *) malloc(sb.size+1);
                                        if (jpg) {
                                            int size=zip_fread(zfile,jpg,sb.size);
                                            if (size==sb.size) {
                                                FILE *j=fopen(destjpg,"w+");
                                                if (j) {
                                                    fwrite(jpg,size,1,j);
                                                    fclose(j);
                                                }
                                            }
                                        }
                                    }
                                    zip_fclose(zfile);
                                }
                            }
                            free(destjpg);
                        }
                    }
                }
            }

            zip_close(zip);
            if (!ok)
            {
                if (offline)
                {
                    if (unlink(dest)==-1)
                    {
                        ok=true;
                        break;
                    }
                    continue;
                }
                else
                {
                    esyslog("found no valid data in %s",dest);
                    if (xmlmem) free(xmlmem);
                    xmlmem=NULL;
                    ok=true;
                    break;
                }
            }
        }
        while (ok==false);
        free(dest);

        if (!line)
        {
            line=(char *) malloc(81);
            size=80;
        }
        if (!xmlmem) continue;
        long offset=ftell(f);

        xmlDocPtr pxmlDoc;
        if (!pxsltStylesheet) LoadXSLT();
        int xmlsize=strlen(xmlmem);
        if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL)
        {
            EncaAnalyser analyser=enca_analyser_alloc("__");
            if (analyser) {
                EncaEncoding encoding=enca_analyse_const(analyser, (unsigned char *) xmlmem,xmlsize);
                const char *cs=enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
                if (cs) {
                    if (!strcmp(cs,"UTF-8")) {
                        xmlmem=strreplace(xmlmem,"Windows-1252","UTF-8");
                    } else {
                        esyslog("enca returned %s, please report!",cs);
                    }
                }
                enca_analyser_free(analyser);
            }

            string s = xmlmem;
            int reps=pcrecpp::RE("&(?![a-zA-Z]{1,8};)").GlobalReplace("%amp;",&s);
            if (reps) {
                xmlmem = (char *)realloc(xmlmem, s.size()+1);
                xmlsize = s.size();
                strcpy(xmlmem,s.c_str());
            }

            if ((pxmlDoc=xmlParseMemory(xmlmem,xmlsize))==NULL)
            {
                esyslog("failed parsing xml");
                free(xmlmem);
                xmlmem=NULL;
                continue;
            }
        }

        for (;;)
        {
            lptr=line+1;
            line[0]=' ';
            if (getline(&lptr,&size,f)==-1) break;
            char *channel=line;
            char *sc=strchr(channel,';');
            if (sc) *sc=0;

            bool use=false;
            for (int i=carg; i<argc; i++)
            {
                if (!strcasecmp(lptr,argv[i]))
                {
                    use=true;
                    break;
                }
            }

            if (use)
            {
                if (!head)
                {
                    printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
                    printf("<tv generator-info-name=\"epgdata2xmltv\">\n");

                    for (int i=carg; i<argc; i++)
                    {
                        printf("<channel id=\"%s\">\n",argv[i]);
                        printf("<display-name lang=\"de\">%s</display-name>\n",argv[i]);
                        printf("</channel>\n");
                    }
                    head=true;
                }

                int num=atoi(sc+1);
                if (num>0)
                {
                    char *channelnum=strdup(sc+1);
                    char *lf=strchr(channelnum,10);
                    if (lf) *lf=0;
                    channel[0]='"';
                    *sc++='"';
                    *sc=0;
                    const char *params[5] =
                    {
                        "channelid", channel, "channelnum",channelnum,NULL
                    };
                    Translate(pxmlDoc,params);
                    if (channelnum) free(channelnum);
                }
            }
        }
        xmlFreeDoc (pxmlDoc);
        fseek(f,offset,SEEK_SET);
        if (dtdmem) {
            free(dtdmem);
            dtdmem=NULL;
        }
        if (xmlmem) {
            free(xmlmem);
            xmlmem=NULL;
        }
    }
    if (line) free(line);
    fclose(f);

    if (head) printf("</tv>\n");
    return head ? 0 : 1;
}
示例#5
0
/* process file named fname
   this is the `boss' function

   calling it with an == NULL releases the persistent i/o buffer and
   returns 0; a later call allocates a fresh buffer

   returns 0 on success, 1 on failure, 2 on troubles
   (spelled EXIT_SUCCESS, 1/EXIT_FAILURE and EXIT_TROUBLE below;
   NOTE(review): the numeric value of EXIT_TROUBLE is defined elsewhere) */
static int
process_file(EncaAnalyser an,
             const char *fname)
{
  static int utf8 = ENCA_CS_UNKNOWN;
  static Buffer *buffer = NULL; /* persistent i/o buffer */
  int ot_is_convert = (options.output_type == OTYPE_CONVERT);

  EncaEncoding result; /* the guessed encoding */
  File *file; /* the processed file */

  if (!an) {
    buffer_free(buffer);
    /* Forget the freed buffer so that a later call cannot reuse it. */
    buffer = NULL;
    return 0;
  }

  /* Initialize when we are called the first time. */
  if (buffer == NULL)
    buffer = buffer_new(buffer_size);

  if (!enca_charset_is_known(utf8)) {
    utf8 = enca_name_to_charset("utf8");
    assert(enca_charset_is_known(utf8));
  }

  /* Read sample.  The file stays open (read/write) only when converting. */
  file = file_new(fname, buffer);
  if (file_open(file, ot_is_convert ? "r+b" : "rb") != 0) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (file_read(file) == -1) {
    file_free(file);
    return EXIT_TROUBLE;
  }
  if (!ot_is_convert)
    file_close(file);

  /* Guess encoding.
     NOTE(review): the const variant is presumably used for conversion
     because the sample in the buffer is still needed afterwards -- confirm
     against the libenca documentation. */
  dwim_libenca_options(an, file);
  if (ot_is_convert)
    result = enca_analyse_const(an, buffer->data, buffer->pos);
  else
    result = enca_analyse(an, buffer->data, buffer->pos);

  /* Is conversion required? */
  if (ot_is_convert) {
    int err = 0;

    if (enca_charset_is_known(result.charset))
      err = convert(file, result);
    else {
      /* An empty sample is not reported as an error. */
      if (enca_errno(an) != ENCA_EEMPTY) {
        fprintf(stderr, "%s: Cannot convert `%s' from unknown encoding\n",
                        program_name,
                        ffname_r(file->name));
      }
      /* Copy stdin to stdout unchanged. */
      if (file->name == NULL)
        err = copy_and_convert(file, file, NULL);
    }

    file_free(file);
    /* Failure (1): unknown encoding of a non-empty sample, or a conversion
       that cannot be done.  Any other error is trouble. */
    if ((err == ERR_OK && !enca_charset_is_known(result.charset)
         && enca_errno(an) != ENCA_EEMPTY)
        || err == ERR_CANNOT)
      return 1;

    return (err == ERR_OK) ? EXIT_SUCCESS : EXIT_TROUBLE;
  }

  /* Print results. */
  print_results(file->name, an, result, enca_errno(an));
  if (result.charset == utf8)
    double_utf8_chk(an, buffer->data, buffer->pos);

  file_free(file);

  return enca_charset_is_known(result.charset) ? EXIT_SUCCESS : EXIT_FAILURE;
}