예제 #1
0
static String HHVM_STATIC_METHOD(Locale, lookup, CArrRef langtag,
                                 const String& locale,
                                 bool canonicalize, const String& def) {
  String locname(localeOrDefault(locale), CopyString);
  std::vector<std::pair<String,String>> cur_arr;
  for (ArrayIter iter(langtag); iter; ++iter) {
    auto val = iter.second();
    if (!val.isString()) {
      s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                             "locale array element is not a string");
      return def;
    }
    String normalized(val.toString(), CopyString);
    normalize_for_match(normalized);
    if (canonicalize) {
      normalized = get_icu_value(normalized, LOC_CANONICALIZE);
      if (normalized.isNull()) {
        s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                               "unable to canonicalize lang_tag");
        return def;
      }
      normalize_for_match(normalized);
    }
    cur_arr.push_back(std::make_pair(val,normalized));
  }

  if (canonicalize) {
    locname = get_icu_value(locname, LOC_CANONICALIZE);
    if (locname.isNull()) {
      s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                             "unable to canonicalize loc_range");
      return def;
    }
  }

  normalize_for_match(locname);
  while (locname.size() > 0) {
    for (auto &p : cur_arr) {
      if (locname.same(p.second)) {
        return canonicalize ? p.second : p.first;
      }
    }
    locname = locale_suffix_strip(locname);
  }
  return def;
}
예제 #2
0
static Variant get_icu_display_value(const String &locale,
                                     const String &disp_locale,
                                     LocaleTag tag) {
  String locname(locale);
  if (tag != LOC_DISPLAY) {
    int ofs = getGrandfatheredOffset(locale);
    if (ofs >= 0) {
      if (tag == LOC_LANG) {
        locname = getGrandfatheredPreferred(ofs);
      } else {
        return false;
      }
    }
  }

  int32_t (*ulocfunc)(const char *loc, const char *dloc,
                      UChar *dest, int32_t destcap, UErrorCode *err);
  switch (tag) {
    case LOC_LANG:     ulocfunc = uloc_getDisplayLanguage; break;
    case LOC_SCRIPT:   ulocfunc = uloc_getDisplayScript;   break;
    case LOC_REGION:   ulocfunc = uloc_getDisplayCountry;  break;
    case LOC_VARIANT:  ulocfunc = uloc_getDisplayVariant;  break;
    case LOC_DISPLAY:  ulocfunc = uloc_getDisplayName;     break;
    default:
      assert(false);
      return false;
  }

  icu::UnicodeString buf;
  auto ubuf = buf.getBuffer(64);
  do {
    UErrorCode error = U_ZERO_ERROR;
    int32_t len = ulocfunc(locname.c_str(), disp_locale.c_str(),
                           ubuf, buf.getCapacity(), &error);
    if (error != U_BUFFER_OVERFLOW_ERROR &&
        error != U_STRING_NOT_TERMINATED_WARNING) {
      if (U_FAILURE(error)) {
        s_intl_error->setError(error, "locale_get_display_%s : unable to "
                                      "get locale %s",
                                      LocaleName(tag).c_str(),
                                      LocaleName(tag).c_str());
        return false;
      }
      buf.releaseBuffer(len);

      error = U_ZERO_ERROR;
      String out(u8(buf, error));
      if (U_FAILURE(error)) {
        s_intl_error->setError(error, "Unable to convert result from "
                                      "locale_get_display_%s to UTF-8",
                                      LocaleName(tag).c_str());
        return false;
      }
      return out;
    }
    if (len <= buf.getCapacity()) {
      // Avoid infinite loop
      buf.releaseBuffer(0);
      s_intl_error->setError(U_INTERNAL_PROGRAM_ERROR,
                             "Got invalid response from ICU");
      return false;
    }

    // Grow the buffer to sufficient size
    buf.releaseBuffer(0);
    ubuf = buf.getBuffer(len);
  } while (true);

  not_reached();
  return false;
}
예제 #3
0
파일: gffread.cpp 프로젝트: xiongxu/gffread
int main(int argc, char * const argv[]) {
 GArgs args(argc, argv, 
   "debug;merge;cluster-only;help;force-exons;no-pseudo;MINCOV=MINPID=hvOUNHWCVJMKQNSXTDAPRZFGLEm:g:i:r:s:t:a:b:o:w:x:y:d:");
 args.printError(USAGE, true);
 if (args.getOpt('h') || args.getOpt("help")) {
    GMessage("%s",USAGE);
    exit(1);
    }
 debugMode=(args.getOpt("debug")!=NULL);
 decodeChars=(args.getOpt('D')!=NULL);
 forceExons=(args.getOpt("force-exons")!=NULL);
 NoPseudo=(args.getOpt("no-pseudo")!=NULL);
 mRNAOnly=(args.getOpt('O')==NULL);
 //sortByLoc=(args.getOpt('S')!=NULL);
 addDescr=(args.getOpt('A')!=NULL);
 verbose=(args.getOpt('v')!=NULL);
 wCDSonly=(args.getOpt('C')!=NULL);
 validCDSonly=(args.getOpt('V')!=NULL);
 altPhases=(args.getOpt('H')!=NULL);
 fmtGTF=(args.getOpt('T')!=NULL); //switch output format to GTF
 bothStrands=(args.getOpt('B')!=NULL);
 fullCDSonly=(args.getOpt('J')!=NULL);
 spliceCheck=(args.getOpt('N')!=NULL);
 bool matchAllIntrons=(args.getOpt('K')==NULL);
 bool fuzzSpan=(args.getOpt('Q')!=NULL);
 if (args.getOpt('M') || args.getOpt("merge")) {
    doCluster=true;
    doCollapseRedundant=true;
    }
   else {
    if (!matchAllIntrons || fuzzSpan) {
      GMessage("%s",USAGE);
      GMessage("Error: -K or -Q options require -M/--merge option!\n");
      exit(1);
      }
    }
 if (args.getOpt("cluster-only")) {
    doCluster=true;
    doCollapseRedundant=false;
    if (!matchAllIntrons || fuzzSpan) {
      GMessage("%s",USAGE);
      GMessage("Error: -K or -Q options have no effect with --cluster-only.\n");
      exit(1);
      }
    }
 if (fullCDSonly) validCDSonly=true;
 if (verbose) { 
     fprintf(stderr, "Command line was:\n");
     args.printCmdLine(stderr);
     }

 fullattr=(args.getOpt('F')!=NULL);
 if (args.getOpt('G')==NULL) 
    noExonAttr=!fullattr;
   else {
     noExonAttr=true;
     fullattr=true;
     }
 if (NoPseudo && !fullattr) {
	 noExonAttr=true;
	 fullattr=true;
 }
 ensembl_convert=(args.getOpt('L')!=NULL);
 if (ensembl_convert) {
    fullattr=true;
    noExonAttr=false;
    //sortByLoc=true;
    }
    
 mergeCloseExons=(args.getOpt('Z')!=NULL);
 multiExon=(args.getOpt('U')!=NULL);
 writeExonSegs=(args.getOpt('W')!=NULL);
 tracklabel=args.getOpt('t');
 GFastaDb gfasta(args.getOpt('g'));
 //if (gfasta.fastaPath!=NULL)
 //    sortByLoc=true; //enforce sorting by chromosome/contig
 GStr s=args.getOpt('i');
 if (!s.is_empty()) maxintron=s.asInt();
 
 FILE* f_repl=NULL;
 s=args.getOpt('d');
 if (!s.is_empty()) {
   if (s=="-") f_repl=stdout;
     else {
       f_repl=fopen(s.chars(), "w");
       if (f_repl==NULL) GError("Error creating file %s\n", s.chars());
       }
   }
 
 rfltWithin=(args.getOpt('R')!=NULL);
 s=args.getOpt('r');
 if (!s.is_empty()) {
   s.trim();
   if (s[0]=='+' || s[0]=='-') {
     rfltStrand=s[0];
     s.cut(0,1);
     }
   int isep=s.index(':');
   if (isep>0) { //gseq name given
      if (rfltStrand==0 && (s[isep-1]=='+' || s[isep-1]=='-')) {
        isep--;
        rfltStrand=s[isep];
        s.cut(isep,1);
        }
      if (isep>0) 
          rfltGSeq=Gstrdup((s.substr(0,isep)).chars());
      s.cut(0,isep+1);
      }
   GStr gsend;
   char slast=s[s.length()-1];
   if (rfltStrand==0 && (slast=='+' || slast=='-')) {
      s.chomp(slast);
      rfltStrand=slast;
      }
   if (s.index("..")>=0) gsend=s.split("..");
                    else gsend=s.split('-');
   if (!s.is_empty()) rfltStart=(uint)s.asInt();
   if (!gsend.is_empty()) {
      rfltEnd=(uint)gsend.asInt();
      if (rfltEnd==0) rfltEnd=MAX_UINT;
      }
   } //gseq/range filtering
 else {
   if (rfltWithin)
     GError("Error: option -R requires -r!\n");
   //if (rfltWholeTranscript)
   //  GError("Error: option -P requires -r!\n");
   }
 s=args.getOpt('m');
 if (!s.is_empty()) {
   FILE* ft=fopen(s,"r");
   if (ft==NULL) GError("Error opening reference table: %s\n",s.chars());
   loadRefTable(ft, reftbl);
   fclose(ft);
   }
 s=args.getOpt('s');
 if (!s.is_empty()) {
   FILE* fsize=fopen(s,"r");
   if (fsize==NULL) GError("Error opening info file: %s\n",s.chars());
   loadSeqInfo(fsize, seqinfo);
   fclose(fsize);
   }

 openfw(f_out, args, 'o');
 //if (f_out==NULL) f_out=stdout;
 if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL))
  GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n");

 openfw(f_w, args, 'w');
 openfw(f_x, args, 'x');
 openfw(f_y, args, 'y');
 if (f_y!=NULL || f_x!=NULL) wCDSonly=true;
 //useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL);
 
 int numfiles = args.startNonOpt();
 //GList<GffObj> gfkept(false,true); //unsorted, free items on delete
 int out_counter=0; //number of records printed
 while (true) {
   GStr infile;
   if (numfiles) {
          infile=args.nextNonOpt();
          if (infile.is_empty()) break;
          if (infile=="-") { f_in=stdin; infile="stdin"; }
               else 
                 if ((f_in=fopen(infile, "r"))==NULL)
                    GError("Error: cannot open input file %s!\n",infile.chars());
          }
        else 
          infile="-";
   GffLoader gffloader(infile.chars());
   gffloader.transcriptsOnly=mRNAOnly;
   gffloader.fullAttributes=fullattr;
   gffloader.noExonAttrs=noExonAttr;
   gffloader.mergeCloseExons=mergeCloseExons;
   gffloader.showWarnings=(args.getOpt('E')!=NULL);
   gffloader.noPseudo=NoPseudo;
   gffloader.load(g_data, &validateGffRec, doCluster, doCollapseRedundant, 
                             matchAllIntrons, fuzzSpan, forceExons);
   if (doCluster) 
     collectLocusData(g_data);
   if (numfiles==0) break;
   }
   
 GStr loctrack("gffcl");
 if (tracklabel) loctrack=tracklabel;
 g_data.setSorted(&gseqCmpName);
 GffPrintMode exonPrinting;
 if (fmtGTF) {
	 exonPrinting = pgtfAny;
 } else {
	 exonPrinting = forceExons ? pgffBoth : pgffAny;
 }
 bool firstGff3Print=!fmtGTF;
 if (doCluster) {
   //grouped in loci
   for (int g=0;g<g_data.Count();g++) {
     GenomicSeqData* gdata=g_data[g];
     int gfs_i=0;
     for (int l=0;l<gdata->loci.Count();l++) {
       GffLocus& loc=*(gdata->loci[l]);
       //check all non-replaced transcripts in this locus:
       int numvalid=0;
       int idxfirstvalid=-1;
       for (int i=0;i<loc.rnas.Count();i++) {
         GffObj& t=*(loc.rnas[i]);
         if (f_out) {
          while (gfs_i<gdata->gfs.Count() && gdata->gfs[gfs_i]->start<=t.start) {
             GffObj& gfst=*(gdata->gfs[gfs_i]);
             if ((gfst.udata&4)==0) { //never printed
               gfst.udata|=4;
               if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
               if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons)
                gfst.addExon(gfst.start,gfst.end);
               gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
               }
             ++gfs_i;
          }
         }
         GTData* tdata=(GTData*)(t.uptr);
         if (tdata->replaced_by!=NULL) {
            if (f_repl && (t.udata & 8)==0) {
               //t.udata|=8;
               fprintf(f_repl, "%s", t.getID());
               GTData* rby=tdata;
               while (rby->replaced_by!=NULL) {
                  fprintf(f_repl," => %s", rby->replaced_by->getID());
                  rby->rna->udata|=8;
                  rby=(GTData*)(rby->replaced_by->uptr);
                  }
               fprintf(f_repl, "\n");
               }
            continue;
            }
         if (process_transcript(gfasta, t)) {
             t.udata|=4; //tag it as valid
             numvalid++;
             if (idxfirstvalid<0) idxfirstvalid=i;
             }
         }
       if (f_out && numvalid>0) {
         GStr locname("RLOC_");
         locname.appendfmt("%08d",loc.locus_num);
         if (!fmtGTF) {
           if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
           fprintf(f_out,"%s\t%s\tlocus\t%d\t%d\t.\t%c\t.\tID=%s;locus=%s",
                    loc.rnas[0]->getGSeqName(), loctrack.chars(), loc.start, loc.end, loc.strand,
                     locname.chars(), locname.chars());
           //const char* loc_gname=loc.getGeneName();
           if (loc.gene_names.Count()>0) { //print all gene names associated to this locus
              fprintf(f_out, ";genes=%s",loc.gene_names.First()->name.chars());
              for (int i=1;i<loc.gene_names.Count();i++) {
                fprintf(f_out, ",%s",loc.gene_names[i]->name.chars());
                }
              }
           if (loc.gene_ids.Count()>0) { //print all GeneIDs names associated to this locus
              fprintf(f_out, ";geneIDs=%s",loc.gene_ids.First()->name.chars());
              for (int i=1;i<loc.gene_ids.Count();i++) {
                fprintf(f_out, ",%s",loc.gene_ids[i]->name.chars());
                }
              }
           fprintf(f_out, ";transcripts=%s",loc.rnas[idxfirstvalid]->getID());
           for (int i=idxfirstvalid+1;i<loc.rnas.Count();i++) {
              fprintf(f_out, ",%s",loc.rnas[i]->getID());
              }
           fprintf(f_out, "\n");
           }
         //now print all valid, non-replaced transcripts in this locus:
         for (int i=0;i<loc.rnas.Count();i++) {
           GffObj& t=*(loc.rnas[i]);
           GTData* tdata=(GTData*)(t.uptr);
           if (tdata->replaced_by!=NULL || ((t.udata & 4)==0)) continue;
           t.addAttr("locus", locname.chars());
           out_counter++;
           if (fmtGTF) t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
               else {
                if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
                //print the parent first, if any
                if (t.parent!=NULL && ((t.parent->udata & 4)==0)) {
                    GTData* pdata=(GTData*)(t.parent->uptr);
                    if (pdata && pdata->geneinfo!=NULL) 
                         pdata->geneinfo->finalize();
                    t.parent->addAttr("locus", locname.chars());
                    t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
                    t.parent->udata|=4;
                    }
                t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
                }
            }
          } //have valid transcripts to print
       }//for each locus
     //print the rest of the isolated pseudo/gene/region features not printed yet
     if (f_out) {
      while (gfs_i<gdata->gfs.Count()) {
         GffObj& gfst=*(gdata->gfs[gfs_i]);
         if ((gfst.udata&4)==0) { //never printed
           gfst.udata|=4;
           if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
           if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons)
             gfst.addExon(gfst.start,gfst.end);
           gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
           }
         ++gfs_i;
      }
     }
    } //for each genomic sequence
   }
  else {
   //not grouped into loci, print the rnas with their parents, if any
   int numvalid=0;
   for (int g=0;g<g_data.Count();g++) {
     GenomicSeqData* gdata=g_data[g];
     int gfs_i=0;
     for (int m=0;m<gdata->rnas.Count();m++) {
        GffObj& t=*(gdata->rnas[m]);
        if (f_out) {
         while (gfs_i<gdata->gfs.Count() && gdata->gfs[gfs_i]->start<=t.start) {
            GffObj& gfst=*(gdata->gfs[gfs_i]);
            if ((gfst.udata&4)==0) { //never printed
              gfst.udata|=4;
              if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
              if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons)
               gfst.addExon(gfst.start,gfst.end);
              gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
              }
            ++gfs_i;
         }
        }
        GTData* tdata=(GTData*)(t.uptr);
        if (tdata->replaced_by!=NULL) continue;
        if (process_transcript(gfasta, t)) {
           t.udata|=4; //tag it as valid
           numvalid++;
           if (f_out) {
             if (tdata->geneinfo) tdata->geneinfo->finalize();
             out_counter++;
             if (fmtGTF) t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
               else {
                if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
                //print the parent first, if any
                if (t.parent!=NULL && ((t.parent->udata & 4)==0)) {
                    GTData* pdata=(GTData*)(t.parent->uptr);
                    if (pdata && pdata->geneinfo!=NULL) 
                         pdata->geneinfo->finalize();
                    t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
                    t.parent->udata|=4;
                    }
                t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
                }
             }//GFF/GTF output requested
           } //valid transcript
        } //for each rna
     //print the rest of the isolated pseudo/gene/region features not printed yet
     if (f_out) {
      while (gfs_i<gdata->gfs.Count()) {
         GffObj& gfst=*(gdata->gfs[gfs_i]);
         if ((gfst.udata&4)==0) { //never printed
           gfst.udata|=4;
           if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; }
           if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons)
            gfst.addExon(gfst.start,gfst.end);
           gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
           }
         ++gfs_i;
      }
     }
    } //for each genomic seq
   } //not clustered
 if (f_repl && f_repl!=stdout) fclose(f_repl);
 seqinfo.Clear();
 //if (faseq!=NULL) delete faseq;
 //if (gcdb!=NULL) delete gcdb;
 GFREE(rfltGSeq);
 FRCLOSE(f_in);
 FWCLOSE(f_out);
 FWCLOSE(f_w);
 FWCLOSE(f_x);
 FWCLOSE(f_y);
 }