/**
 * Locale::lookup - pick the entry of `langtag` that matches `locale`.
 *
 * Every element of `langtag` must be a string. Each one is passed through
 * normalize_for_match() (and, when `canonicalize` is set, through
 * get_icu_value(..., LOC_CANONICALIZE) followed by a second normalization).
 * The requested locale (localeOrDefault(locale)) is prepared the same way and
 * then compared against the candidates; after every unsuccessful pass it is
 * shortened with locale_suffix_strip() until it becomes empty.
 *
 * @param langtag       array of candidate language tags
 * @param locale        requested locale (resolved via localeOrDefault())
 * @param canonicalize  canonicalize both sides, and return the canonical form
 * @param def           value returned on error or when nothing matches
 * @return the matching candidate (original text, or its normalized canonical
 *         form when `canonicalize` is true), otherwise `def`.
 *
 * On a non-string element or a failed canonicalization the intl error is set
 * to U_ILLEGAL_ARGUMENT_ERROR and `def` is returned.
 */
static String HHVM_STATIC_METHOD(Locale, lookup, CArrRef langtag,
                                 const String& locale,
                                 bool canonicalize, const String& def) {
  String wanted(localeOrDefault(locale), CopyString);

  // (original value, normalized tag) for each candidate, in array order.
  std::vector<std::pair<String,String>> candidates;
  for (ArrayIter it(langtag); it; ++it) {
    auto entry = it.second();
    if (!entry.isString()) {
      s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                             "locale array element is not a string");
      return def;
    }

    String tag(entry.toString(), CopyString);
    normalize_for_match(tag);
    if (canonicalize) {
      tag = get_icu_value(tag, LOC_CANONICALIZE);
      if (tag.isNull()) {
        s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                               "unable to canonicalize lang_tag");
        return def;
      }
      // Canonicalization produced a new string, so normalize it again.
      normalize_for_match(tag);
    }
    candidates.push_back(std::make_pair(entry,tag));
  }

  if (canonicalize) {
    wanted = get_icu_value(wanted, LOC_CANONICALIZE);
    if (wanted.isNull()) {
      s_intl_error->setError(U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: "
                             "unable to canonicalize loc_range");
      return def;
    }
  }
  normalize_for_match(wanted);

  // Try the full range first, then progressively shorter versions of it.
  for (; wanted.size() > 0; wanted = locale_suffix_strip(wanted)) {
    for (auto &cand : candidates) {
      if (!wanted.same(cand.second)) {
        continue;
      }
      if (canonicalize) {
        return cand.second;
      }
      return cand.first;
    }
  }
  return def;
}
/**
 * Returns the display string of one component of `locale`, localized for
 * `disp_locale`.
 *
 * @param locale       locale whose component is described
 * @param disp_locale  locale in which the description is written
 * @param tag          component selector: LOC_LANG, LOC_SCRIPT, LOC_REGION,
 *                     LOC_VARIANT or LOC_DISPLAY (the whole name)
 * @return the UTF-8 display string, or false on failure (the intl error is set
 *         for ICU / conversion failures).
 *
 * For every tag except LOC_DISPLAY a grandfathered locale (as reported by
 * getGrandfatheredOffset()) is special-cased: LOC_LANG is answered for the
 * preferred replacement, every other tag yields false.
 */
static Variant get_icu_display_value(const String &locale,
                                     const String &disp_locale,
                                     LocaleTag tag) {
  String locname(locale);
  if (tag != LOC_DISPLAY) {
    int ofs = getGrandfatheredOffset(locale);
    if (ofs >= 0) {
      if (tag == LOC_LANG) {
        locname = getGrandfatheredPreferred(ofs);
      } else {
        return false;
      }
    }
  }

  // Select the ICU accessor matching the requested component.
  int32_t (*ulocfunc)(const char *loc, const char *dloc,
                      UChar *dest, int32_t destcap, UErrorCode *err);
  switch (tag) {
    case LOC_LANG:     ulocfunc = uloc_getDisplayLanguage; break;
    case LOC_SCRIPT:   ulocfunc = uloc_getDisplayScript;   break;
    case LOC_REGION:   ulocfunc = uloc_getDisplayCountry;  break;
    case LOC_VARIANT:  ulocfunc = uloc_getDisplayVariant;  break;
    case LOC_DISPLAY:  ulocfunc = uloc_getDisplayName;     break;
    default:
      assert(false);
      return false;
  }

  icu::UnicodeString buf;
  auto ubuf = buf.getBuffer(64);
  do {
    UErrorCode error = U_ZERO_ERROR;
    int32_t len = ulocfunc(locname.c_str(), disp_locale.c_str(),
                           ubuf, buf.getCapacity(), &error);
    // Only a real overflow requires a bigger buffer.
    // U_STRING_NOT_TERMINATED_WARNING means the text fit exactly
    // (len == capacity) but could not be NUL-terminated; releaseBuffer(len)
    // takes an explicit length, so that result is complete and usable.
    // Treating it as an overflow made the "len <= capacity" guard below
    // reject a perfectly valid answer.
    if (error != U_BUFFER_OVERFLOW_ERROR) {
      if (U_FAILURE(error)) {
        s_intl_error->setError(error, "locale_get_display_%s : unable to "
                                      "get locale %s",
                               LocaleName(tag).c_str(),
                               LocaleName(tag).c_str());
        return false;
      }
      buf.releaseBuffer(len);

      error = U_ZERO_ERROR;
      String out(u8(buf, error));
      if (U_FAILURE(error)) {
        s_intl_error->setError(error, "Unable to convert result from "
                                      "locale_get_display_%s to UTF-8",
                               LocaleName(tag).c_str());
        return false;
      }
      return out;
    }

    if (len <= buf.getCapacity()) {
      // Avoid infinite loop: ICU reported an overflow yet asked for no more
      // room than it was already given.
      buf.releaseBuffer(0);
      s_intl_error->setError(U_INTERNAL_PROGRAM_ERROR,
                             "Got invalid response from ICU");
      return false;
    }

    // Grow the buffer to sufficient size
    buf.releaseBuffer(0);
    ubuf = buf.getBuffer(len);
  } while (true);

  not_reached();
  return false;
}
int main(int argc, char * const argv[]) { GArgs args(argc, argv, "debug;merge;cluster-only;help;force-exons;no-pseudo;MINCOV=MINPID=hvOUNHWCVJMKQNSXTDAPRZFGLEm:g:i:r:s:t:a:b:o:w:x:y:d:"); args.printError(USAGE, true); if (args.getOpt('h') || args.getOpt("help")) { GMessage("%s",USAGE); exit(1); } debugMode=(args.getOpt("debug")!=NULL); decodeChars=(args.getOpt('D')!=NULL); forceExons=(args.getOpt("force-exons")!=NULL); NoPseudo=(args.getOpt("no-pseudo")!=NULL); mRNAOnly=(args.getOpt('O')==NULL); //sortByLoc=(args.getOpt('S')!=NULL); addDescr=(args.getOpt('A')!=NULL); verbose=(args.getOpt('v')!=NULL); wCDSonly=(args.getOpt('C')!=NULL); validCDSonly=(args.getOpt('V')!=NULL); altPhases=(args.getOpt('H')!=NULL); fmtGTF=(args.getOpt('T')!=NULL); //switch output format to GTF bothStrands=(args.getOpt('B')!=NULL); fullCDSonly=(args.getOpt('J')!=NULL); spliceCheck=(args.getOpt('N')!=NULL); bool matchAllIntrons=(args.getOpt('K')==NULL); bool fuzzSpan=(args.getOpt('Q')!=NULL); if (args.getOpt('M') || args.getOpt("merge")) { doCluster=true; doCollapseRedundant=true; } else { if (!matchAllIntrons || fuzzSpan) { GMessage("%s",USAGE); GMessage("Error: -K or -Q options require -M/--merge option!\n"); exit(1); } } if (args.getOpt("cluster-only")) { doCluster=true; doCollapseRedundant=false; if (!matchAllIntrons || fuzzSpan) { GMessage("%s",USAGE); GMessage("Error: -K or -Q options have no effect with --cluster-only.\n"); exit(1); } } if (fullCDSonly) validCDSonly=true; if (verbose) { fprintf(stderr, "Command line was:\n"); args.printCmdLine(stderr); } fullattr=(args.getOpt('F')!=NULL); if (args.getOpt('G')==NULL) noExonAttr=!fullattr; else { noExonAttr=true; fullattr=true; } if (NoPseudo && !fullattr) { noExonAttr=true; fullattr=true; } ensembl_convert=(args.getOpt('L')!=NULL); if (ensembl_convert) { fullattr=true; noExonAttr=false; //sortByLoc=true; } mergeCloseExons=(args.getOpt('Z')!=NULL); multiExon=(args.getOpt('U')!=NULL); writeExonSegs=(args.getOpt('W')!=NULL); 
tracklabel=args.getOpt('t'); GFastaDb gfasta(args.getOpt('g')); //if (gfasta.fastaPath!=NULL) // sortByLoc=true; //enforce sorting by chromosome/contig GStr s=args.getOpt('i'); if (!s.is_empty()) maxintron=s.asInt(); FILE* f_repl=NULL; s=args.getOpt('d'); if (!s.is_empty()) { if (s=="-") f_repl=stdout; else { f_repl=fopen(s.chars(), "w"); if (f_repl==NULL) GError("Error creating file %s\n", s.chars()); } } rfltWithin=(args.getOpt('R')!=NULL); s=args.getOpt('r'); if (!s.is_empty()) { s.trim(); if (s[0]=='+' || s[0]=='-') { rfltStrand=s[0]; s.cut(0,1); } int isep=s.index(':'); if (isep>0) { //gseq name given if (rfltStrand==0 && (s[isep-1]=='+' || s[isep-1]=='-')) { isep--; rfltStrand=s[isep]; s.cut(isep,1); } if (isep>0) rfltGSeq=Gstrdup((s.substr(0,isep)).chars()); s.cut(0,isep+1); } GStr gsend; char slast=s[s.length()-1]; if (rfltStrand==0 && (slast=='+' || slast=='-')) { s.chomp(slast); rfltStrand=slast; } if (s.index("..")>=0) gsend=s.split(".."); else gsend=s.split('-'); if (!s.is_empty()) rfltStart=(uint)s.asInt(); if (!gsend.is_empty()) { rfltEnd=(uint)gsend.asInt(); if (rfltEnd==0) rfltEnd=MAX_UINT; } } //gseq/range filtering else { if (rfltWithin) GError("Error: option -R requires -r!\n"); //if (rfltWholeTranscript) // GError("Error: option -P requires -r!\n"); } s=args.getOpt('m'); if (!s.is_empty()) { FILE* ft=fopen(s,"r"); if (ft==NULL) GError("Error opening reference table: %s\n",s.chars()); loadRefTable(ft, reftbl); fclose(ft); } s=args.getOpt('s'); if (!s.is_empty()) { FILE* fsize=fopen(s,"r"); if (fsize==NULL) GError("Error opening info file: %s\n",s.chars()); loadSeqInfo(fsize, seqinfo); fclose(fsize); } openfw(f_out, args, 'o'); //if (f_out==NULL) f_out=stdout; if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL)) GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n"); openfw(f_w, args, 'w'); openfw(f_x, args, 'x'); openfw(f_y, args, 'y'); if 
(f_y!=NULL || f_x!=NULL) wCDSonly=true; //useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL); int numfiles = args.startNonOpt(); //GList<GffObj> gfkept(false,true); //unsorted, free items on delete int out_counter=0; //number of records printed while (true) { GStr infile; if (numfiles) { infile=args.nextNonOpt(); if (infile.is_empty()) break; if (infile=="-") { f_in=stdin; infile="stdin"; } else if ((f_in=fopen(infile, "r"))==NULL) GError("Error: cannot open input file %s!\n",infile.chars()); } else infile="-"; GffLoader gffloader(infile.chars()); gffloader.transcriptsOnly=mRNAOnly; gffloader.fullAttributes=fullattr; gffloader.noExonAttrs=noExonAttr; gffloader.mergeCloseExons=mergeCloseExons; gffloader.showWarnings=(args.getOpt('E')!=NULL); gffloader.noPseudo=NoPseudo; gffloader.load(g_data, &validateGffRec, doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan, forceExons); if (doCluster) collectLocusData(g_data); if (numfiles==0) break; } GStr loctrack("gffcl"); if (tracklabel) loctrack=tracklabel; g_data.setSorted(&gseqCmpName); GffPrintMode exonPrinting; if (fmtGTF) { exonPrinting = pgtfAny; } else { exonPrinting = forceExons ? 
pgffBoth : pgffAny; } bool firstGff3Print=!fmtGTF; if (doCluster) { //grouped in loci for (int g=0;g<g_data.Count();g++) { GenomicSeqData* gdata=g_data[g]; int gfs_i=0; for (int l=0;l<gdata->loci.Count();l++) { GffLocus& loc=*(gdata->loci[l]); //check all non-replaced transcripts in this locus: int numvalid=0; int idxfirstvalid=-1; for (int i=0;i<loc.rnas.Count();i++) { GffObj& t=*(loc.rnas[i]); if (f_out) { while (gfs_i<gdata->gfs.Count() && gdata->gfs[gfs_i]->start<=t.start) { GffObj& gfst=*(gdata->gfs[gfs_i]); if ((gfst.udata&4)==0) { //never printed gfst.udata|=4; if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons) gfst.addExon(gfst.start,gfst.end); gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } ++gfs_i; } } GTData* tdata=(GTData*)(t.uptr); if (tdata->replaced_by!=NULL) { if (f_repl && (t.udata & 8)==0) { //t.udata|=8; fprintf(f_repl, "%s", t.getID()); GTData* rby=tdata; while (rby->replaced_by!=NULL) { fprintf(f_repl," => %s", rby->replaced_by->getID()); rby->rna->udata|=8; rby=(GTData*)(rby->replaced_by->uptr); } fprintf(f_repl, "\n"); } continue; } if (process_transcript(gfasta, t)) { t.udata|=4; //tag it as valid numvalid++; if (idxfirstvalid<0) idxfirstvalid=i; } } if (f_out && numvalid>0) { GStr locname("RLOC_"); locname.appendfmt("%08d",loc.locus_num); if (!fmtGTF) { if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } fprintf(f_out,"%s\t%s\tlocus\t%d\t%d\t.\t%c\t.\tID=%s;locus=%s", loc.rnas[0]->getGSeqName(), loctrack.chars(), loc.start, loc.end, loc.strand, locname.chars(), locname.chars()); //const char* loc_gname=loc.getGeneName(); if (loc.gene_names.Count()>0) { //print all gene names associated to this locus fprintf(f_out, ";genes=%s",loc.gene_names.First()->name.chars()); for (int i=1;i<loc.gene_names.Count();i++) { fprintf(f_out, ",%s",loc.gene_names[i]->name.chars()); } } if (loc.gene_ids.Count()>0) { //print 
all GeneIDs names associated to this locus fprintf(f_out, ";geneIDs=%s",loc.gene_ids.First()->name.chars()); for (int i=1;i<loc.gene_ids.Count();i++) { fprintf(f_out, ",%s",loc.gene_ids[i]->name.chars()); } } fprintf(f_out, ";transcripts=%s",loc.rnas[idxfirstvalid]->getID()); for (int i=idxfirstvalid+1;i<loc.rnas.Count();i++) { fprintf(f_out, ",%s",loc.rnas[i]->getID()); } fprintf(f_out, "\n"); } //now print all valid, non-replaced transcripts in this locus: for (int i=0;i<loc.rnas.Count();i++) { GffObj& t=*(loc.rnas[i]); GTData* tdata=(GTData*)(t.uptr); if (tdata->replaced_by!=NULL || ((t.udata & 4)==0)) continue; t.addAttr("locus", locname.chars()); out_counter++; if (fmtGTF) t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); else { if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } //print the parent first, if any if (t.parent!=NULL && ((t.parent->udata & 4)==0)) { GTData* pdata=(GTData*)(t.parent->uptr); if (pdata && pdata->geneinfo!=NULL) pdata->geneinfo->finalize(); t.parent->addAttr("locus", locname.chars()); t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); t.parent->udata|=4; } t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } } } //have valid transcripts to print }//for each locus //print the rest of the isolated pseudo/gene/region features not printed yet if (f_out) { while (gfs_i<gdata->gfs.Count()) { GffObj& gfst=*(gdata->gfs[gfs_i]); if ((gfst.udata&4)==0) { //never printed gfst.udata|=4; if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons) gfst.addExon(gfst.start,gfst.end); gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } ++gfs_i; } } } //for each genomic sequence } else { //not grouped into loci, print the rnas with their parents, if any int numvalid=0; for (int g=0;g<g_data.Count();g++) { GenomicSeqData* gdata=g_data[g]; int gfs_i=0; for (int 
m=0;m<gdata->rnas.Count();m++) { GffObj& t=*(gdata->rnas[m]); if (f_out) { while (gfs_i<gdata->gfs.Count() && gdata->gfs[gfs_i]->start<=t.start) { GffObj& gfst=*(gdata->gfs[gfs_i]); if ((gfst.udata&4)==0) { //never printed gfst.udata|=4; if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons) gfst.addExon(gfst.start,gfst.end); gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } ++gfs_i; } } GTData* tdata=(GTData*)(t.uptr); if (tdata->replaced_by!=NULL) continue; if (process_transcript(gfasta, t)) { t.udata|=4; //tag it as valid numvalid++; if (f_out) { if (tdata->geneinfo) tdata->geneinfo->finalize(); out_counter++; if (fmtGTF) t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); else { if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } //print the parent first, if any if (t.parent!=NULL && ((t.parent->udata & 4)==0)) { GTData* pdata=(GTData*)(t.parent->uptr); if (pdata && pdata->geneinfo!=NULL) pdata->geneinfo->finalize(); t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); t.parent->udata|=4; } t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } }//GFF/GTF output requested } //valid transcript } //for each rna //print the rest of the isolated pseudo/gene/region features not printed yet if (f_out) { while (gfs_i<gdata->gfs.Count()) { GffObj& gfst=*(gdata->gfs[gfs_i]); if ((gfst.udata&4)==0) { //never printed gfst.udata|=4; if (firstGff3Print) { printGff3Header(f_out, args);firstGff3Print=false; } if (gfst.exons.Count()==0 && gfst.children.Count()==0 && forceExons) gfst.addExon(gfst.start,gfst.end); gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } ++gfs_i; } } } //for each genomic seq } //not clustered if (f_repl && f_repl!=stdout) fclose(f_repl); seqinfo.Clear(); //if (faseq!=NULL) delete faseq; //if (gcdb!=NULL) delete gcdb; GFREE(rfltGSeq); FRCLOSE(f_in); FWCLOSE(f_out); 
FWCLOSE(f_w); FWCLOSE(f_x); FWCLOSE(f_y); }