SEXP r_cgiParameters() { int i; char *name; Stringa value=stringCreate(16); Texta keys=textCreate(8); Texta values=textCreate(8); SEXP r_keys, r_values; cgiGetInit(); while(name = cgiGetNext(value)) { textAdd(keys, name); textAdd(values, string(value)); } int n=arrayMax(keys); PROTECT(r_keys=allocVector(STRSXP, n)); PROTECT(r_values=allocVector(STRSXP, n)); for(i=0; i<n; ++i) { SET_STRING_ELT(r_keys, i, mkChar(textItem(keys,i))); SET_STRING_ELT(r_values, i, mkChar(textItem(values,i))); } setNames(r_values, r_keys); stringDestroy(value); textDestroy(keys); textDestroy(values); UNPROTECT(2); return(r_values); }
/**
 * Look up a single CGI parameter by name.
 *
 * @param r_param     Name of the parameter to look for; R_NilValue makes the
 *                    function return R_NilValue immediately.
 * @param ignore_case If true, names are compared with myStrCaseEqual,
 *                    otherwise with myStrEqual.
 * @param r_default   Value returned unchanged when no parameter matches.
 * @return Character vector of length one holding the value of the first
 *         matching parameter, or r_default when there is no match.
 */
SEXP r_cgiParam(SEXP r_param, SEXP ignore_case, SEXP r_default) {
  if (r_param == R_NilValue)
    return R_NilValue;

  char *name;
  Stringa value = stringCreate(16);
  char *param = cStr(r_param);
  SEXP res = r_default;
  int (*fPtr)(char*, char*);

  fPtr = cBool(ignore_case) ? &myStrCaseEqual : &myStrEqual;

  cgiGetInit();
  while ((name = cgiGetNext(value)) != NULL) {
    if ((*fPtr)(name, param)) {
      /* Build the R string straight from the buffer: the former
         intermediate hlr_strdup() copy was never freed. No R allocation
         happens between here and the return, so no PROTECT is needed. */
      res = mkString(string(value));
      break;
    }
  }
  stringDestroy(value);
  return res;
}
int main (int argc, char *argv[]) { LineStream ls; char *line; char *pos; Stringa buffer; if (argc != 2) { usage ("%s <file.intraOffsets>"); } TH1 *his = new TH1D ("","Intra-read distribution",1000,0,1000); TCanvas *canv = new TCanvas("","canvas",1200,400); ls = ls_createFromFile (argv[1]); while (line = ls_nextLine (ls)) { his->Fill (atoi (line)); } ls_destroy (ls); his->Draw(); his->GetXaxis()->SetLabelSize (0.04); his->GetYaxis()->SetLabelSize (0.04); buffer = stringCreate (100); pos = strchr (argv[1],'.'); if (pos == NULL) { die ("Expected <file.intraOffsets>: %s",argv[1]); } *pos = '\0'; stringPrintf (buffer,"%s_intraDistribution.jpg",argv[1]); canv->Print (string (buffer),"jpg"); stringDestroy (buffer); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; int count; int countRemoved; int i; if (argc != 3) { usage ("%s <offsetCutoff> <minNumUniqueReads>",argv[0]); } count = 0; countRemoved = 0; int offsetCutOff = atoi (argv[1]); int minNumUniqueReads = atoi (argv[2]); gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { Array starts = arrayCreate( 100, int); for (i = 0; i < arrayMax( currGE->interReads ); i++) { int currStart = arrp(currGE->interReads, i, GfrInterRead)->readStart1 + arrp(currGE->interReads, i, GfrInterRead)->readStart2; array(starts, arrayMax(starts), int) = currStart; } arraySort( starts, (ARRAYORDERF) arrayIntcmp ); arrayUniq( starts, NULL, (ARRAYORDERF) arrayIntcmp ) ; int numUniqeOffsets = arrayMax( starts ); arrayDestroy( starts ); if (arrayMax( currGE->readsTranscript1 ) != arrayMax( currGE->readsTranscript2 ) ) die( "The two ends have a different number of reads"); Texta reads = textCreate(arrayMax(currGE->readsTranscript1)); for (i = 0; i < arrayMax(currGE->readsTranscript1); i++) { Stringa strA = stringCreate( strlen(textItem( currGE->readsTranscript1, i) ) * 2 + 1); stringAppendf( strA, textItem( currGE->readsTranscript1,i)); stringAppendf( strA, textItem( currGE->readsTranscript2,i)); textAdd( reads, string(strA)); stringDestroy( strA ); } textUniqKeepOrder( reads ); int numRemaining = arrayMax( reads ); textDestroy ( reads ); if (numRemaining <= minNumUniqueReads || numUniqeOffsets <= offsetCutOff) { countRemoved++; continue; } puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); warn("%s_PCRFilter: offset=%d minNumUniqueReads=%d", argv[0],offsetCutOff, minNumUniqueReads); warn("%s_numRemoved: %d",argv[0],countRemoved); warn("%s_numGfrEntries: %d",argv[0],count); return 0; }
static char* getBreakPointSequence (char *tileCoordinate1, char *tileCoordinate2) { Stringa buffer; Stringa targetsFile; FILE *fp; Array targetSeqs; int i; Seq *currSeq; static Stringa sequence = NULL; buffer = stringCreate (100); targetsFile = stringCreate (100); stringPrintf (targetsFile,"targets_%d.txt",getpid ()); if (!(fp = fopen (string (targetsFile),"w")) ){ die ("Unable to open target file: %s",string (targetsFile)); } fprintf (fp,"%s\n%s",tileCoordinate1,tileCoordinate2); fclose (fp); stringPrintf (buffer,"%s %s/%s stdout -noMask -seqList=%s", confp_get(Conf, "BLAT_TWO_BIT_TO_FA"), confp_get(Conf, "BLAT_DATA_DIR"), confp_get(Conf, "BLAT_TWO_BIT_DATA_FILENAME"), string (targetsFile)); fasta_initFromPipe (string (buffer)); targetSeqs = fasta_readAllSequences (0); fasta_deInit (); if (arrayMax (targetSeqs) != 2) { die ("Expected only two target sequences"); } stringCreateClear (sequence,100); for (i = 0; i < arrayMax (targetSeqs); i++) { currSeq = arrp (targetSeqs,i,Seq); stringAppendf (sequence,"%s",currSeq->sequence); hlr_free (currSeq->name); hlr_free (currSeq->sequence); } arrayDestroy (targetSeqs); stringPrintf (buffer,"rm -rf %s",string (targetsFile)); hlr_system (string (buffer),0); stringDestroy (targetsFile); stringDestroy (buffer); return string (sequence); }
/**
 * Writes the circos configuration file for one locus.
 *
 * The file is created as
 * <WEB_CIRCOS_DIR>/test/circos_<prefix>_<chromosome>_<start>_<end>.conf and
 * is filled by the conf_print* helpers: header, units, data tracks (only
 * when the scale is at most 10000), links and footer. The scale is also
 * echoed to stdout inside an <h2> element.
 *
 * @return 0 on success; -1 follows the die() call for an unopenable file.
 */
int write_circosConf (char* prefix, Locus locus, Array regions, Chrdata_t *chromosomes, SVCfg_t *settings)
{
  float trackPos = 0.99;
  Stringa confPath = stringCreate(50);
  int scale = getScale (regions);
  FILE *conf;

  stringPrintf (confPath, "%s/test/circos_%s_%s_%d_%d.conf",
                confp_get(Conf, "WEB_CIRCOS_DIR"),
                prefix,
                locus.chromosome,
                locus.start,
                locus.end);
  conf = fopen (string (confPath), "w");
  if (conf == NULL) {
    die ("Unable to open target file");
    return -1;
  }

  printf ("<h2>%i</h2>", scale);

  /* Emit the sections of the configuration in file order */
  conf_printHeader (conf,
                    confp_get(Conf, "WEB_CIRCOS_DIR"),
                    confp_get(Conf, "WEB_DATA_DIR"),
                    confp_get(Conf, "WEB_SDATA_DIR"),
                    chromosomes,
                    prefix,
                    locus,
                    scale);
  conf_printUnits (conf, regions, chromosomes, scale);
  if (!(scale > 10000)) {
    conf_printDataTracks (conf,
                          prefix,
                          locus,
                          confp_get(Conf, "WEB_SDATA_DIR"),
                          confp_get(Conf, "WEB_DATA_DIR"),
                          &trackPos,
                          regions,
                          chromosomes,
                          settings);
  }
  conf_printLinks (conf,
                   confp_get(Conf, "WEB_DATA_DIR"),
                   &trackPos,
                   prefix,
                   locus,
                   settings->readlim);
  conf_printFooter (conf);

  fclose (conf);
  stringDestroy (confPath);
  return 0;
}
/**
 * Destroy a symbol table together with its last remaining block.
 *
 * Exactly one block is expected to be left on the block list; any other
 * count is reported on stderr as a block leak. Only the block at the back
 * of the list is released here.
 *
 * @param st Symbol table to destroy; NULL is accepted and ignored. The
 *           caller's pointer is not modified and must not be used afterwards.
 */
void stDestroy(symtable *st) {
    if (st == NULL)
        return;

    if (listSize(st->blockList) != 1)
        fprintf(stderr, "Symbol table block leak!\n");

    block *b = (block *) listRemoveBack(st->blockList);
    /* Guard against a missing block (e.g. an already empty list) instead of
       dereferencing a null pointer. */
    if (b != NULL) {
        stringDestroy(b->name);
        bstDestroy(b->symbols, bstDelSymbol); /* cleanup memory for symbols in bst */
        free(b);
    }

    listDestroy(st->blockList);
    free(st);
    /* The former "st = NULL" only changed the local copy of the pointer and
       had no effect for the caller, so it was dropped. */
}
void rdbu_initLoginInfo (char *filename) { /** Looks in the following places for user/password\@database info needed for logging into database in this order 1. a file named .login_db in the user's home directory 2. a file named .login_db in the current directory 3. a file named 'filename' (optional input parameter) 4. arguments 'dbuser', 'dbpassword', 'dbname' present the command line Syntax of the files: - only the first line is read - this line must have the form user/password\@dbname - each of these fields can be '-'; in this case the value of this field is not changed. E.g. if $HOME/.login_db contains -/-\@testdb and 'filename' contains scott/-@- and on the command line there is an argument -dbpassword tiger then username will be scott, password will be tiger and database will be testdb optional: if arg_init() was called before then the command line will be considered; else the command line is ignored Postcondition: rdbu_user() etc can be called */ Stringa fn; char *home = getenv ("HOME"); if (user != NULL) die ("rdbu_getLoginInfo() twice"); user = stringCreate(10); password = stringCreate(10); database = stringCreate(10); if (home != NULL) { fn = stringCreate (100); stringPrintf (fn,"%s/.login_db",home); readUserInfoFile (string (fn)); stringDestroy (fn); } readUserInfoFile (".login_db"); if (filename != NULL && *filename != '\0') readUserInfoFile (filename); if (arg_isInit ()) { if (arg_present ("dbuser")) stringCpy (user,arg_get ("dbuser")); if (arg_present ("dbpassword")) stringCpy (password,arg_get ("dbpassword")); if (arg_present ("dbname")) stringCpy (database,arg_get ("dbname")); } }
int main (int argc, char *argv[]) { int i,j,groupNumber; MrfEntry *currEntry; GffEntry *currGffEntry,*nextGffEntry; Array gffEntries; FILE *fp; Stringa buffer; short int paired; if (argc != 2) { usage ("%s <prefix>",argv[0]); } buffer = stringCreate (1000); groupNumber = 0; mrf_init ("-"); gffEntries = arrayCreate (100000,GffEntry); while (currEntry = mrf_nextEntry ()) { processRead (gffEntries, currEntry, &groupNumber); } mrf_deInit (); arraySort (gffEntries,(ARRAYORDERF)sortGffEntriesByTargetNameAndGroupNumber); i = 0; while (i < arrayMax (gffEntries)) { currGffEntry = arrp (gffEntries,i,GffEntry); stringPrintf (buffer,"%s_%s.gff",argv[1],currGffEntry->targetName); fp = fopen (string (buffer),"w"); if (fp == NULL) { die ("Unable to open file: %s",string (buffer)); } fprintf (fp,"browser hide all\n"); fprintf (fp,"track name=\"%s_%s\" visibility=2\n",argv[1],currGffEntry->targetName); fprintf (fp,"%s\n",currGffEntry->line); j = i + 1; while (j < arrayMax (gffEntries)) { nextGffEntry = arrp (gffEntries,j,GffEntry); if (!strEqual (currGffEntry->targetName,nextGffEntry->targetName)) { break; } fprintf (fp,"%s\n",nextGffEntry->line); j++; } i = j; fclose (fp); } stringDestroy (buffer); return 0; }
/**
 * Fill a SamEntry from the tab-separated fields of one SAM line.
 *
 * Fields 0-8 are always copied or converted. Sequence, quality and optional
 * tags are only filled in when the entry is kept; they stay NULL otherwise.
 *
 * @param tokens   Fields of the SAM line; indices 0-10 are accessed, any
 *                 further fields are treated as optional tags
 * @param currSamE Entry to fill; string members are allocated with strdup()
 * @param hasSeqs  Set to 1 when the sequence field is not "*"
 * @param hasQual  Set to 1 when the quality field is not "*"
 * @return 0 if the read or its mate is unmapped or the read fails the
 *         platform/vendor checks, 1 otherwise
 */
int generateSamEntry ( Texta tokens, SamEntry *currSamE, int* hasSeqs, int* hasQual)
{
  const int firstTagField = 11;
  const int skipMask = S_QUERY_UNMAPPED | S_MATE_UNMAPPED | S_FAILS_CHECKS;
  int field;

  /* Mandatory fields */
  currSamE->qname = strdup (textItem (tokens, 0));
  currSamE->flags = atoi (textItem (tokens, 1));
  currSamE->rname = strdup (textItem (tokens, 2));
  currSamE->pos   = atoi (textItem (tokens, 3));
  currSamE->mapq  = atoi (textItem (tokens, 4));
  currSamE->cigar = strdup (textItem (tokens, 5));
  currSamE->mrnm  = strdup (textItem (tokens, 6));
  currSamE->mpos  = atoi (textItem (tokens, 7));
  currSamE->isize = atoi (textItem (tokens, 8));

  currSamE->seq  = NULL;
  currSamE->qual = NULL;
  currSamE->tags = NULL;

  /* Skip if unmapped or fails platform/vendor checks */
  if ((currSamE->flags & skipMask) != 0) {
    return 0;
  }

  /* Optional tags: re-join all remaining fields with tabs */
  if (arrayMax (tokens) > firstTagField) {
    Stringa joined = stringCreate (10);
    stringAppendf (joined, "%s", textItem (tokens, firstTagField));
    for (field = firstTagField + 1; field < arrayMax (tokens); field++) {
      stringAppendf (joined, "\t");
      stringAppendf (joined, "%s", textItem (tokens, field));
    }
    currSamE->tags = strdup (string(joined));
    stringDestroy (joined);
  }

  /* Sequence and quality are absent when the field is "*" */
  if (strcmp (textItem (tokens, 9), "*") != 0) {
    *hasSeqs = 1;
    currSamE->seq = strdup (textItem (tokens, 9));
  }
  if (strcmp (textItem (tokens, 10), "*") != 0) {
    *hasQual = 1;
    currSamE->qual = strdup (textItem (tokens, 10));
  }
  return 1;
}
int main (int argc, char *argv[]) { GfrEntry *currGE; Array kgTreeFams; Stringa buffer; int count; int countRemoved; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; buffer = stringCreate (100); stringPrintf (buffer,"%s/%s", confp_get(conf, "ANNOTATION_DIR"), confp_get(conf, "KNOWN_GENE_TREE_FAM_FILENAME")); kgTreeFams = util_readKnownGeneTreeFams (string (buffer)); arraySort (kgTreeFams,(ARRAYORDERF)sortKgTreeFamsByTranscriptName); stringDestroy (buffer); count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()){ if (isHomologous (kgTreeFams,currGE->nameTranscript1,currGE->nameTranscript2)) { countRemoved++; continue; } puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { GfrEntry *currGE; GfrInterRead *currGIR; int i; Stringa buffer; FILE *fp1,*fp2; int count; count = 0; buffer = stringCreate (100); gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { stringPrintf (buffer,"%s_1.bed",currGE->id); fp1 = fopen (string (buffer),"w"); stringPrintf (buffer,"%s_2.bed",currGE->id); fp2 = fopen (string (buffer),"w"); if (fp1 == NULL || fp2 == NULL) { die ("Unable to open BED files"); } fprintf (fp1,"browser full knownGene\n"); fprintf (fp1,"track name=\"Inter paird-ends: %s_1\" visibility=2\n",currGE->id); fprintf (fp2,"browser full knownGene\n"); fprintf (fp2,"track name=\"Inter paird-ends: %s_2\" visibility=2\n",currGE->id); for (i = 0; i < arrayMax (currGE->interReads); i++) { currGIR = arrp (currGE->interReads,i,GfrInterRead); fprintf (fp1,"%s\t%d\t%d\n",currGE->chromosomeTranscript1,currGIR->readStart1,currGIR->readEnd1); fprintf (fp2,"%s\t%d\t%d\n",currGE->chromosomeTranscript2,currGIR->readStart2,currGIR->readEnd2); } fclose (fp1); fclose (fp2); puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); stringDestroy (buffer); warn ("%s_numGfrEntries: %d",argv[0],count); return 0; }
/**
 * Print the chromosome unit/display section of a circos configuration.
 *
 * When the first region has chromosome 0, all chromosomes are displayed by
 * default with a unit of 1000000. Otherwise the unit is 'scale', the default
 * display is switched off and a "chromosomes = ..." line is written that
 * lists, separated by ';', every region whose own 'show' flag and whose
 * chromosome's 'show' flag are both set, with start and end divided by
 * 'scale'.
 *
 * @param fp          Configuration file being written
 * @param regions     Array of SRegion_t
 * @param chromosomes Per-chromosome data, indexed by chromosome number
 * @param scale       Unit used for the region coordinates
 */
void conf_printUnits (FILE *fp, Array regions, Chrdata_t *chromosomes, int scale)
{
  Stringa shown = stringCreate (50);
  SRegion_t *region = arrayp (regions, 0, SRegion_t);
  int needSeparator = 0;
  char *label;
  int idx;

  if (region->chromosome == 0) {
    fprintf (fp, "chromosomes_display_default = yes\n");
    fprintf (fp, "chromosomes_units = 1000000\n");
    stringDestroy (shown);
    return;
  }

  stringPrintf (shown, "chromosomes = ");
  fprintf (fp, "chromosomes_units = %i\n", scale);
  fprintf (fp, "chromosomes_display_default = no\n");
  for (idx = 0; idx < arrayMax (regions); idx++) {
    region = arrayp (regions, idx, SRegion_t);
    if (!chromosomes[region->chromosome].show || !region->show) {
      continue;
    }
    label = getHchrname (region->chromosome);
    if (needSeparator) {
      stringAppendf (shown, ";");
    }
    stringAppendf (shown, "%s:%i-%i", label, region->start/scale, region->end/scale);
    needSeparator = 1;
    free (label);
  }
  fprintf (fp, "%s\n", string (shown));
  stringDestroy (shown);
}
symtable *stCreate() { symtable *st = (symtable *) malloc(sizeof(symtable)); st->blockList = listCreate(); //Add block0 block *b = (block *) malloc(sizeof(block)); b->name = stringCreate(); stringAppendCharArray(b->name, "block0", 6*sizeof(char)); b->symbols = bstCreate(bstCompareSymbol); b->nextLoc = 0; listAddBack(st->blockList, b); //Initialize block0 with PJ's builtin procedures and input/output files for (unsigned int i = 0; i < builtin_num; i++) { const char *builtinName = pjbuiltinString(i); string *name = stringCreate(); stringAppendCharArray(name, builtinName, strlen(builtinName)); symbol *sym = symbolCreate(name); stringDestroy(name); symbolSetBuiltin(sym, i); stAddSymbol(st, sym); } return st; }
/**
 * Read a tab-delimited biokit expression file into an R data.frame.
 *
 * The first line of the file is skipped as header. Every further line must
 * have at least 7 columns: the row name followed by six mandatory values
 * (multi-map RPKM, multi-map read count, unique-map RPKM, unique-map read
 * count, multi-map proportion, all mapping reads). Any additional columns
 * become character columns named Annotation1, Annotation2, ...; rows with
 * fewer additional columns get NA there.
 *
 * @param filename Character vector whose first element is the file name
 * @return A list with class "data.frame", column names and row names set.
 *         An R error is raised when the file cannot be opened or a line has
 *         fewer than 7 columns.
 */
SEXP c_read_biokit_exprs (SEXP filename) {
  const int MAND_NCOL=7; /* the first column is the row name, and column 2-7 are mandatory */
  const char* fn=CHAR(STRING_ELT(filename, 0));
  char* fnCopy;
  LineStream ls;
  char* line;
  int add_ncol=0;
  int tooFewColumns=0;
  int nprot=0;
  int nrow=0;
  int i=0;
  int j=0;
  Texta it;
  Texta anno=NULL; /* must have a NULL assigned; otherwise textCreateClear leads to memory error */
  Texta rowAnno;
  Texta rnames;
  Array mrpkms, mreads, srpkms, sreads, mprop, allmap, annos;
  Stringa str;
  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap;
  SEXP R_colnames, R_class;
  SEXP R_res=R_NilValue;

  /* Open the file before anything else is allocated, so that a missing
     file can be reported without leaking; the line stream used to be
     dereferenced unchecked. The writable copy of the name is kept so it
     can be freed again. */
  fnCopy=strdup(fn);
  if(fnCopy==NULL)
    error("Out of memory while copying the file name");
  ls=ls_createFromFile(fnCopy);
  if(ls==NULL) {
    free(fnCopy);
    error("Cannot open file '%s'", fn);
  }

  rnames=textCreate(128);
  mrpkms=arrayCreate(128, double);
  mreads=arrayCreate(128, int);
  srpkms=arrayCreate(128, double);
  sreads=arrayCreate(128, int);
  mprop=arrayCreate(128, double);
  allmap=arrayCreate(128, int);
  annos=arrayCreate(128, Texta);
  str=stringCreate(8);

  ls_nextLine(ls); /* skip the first header line */
  while((line=ls_nextLine(ls))!=NULL) {
    it=textFieldtokP(line, "\t");
    if(arrayMax(it)<MAND_NCOL) {
      /* Report after the cleanup below; error() does not return */
      textDestroy(it);
      tooFewColumns=1;
      break;
    }
    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    /* Optional annotation columns of this row */
    add_ncol=max(arrayMax(it)-MAND_NCOL, add_ncol);
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it); ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);

    /* Release the per-line token list, which was leaked for every row.
       NOTE(review): assumes textFieldtokP() returns a Texta owned by the
       caller - confirm against the library documentation. */
    textDestroy(it);
    nrow++;
  }
  if(tooFewColumns)
    goto cleanup;

  R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
  R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

  for(i=0; i<nrow; ++i) {
    SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
    REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
    INTEGER(R_mreads)[i]=arru(mreads, i, int);
    REAL(R_srpkms)[i]=arru(srpkms, i, double);
    INTEGER(R_sreads)[i]=arru(sreads, i, int);
    REAL(R_mprop)[i]=arru(mprop, i, double);
    INTEGER(R_allmap)[i]=arru(allmap, i, int);
  }

  R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
  SET_VECTOR_ELT(R_res, 0, R_mrpkms);
  SET_VECTOR_ELT(R_res, 1, R_mreads);
  SET_VECTOR_ELT(R_res, 2, R_srpkms);
  SET_VECTOR_ELT(R_res, 3, R_sreads);
  SET_VECTOR_ELT(R_res, 4, R_mprop);
  SET_VECTOR_ELT(R_res, 5, R_allmap);

  for(i=0; i<add_ncol; ++i) {
    SEXP R_anno=PROTECT(allocVector(STRSXP, nrow));
    for(j=0; j<nrow; ++j) {
      /* A separate variable keeps the scratch list 'anno' reachable so it
         can be released at the end. */
      rowAnno=array(annos, j, Texta);
      if(arrayMax(rowAnno)>i) {
        SET_STRING_ELT(R_anno, j, mkChar(textItem(rowAnno, i)));
      } else {
        SET_STRING_ELT(R_anno, j, R_NaString);
      }
    }
    SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); /* -1 because the first column is row name */
    UNPROTECT(1);
  }

  PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
  PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
  SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
  SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
  SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
  SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
  SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
  SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
  for(i=0; i<add_ncol; ++i) {
    stringPrintf(str, "Annotation%d", i+1);
    SET_STRING_ELT(R_colnames, i+MAND_NCOL-1, mkChar(string(str)));
  }
  SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
  setAttrib(R_res, install("names"), R_colnames);
  setAttrib(R_res, install("row.names"), R_rnames);
  setAttrib(R_res, install("class"), R_class);

cleanup:
  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  if(anno!=NULL)
    textDestroy(anno);
  /* rnames is a Texta filled by textAdd(), so it is released with
     textDestroy() rather than arrayDestroy(). */
  textDestroy(rnames);
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);
  ls_destroy(ls);
  free(fnCopy);

  if(tooFewColumns)
    error("Input file must contain no less than %d columns", MAND_NCOL);

  UNPROTECT(nprot);
  return(R_res);
}
int main (int argc, char *argv[]) { Array kgXrefs; Stringa buffer; LineStream ls; int count=0; char* geneSymbolTranscript; char* descriptionTranscript; char* line; char* exonID = NULL; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; buffer = stringCreate (100); stringPrintf (buffer,"%s/%s", confp_get(conf, "ANNOTATION_DIR"), confp_get(conf, "KNOWN_GENE_XREF_FILENAME")); kgXrefs = util_readKnownGeneXrefs (string (buffer)); arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName); stringDestroy (buffer); // gfr_init ("-"); ls = ls_createFromFile("-"); while (line = ls_nextLine(ls)) { char *lineP = hlr_strdup(line); WordIter w = wordIterCreate( line, "\t", 0); char *nameTranscript = wordNext( w ); char *p = rindex(nameTranscript, '_'); if (p) { exonID = hlr_strdup( p+1 ); *p='\0'; } transcript2geneSymbolAndGeneDescription(kgXrefs, nameTranscript, &geneSymbolTranscript, &descriptionTranscript); if (exonID) { printf("%s_%s\t%s\t%s\t%s", nameTranscript, exonID, geneSymbolTranscript, exonID, descriptionTranscript); hlr_free(exonID); } else { printf("%s\t%s\t1\t%s", nameTranscript, geneSymbolTranscript, descriptionTranscript); } printf("%s\n", lineP+strlen(nameTranscript)); count++; hlr_free(lineP); wordIterDestroy(w); } ls_destroy (ls); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
/**
 * Write the "file = ..." line of the exon highlight track.
 *
 * If the first region has chromosome 0, the genome-wide file
 * <sdata_dir>/exons.hlight.txt is referenced. Otherwise the per-chromosome
 * highlight files of all regions (X for chromosome 23, Y for 24) are
 * filtered into <sdata_dir>/tmp/exons.hlight_s.txt, keeping every line whose
 * start (field 1) or end (field 2) lies inside the region, and that
 * filtered file is referenced instead.
 *
 * On failure to open a file a message is printed to stderr and the function
 * returns without writing the "file = ..." line.
 *
 * @param fp        Configuration file being written
 * @param regions   Array of SRegion_t
 * @param sdata_dir Directory holding the highlight files
 */
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src = NULL;
  FILE *out;
  char *line;
  Texta entry;
  int i, astart, aend;
  SRegion_t *tmp;
  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
    fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
    stringDestroy (buffer);
    return;
  }

  tmp = arrayp (regions, 0, SRegion_t);
  if (tmp->chromosome == 0) {
    fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  }
  else {
    for (i = 0; i < arrayMax (regions); i++) {
      tmp = arrayp (regions, i, SRegion_t);
      if (tmp->chromosome == 23) {
        stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
      }
      else if (tmp->chromosome == 24) {
        stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
      }
      else {
        stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
      }
      if ((src = ls_createFromFile (string (buffer))) == NULL) {
        fprintf (stderr, "Cannot open exons.hlight.txt\n");
        /* Release what is held so far; both used to leak here */
        stringDestroy (buffer);
        fclose (out);
        return;
      }
      while ((line = ls_nextLine (src)) != NULL) {
        entry = textFieldtokP (line, " ");
        /* Fields 1 and 2 are needed; skip lines that are too short */
        if (arrayMax (entry) >= 3) {
          astart = atoi (textItem (entry, 1));
          aend = atoi (textItem (entry, 2));
          if ((astart >= tmp->start && astart <= tmp->end) ||
              (aend >= tmp->start && aend <= tmp->end)) {
            fprintf (out, "%s\n", line);
          }
        }
        textDestroy (entry);
      }
      /* Close each stream as soon as it is exhausted. Previously only the
         last stream was destroyed, and in the whole-genome branch the
         destroy call hit an uninitialized handle. */
      ls_destroy (src);
    }
    fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }
  stringDestroy (buffer);
  fclose (out);
}
int main (int argc, char *argv[]) { GfrEntry *currGE; int count; int countRemoved; int mitochondrialCount; unsigned int minReadSize; int i; Stringa cmd; BlatQuery *blQ=NULL; config *conf = NULL; /**< Pointer to configuration file .fusionseqrc */ if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH")); return EXIT_FAILURE; } if( confp_get( conf,"MAX_OVERLAP_ALLOWED")==NULL ) { die("%s:\tCannot find MAX_OVERLAP_ALLOWED in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf,"MAX_FRACTION_HOMOLOGOUS")==NULL ) { die("%s:\tCannot find MAX_FRACTION_HOMOLOGOUS in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "MITOCHONDRIAL_DIR")==NULL ) { die("%s:\tCannot find MITOCHONDRIAL_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "MITOCHONDRIAL_FILENAME")==NULL ) { die("%s:\tCannot find MITOCHONDRIAL_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "TMP_DIR")==NULL ) { die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) { die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) { die("%s:\tCannot find 
BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } count = 0; countRemoved = 0; cmd = stringCreate (100); // initializing the gfServers stringPrintf( cmd, "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2); int ret = hlr_system( string(cmd), 1 ); if( ret != 0 ) { // not initialized stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_mitochondrial.log start %s %d %s/%s &", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "TMP_DIR"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf,"MITOCHONDRIAL_FILENAME")); hlr_system( string( cmd ), 0 ); long int startTime = time(0); stringPrintf( cmd , "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2); while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ; if( hlr_system( string(cmd), 1 ) != 0 ) { die("gfServer for %s/%s not initialized: %s %s %s", confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf, "MITOCHONDRIAL_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); return EXIT_FAILURE; } } gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { if (strEqual(currGE->chromosomeTranscript1, "chrM") || strEqual(currGE->chromosomeTranscript2, "chrM")) { countRemoved++; continue; } else { mitochondrialCount = 0; minReadSize=1000; writeFasta( currGE, &minReadSize, confp_get( conf, "TMP_DIR") ); // in util.c stringPrintf(cmd, "cd %s;%s %s %d / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.mito.psl &>/dev/null", confp_get( conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), 
atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, minReadSize - 5 > 20 ? minReadSize - 5 : 20 , currGE->id, currGE->id); int attempts=0; ret = hlr_system( string(cmd), 1 ); while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++; if( attempts == 5000 ) { die("Cannot map the reads %s", string( cmd )); return EXIT_FAILURE; } // reading the results of blast from File stringPrintf(cmd, "%s/%s.mito.psl", confp_get( conf, "TMP_DIR"), currGE->id); blatParser_initFromFile( string(cmd) ); while( blQ = blatParser_nextQuery() ) { //warn("iter %d\tquery %s", iter, blQ->qName );iter++; int nucleotideOverlap = getNucleotideOverlap ( blQ ); if (nucleotideOverlap > (((double) minReadSize) * strtod(confp_get( conf, "MAX_OVERLAP_ALLOWED"), NULL))) { char* value = strchr( blQ->qName,'/' ); if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName ); int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead ); currGIR->flag = 1; mitochondrialCount++; } } blatParser_deInit(); if ( ( (double) mitochondrialCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) ) <= strtod(confp_get( conf, "MAX_FRACTION_HOMOLOGOUS"), NULL)) { if( mitochondrialCount > 0 ) updateStats( currGE ); // writing the gfrEntry puts (gfr_writeGfrEntry (currGE)); count++; } else { countRemoved++; } // removing temporary files stringPrintf (cmd,"rm -rf %s/%s_reads.fa %s/%s.mito.psl", confp_get( conf, "TMP_DIR"), currGE->id, confp_get( conf, "TMP_DIR"), currGE->id ); hlr_system( string(cmd) , 1); } } gfr_deInit (); stringDestroy( cmd ); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; int i,j,k,l, h,index; Stringa buffer,cmd,fnSequencesToAlign; FILE *fp; FILE *fp1; FILE *fp2; FILE *freads1; FILE *freads2; Array gfrEntries; BowtieQuery *currBQ,testBQ; BowtieEntry *currBE; Texta seqNames; int readSize1, readSize2, minReadSize; Array bowtieQueries; char transcriptNumber; int isHomologous,homologousCount; int count; int countRemoved; unsigned short int tooMany; BlatQuery *blQ; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc", argv[0]); return EXIT_FAILURE; } if ( (confp_get( conf, "BLAT_TWO_BIT_TO_FA")) == NULL) { die("%s:\tCannot find BLAT_TWO_BIT_TO_FA in the configuration file: %s", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if ( (confp_get( conf,"BLAT_DATA_DIR")) == NULL) { die("%s:\tCannot find BLAT_DATA_DIR in the configuration file: %sc", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "TMP_DIR")==NULL ) { die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) { die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "PSEUDOGENE_DIR")==NULL ) { die("%s:\tCannot find PSEUDOGENE_DIR in the configuration 
file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "PSEUDOGENE_FILENAME")==NULL ) { die("%s:\tCannot find PSEUDOGENE_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } cmd = stringCreate (100); // initializing the gfServers stringPrintf( cmd, "%s status %s %s &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT") ); int ret = hlr_system( string(cmd), 1 ); if( ret != 0 ) { // not initialized stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_genome.log start %s %s %s/%s &", confp_get( conf, "BLAT_GFSERVER"), confp_get(conf, "TMP_DIR"),confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME")); hlr_system( string( cmd ), 0 ); long int startTime = time(0); stringPrintf( cmd , "%s status %s %s &2> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ; if( hlr_system( string(cmd), 1 ) != 0 ) { die("gfServer for %s/%s not initialized: %s %s %s", confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); return EXIT_FAILURE; } } // end initialization gfr_init ("-"); gfrEntries = gfr_parse (); if (arrayMax (gfrEntries) == 0){ puts (gfr_writeHeader ()); gfr_deInit (); return 0; } seqNames = textCreate (10000); buffer = stringCreate (100); fnSequencesToAlign = stringCreate (100); count = 0; countRemoved = 0; stringPrintf( buffer, "%s/%s", confp_get( conf, "PSEUDOGENE_DIR"), confp_get( conf, "PSEUDOGENE_FILENAME") ); intervalFind_addIntervalsToSearchSpace (string(buffer),0); puts (gfr_writeHeader ()); 
for (i = 0; i < arrayMax (gfrEntries); i++) { currGE = arrp (gfrEntries,i,GfrEntry); homologousCount = 0; minReadSize=10000; // creating two fasta files with the two genes stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript1.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA") , confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript1, currGE->startTranscript1, currGE->endTranscript1, confp_get(conf, "TMP_DIR"), currGE->id); hlr_system( string(cmd) , 0); stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript2.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA"), confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript2, currGE->startTranscript2, currGE->endTranscript2, confp_get(conf, "TMP_DIR"), currGE->id); hlr_system( string(cmd) , 0); Stringa fa1 = stringCreate( 100 ); Stringa fa2 = stringCreate( 100 ); // creating the two fasta files with the reads stringPrintf( fa1, "%s/%s_reads1.fa", confp_get(conf, "TMP_DIR"), currGE->id); if (!(freads1 = fopen ( string(fa1) ,"w"))) { die ("Unable to open file: %s",string (fa1)); } // writing the reads of the first end into file for (l = 0; l < arrayMax (currGE->readsTranscript1); l++) { char* currRead1 = hlr_strdup( textItem (currGE->readsTranscript1,l)); // read1 readSize1 = strlen( currRead1 ); if( readSize1 == 0 ) die("Read size cannot be zero: read1[ %s ]", currRead1); if( readSize1 < minReadSize ) minReadSize = readSize1; fprintf( freads1, ">%d\n%s\n", l, currRead1 ); hlr_free( currRead1 ); } fclose( freads1 ); stringPrintf( fa2, "%s/%s_reads2.fa", confp_get(conf, "TMP_DIR"), currGE->id); if (!(freads2 = fopen ( string(fa2) ,"w"))) { die ("Unable to open file: %s",string (fa2)); } // writing the reads of the second end into file for (l = 0; l < arrayMax (currGE->readsTranscript2); l++) { char* currRead2 = hlr_strdup( textItem (currGE->readsTranscript2,l)); // read2 readSize2 = strlen( currRead2 
); if( readSize2 == 0 ) die("Read size cannot be zero: read2[ %s ]", currRead2); if( readSize2 < minReadSize ) minReadSize = readSize2; fprintf( freads2, ">%d\n%s\n", l, currRead2 ); hlr_free( currRead2 ); } fclose( freads2 ); // collapse the reads 2 ## requires the FASTX package stringPrintf( cmd, "%s -i %s/%s_reads2.fa -o %s/%s_reads2.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); hlr_system (string (cmd),0); //blat of reads2 against the first transcript stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript1.fa %s/%s_reads2.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); // reading the results of blast from Pipe blatParser_initFromPipe( string(cmd) ); while( blQ = blatParser_nextQuery() ) { int nucleotideOverlap = getNucleotideOverlap ( blQ ); if ( nucleotideOverlap > ( ((double)readSize2)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) { char* value = strchr(blQ->qName,'-'); homologousCount+=atoi(value+1); } } blatParser_deInit(); // collapse the reads 1 ## requires the FASTX package on the path stringPrintf( cmd, "%s -i %s/%s_reads1.fa -o %s/%s_reads1.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); hlr_system (string (cmd),0); //blat of reads1 against the second transcript stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript2.fa %s/%s_reads1.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); blatParser_initFromPipe( string(cmd) ); while( blQ = blatParser_nextQuery() ) { int nucleotideOverlap = getNucleotideOverlap ( blQ ); if ( nucleotideOverlap > ( ((double)readSize1)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) { char* value = strchr(blQ->qName,'-'); homologousCount+=atoi(value+1); 
} } blatParser_deInit(); stringPrintf (cmd,"cd %s;rm -rf %s_reads?.fa %s_reads?.collapsed.fa %s_transcript?.fa", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id); hlr_system( string(cmd) , 0); if (((double)homologousCount / (double)arrayMax(currGE->readsTranscript1)) <= atof(confp_get(conf, "MAX_FRACTION_HOMOLOGOUS")) ) { homologousCount = 0; // there is no homology between the two genes, but what about the rest of the genome writeFasta( currGE, &minReadSize, confp_get(conf, "TMP_DIR") ); stringPrintf(cmd, "cd %s; %s %s %s / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.smallhomology.psl &>/dev/null", confp_get(conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), minReadSize - (int)(0.1 * minReadSize) > 20 ? minReadSize - (int) (0.1 * minReadSize) : 20 , currGE->id, currGE->id); int attempts=0; ret = hlr_system( string(cmd), 1 ); while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++; if( attempts == 5000 ) { die("Cannot map the reads %s", string( cmd )); return EXIT_FAILURE; } // reading the results of blast from File stringPrintf(cmd, "%s/%s.smallhomology.psl", confp_get( conf, "TMP_DIR"), currGE->id); blatParser_initFromFile( string(cmd) ); tooMany = 1; while( blQ = blatParser_nextQuery() ) { tooMany = 0; checkPseudogeneOverlap( blQ ); if( arrayMax( blQ->entries ) > 1 ) { homologousCount+= arrayMax( blQ->entries ) - 1; char* value = strchr( blQ->qName,'/' ); if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName ); int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead ); currGIR->flag = 1; } } blatParser_deInit(); if ( tooMany == 1 || ( ( (double) homologousCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) ) > atof(confp_get(conf, 
"MAX_FRACTION_HOMOLOGOUS")) ) ) { countRemoved++; stringPrintf (cmd,"cd %s; rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id); hlr_system( string(cmd), 1 ); continue; } // writing the gfrEntry, if everthing else didn't stop if( homologousCount > 0 ) updateStats( currGE ); puts (gfr_writeGfrEntry (currGE)); count++; // removing temporary files stringPrintf (cmd,"cd %s;rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id); hlr_system( string(cmd) , 1); } else { countRemoved++; } } gfr_deInit (); stringDestroy (fnSequencesToAlign); stringDestroy (cmd); stringDestroy (buffer); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
static void generateOutput (char* prefix, char* typeSelected, int minNum) { GfrEntry *currGE; Stringa buffer; char *pos; puts ("<html>"); puts ("<head>"); puts ("<title>Results - Gene Fusions</title>"); html_printGenericStyleSheet (12); puts ("</head>"); puts ("<body>"); if (prefix[0] == '\0') { die ("Invalid prefix"); } printf ("<h1>Results - %s</h1><br><br><br>",prefix); buffer = stringCreate(50); //Chromosome expression, if present LineStream ls; char* chrSignal=NULL; stringPrintf(buffer, "ls -1 %s/BGRS/%s_chr*.bgr.gz 2> /dev/null", confp_get(Conf, "WEB_DATA_DIR"), prefix); ls = ls_createFromPipe(string(buffer)); int countCol = 0; puts ("Expression signal: "); fflush(stdout); while( chrSignal = ls_nextLine(ls)) { char* chrTmp = stringBetween( prefix, ".bgr.gz", chrSignal ); chrTmp++; printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target='blank'>%s</a>] ", htmlLinker_generateLinkToGenomeBrowserAtUCSC("hg18","vertebrate","human", chrTmp, confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"), 50000000 + confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), confp_get(Conf, "WEB_DATA_LINK"), prefix, chrTmp, chrTmp); if (countCol > 10) { puts( "<BR>" ); countCol=0; } countCol++; } if( countCol==0) puts( "No data available yet" ); ls_destroy(ls); puts ("<br><br>"); puts ("For a definition of SPER, DASPER and RESPER see <a href=http://rnaseq.gersteinlab.org/fusionseq/>FusionSeq</a>"); puts ("<br><br>"); puts ("<br><table border=0 width=100% align=center cellpadding=10>"); puts ("<tr align=left>"); puts ("<th>SPER</th>"); puts ("<th>DASPER</th>"); puts ("<th>RESPER</th>"); puts ("<th>Number of inter paired-end reads</th>"); puts ("<th>Type</th>"); puts ("<th>Genomic coordinates</th>"); puts ("<th>Gene symbol</th>"); puts ("<th>Description</th>"); puts ("<th>Genomic coordinates</th>"); puts ("<th>Gene symbol</th>"); puts ("<th>Description</th>"); puts ("<th></th>"); puts ("</tr>"); fflush(stdout); stringPrintf (buffer,"%s/%s.gfr", confp_get(Conf, 
"WEB_DATA_DIR"), prefix); gfr_init (string (buffer)); int countElements = 0; while (currGE = gfr_nextEntry ()) { if (currGE->numInter < minNum) { continue; } if (strEqual (typeSelected,"all") || strEqual (currGE->fusionType,typeSelected) || ( strEqual(currGE->fusionType,"cis") && strEqual( typeSelected,"same") ) || ( strEqual(currGE->fusionType,"read-through") && strEqual( typeSelected,"same") ) ) { if (pos = strchr (currGE->descriptionTranscript1,'|')) { *pos = '\0'; } if (pos = strchr (currGE->descriptionTranscript2,'|')) { *pos = '\0'; } puts ("<tr>"); printf ("<td align=left>%1.3f</td>\n",currGE->SPER); printf ("<td align=left>%1.3f</td>\n",currGE->DASPER); printf ("<td align=left>%1.3f</td>\n",currGE->RESPER); printf ("<td align=left>%d</td>\n",currGE->numInter); printf ("<td align=left>%s</td>\n",currGE->fusionType); printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human", currGE->chromosomeTranscript1, currGE->startTranscript1 - atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), currGE->endTranscript1 + atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"))), currGE->chromosomeTranscript1,currGE->startTranscript1,currGE->endTranscript1); printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript1)); printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript1); printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human", currGE->chromosomeTranscript2, currGE->startTranscript2 - atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), currGE->endTranscript2 + atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"))), currGE->chromosomeTranscript2,currGE->startTranscript2,currGE->endTranscript2); printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript2)); printf ("<td 
align=left>%s</td>\n",currGE->descriptionTranscript2); printf ("<td align=left><a href=%s/showDetails_cgi?%s+%s>Details</a></td>\n", confp_get(Conf, "WEB_URL_CGI"), prefix,currGE->id); puts ("</tr>"); countElements++; } } gfr_deInit (); stringDestroy (buffer); puts ("</table><br><br>"); if( countElements == 0) puts("No fusion candidates can be found satisfying all specified criteria."); puts ("</body>"); puts ("</html>"); fflush (stdout); }
int main (int argc, char *argv[]) { GfrEntry *currGE; BLEntry *currBLE; BLEntry currQuery; FILE *fp; char *line; int count; int countRemoved; int index; WordIter w; Array blackList = arrayCreate(20, BLEntry); config *Conf; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH")); return EXIT_FAILURE; } if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) { die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) { die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } Stringa buffer=stringCreate( 100 ); stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") ); /* fp = fopen( string( buffer ), "r" ); if( !fp ) die("Unable to open file: %s", string(buffer)); stringDestroy( buffer ); */ // reading blacklist file LineStream ls = ls_createFromFile( string(buffer) ); while( line = ls_nextLine(ls) ) { w = wordIterCreate( line, "\t", 1); currBLE = arrayp( blackList, arrayMax(blackList), BLEntry); currBLE->gene1 = hlr_strdup ( wordNext(w) ); currBLE->gene2 = hlr_strdup ( wordNext(w) ); wordIterDestroy(w); } //fclose(fp); ls_destroy( ls ); stringDestroy( buffer ); arraySort( blackList, (ARRAYORDERF) sortBlackListByName1); // beginFiltering count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { // reading the gfr if( currGE->geneSymbolTranscript1 == NULL ) { die("Gene symbols are not present in the GFR file. 
Please run gfrAddInfo before gfrBlackListFilter."); return EXIT_FAILURE; } // creating a new query to the black list currQuery.gene1 = currGE->geneSymbolTranscript1; currQuery.gene2 = currGE->geneSymbolTranscript2; if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) { countRemoved++; continue; } // searching against read_1/read_2 int res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1); if( !res ) { // not found, then searching against read_2/read_1 currQuery.gene1 = currGE->geneSymbolTranscript2; currQuery.gene2 = currGE->geneSymbolTranscript1; res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1 ); if( !res ) { // not found, write the instance to stdout, update the counts puts (gfr_writeGfrEntry (currGE)); count++; } else { // found: read2/read1 countRemoved++; } } else { //found: read1/read2 countRemoved++; } } gfr_deInit (); arrayDestroy( blackList ); warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME")); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close( Conf); return 0; }