int main (int argc, char *argv[]) { LineStream ls; char *line; char *pos; Stringa buffer; if (argc != 2) { usage ("%s <file.intraOffsets>"); } TH1 *his = new TH1D ("","Intra-read distribution",1000,0,1000); TCanvas *canv = new TCanvas("","canvas",1200,400); ls = ls_createFromFile (argv[1]); while (line = ls_nextLine (ls)) { his->Fill (atoi (line)); } ls_destroy (ls); his->Draw(); his->GetXaxis()->SetLabelSize (0.04); his->GetYaxis()->SetLabelSize (0.04); buffer = stringCreate (100); pos = strchr (argv[1],'.'); if (pos == NULL) { die ("Expected <file.intraOffsets>: %s",argv[1]); } *pos = '\0'; stringPrintf (buffer,"%s_intraDistribution.jpg",argv[1]); canv->Print (string (buffer),"jpg"); stringDestroy (buffer); return 0; }
static Fastq* fastq_processNextSequence (int freeMemory, int truncateName) { char *line; static Fastq* currFQ = NULL; int count; Seq* currSeq = NULL; if (ls_isEof (lsFastq)) { if (freeMemory) { fastq_freeFastq (currFQ); } return NULL; } count = 0; while ( (line=ls_nextLine (lsFastq)) && (count<4) ) { if (line[0] == '\0') { continue; } if (line[0] == '@') { if (freeMemory) { fastq_freeFastq (currFQ); } count++; AllocVar (currFQ); AllocVar (currFQ->seq); currSeq = currFQ->seq; currSeq->name = hlr_strdup (line + 1); if (truncateName) { currSeq->name = firstWordInLine (skipLeadingSpaces (currSeq->name)); } line = ls_nextLine (lsFastq); // reading sequence currSeq->sequence = hlr_strdup ( line ); currSeq->size = strlen (currSeq->sequence); count++; line = ls_nextLine (lsFastq); // reading quality ID if( line[0] != '+' ) die("Expected quality ID: '+' or '+%s'", currSeq->name ); count++; line = ls_nextLine (lsFastq); // reading quality currFQ->quality = hlr_strdup( line ); count++; } } ls_back (lsFastq,1); return currFQ; }
/**
 * Parse primer3 output and call the registered hook functions.
 *
 * Recognised lines:
 *  - "# EPRIMER3 RESULTS FOR <name>" / "# CLONINGPRIMERS RESULTS FOR <name>":
 *    sequence_hook (name)
 *  - lines containing "PRODUCT SIZE:": product_hook (num,len)
 *  - lines containing "FORWARD PRIMER", "REVERSE PRIMER" or "INTERNAL OLIGO":
 *    the matching hook with (start,end,seq,gc,tm), end = start+len-1
 * Other lines starting with '#' and empty lines are skipped.
 *
 * Each hook returns a flag; once any hook returns 0 no further hook is
 * called, but the remaining input is still read and format-checked.
 * Postcondition: the functions registered have been called.
 *
 * @param[in] ls - input line stream with primer3 output
 */
void pr3p_run (LineStream ls)
{
  int goOn = 1;
  char *line;

  while ((line = ls_nextLine (ls)) != NULL) {
    if (strStartsWithC (line,"# EPRIMER3 RESULTS FOR ")) {
      char name[100];

      /* 23 = length of the prefix; without a name the buffer would stay
         uninitialised, so treat that as a format error. */
      if (sscanf (line+23,"%99s",name) != 1)
        die ("primer3parser: format error on line %s",line);
      if (goOn && sequence_hook != NULL)
        goOn = (*sequence_hook) (name);
    }
    else if (strStartsWithC (line,"# CLONINGPRIMERS RESULTS FOR ")) {
      char name[100];

      /* 29 = length of the prefix */
      if (sscanf (line+29,"%99s",name) != 1)
        die ("primer3parser: format error on line %s",line);
      if (goOn && sequence_hook != NULL)
        goOn = (*sequence_hook) (name);
    }
    else if (line[0] == '#' || line[0] == '\0')
      continue;
    else if (strstr (line,"PRODUCT SIZE:")) {
      int num,len;

      if (sscanf (line,"%d %*s %*s %d",&num,&len) != 2)
        die ("primer3parser: format error on line %s",line);
      if (goOn && product_hook != NULL)
        goOn = (*product_hook) (num,len);
    }
    else if (strstr (line,"FORWARD PRIMER") ||
             strstr (line,"REVERSE PRIMER") ||
             strstr (line,"INTERNAL OLIGO")) {
      int start,len;
      float tm,gc;
      char seq[101];

      if (sscanf (line,"%*s %*s %d %d %f %f %100s",
                  &start,&len,&tm,&gc,seq) != 5)
        die ("primer3parser: format error on line %s",line);
      if (strstr (line,"FORWARD PRIMER")) {
        if (goOn && forward_hook != NULL)
          goOn = (*forward_hook) (start,start+len-1,seq,gc,tm);
      }
      else if (strstr (line,"REVERSE PRIMER")) {
        if (goOn && reverse_hook != NULL)
          goOn = (*reverse_hook) (start,start+len-1,seq,gc,tm);
      }
      else if (strstr (line,"INTERNAL OLIGO")) {
        if (goOn && internal_hook != NULL)
          goOn = (*internal_hook) (start,start+len-1,seq,gc,tm);
      }
    }
  }
}
/** * Returns a pointer to next ElandQuery. * @pre The module has been initialized using elandParser_init(). * Parse entries of the following format: \verbatim >FC30H5TAA_100308:2:1:1647:1161 GCTTACATTTTTCCTCTCTACATTATC U0 1 0 0 chr17.fa 8466296 F .. >FC30H5TAA_100308:2:1:1588:122 GAGTTAGCCTTGGGACCCCTACTTCTT U0 1 0 0 chr3.fa 61525628 F .. >FC30H5TAA_100308:2:1:1642:123 GGTGAGAGCCGCGACGGGCTTTAGGCG NM 0 0 0 >FC30H5TAA_100308:2:1:1630:119 CCGCCATTGCCAGCCCCCAGCTGACGG R2 0 0 2 >FC30H5TAA_100308:2:1:1603:120 GCAAGATGAAGTGAAAGGTAAAGAATC U1 0 1 1 chrM.fa 15277 R .. 26A \endverbatim */ ElandQuery* elandParser_nextQuery (void) { WordIter w; char *line,*token,*pos; static ElandQuery *currElandQuery = NULL; while (line = ls_nextLine (ls)) { if (line[0] == '\0') { continue; } elandParser_freeQuery (currElandQuery); currElandQuery = NULL; AllocVar (currElandQuery); w = wordIterCreate (line,"\t",0); currElandQuery->sequenceName = hlr_strdup (wordNext (w) + 1); // remove the '>' character at beginning of the line currElandQuery->sequence = hlr_strdup (wordNext (w)); currElandQuery->matchCode = hlr_strdup (wordNext (w)); if (strEqual (currElandQuery->matchCode,"QC")) { wordIterDestroy (w); return currElandQuery; } currElandQuery->exactMatches = atoi (wordNext (w)); currElandQuery->oneErrorMatches = atoi (wordNext (w)); currElandQuery->twoErrorMatches = atoi (wordNext (w)); token = wordNext (w); if (token == NULL) { wordIterDestroy (w); return currElandQuery; } if (!(pos = strchr (token,'.'))) { die ("Expected '.' in chromosome name: %s",token); } *pos = '\0'; currElandQuery->chromosome = hlr_strdup (pos + 1); currElandQuery->position = atoi (wordNext (w)); token = wordNext (w); if (token[0] == 'F') { currElandQuery->strand = '+'; } else if (token[0] == 'R') { currElandQuery->strand = '-'; } wordIterDestroy (w); return currElandQuery; } elandParser_freeQuery (currElandQuery); currElandQuery = NULL; return currElandQuery; }
/** * Get the next BlastQuery. * @pre The module has been initialized using blastParser_init(). */ BlastQuery* blastParser_nextQuery (void) { char *line,*pos; static char *queryName = NULL; static char *prevBlastQueryName = NULL; static BlastQuery *currBlastQuery = NULL; int first; if (!ls_isEof (ls)) { blastParser_freeQuery (currBlastQuery); currBlastQuery = NULL; AllocVar (currBlastQuery); currBlastQuery->entries = arrayCreate (5,BlastEntry); first = 1; while (line = ls_nextLine (ls)) { if (line[0] == '\0') { continue; } pos = strchr (line,'\t'); *pos = '\0'; strReplace (&queryName,line); if (first == 1 || strEqual (prevBlastQueryName,queryName)) { blastParser_processLine (pos + 1,currBlastQuery); } else { ls_back (ls,1); return currBlastQuery; } if (first == 1) { currBlastQuery->qName = hlr_strdup (queryName); first = 0; } strReplace(&prevBlastQueryName,queryName); } if (first == 1) { return NULL; } else { return currBlastQuery; } } blastParser_freeQuery (currBlastQuery); currBlastQuery = NULL; return NULL; }
/**
 * Read a tab-delimited biokit expression file into an R data.frame.
 *
 * The first line of the file is skipped as header. Every other line must
 * have at least seven columns: the row name followed by six values that are
 * returned as the columns RPKM_MultiMap, ReadCount_MultiMap, RPKM_UniqMap,
 * ReadCount_UniqMap, MultiProp and AllMappingReads. Any further columns
 * become character columns Annotation1, Annotation2, ...; rows that have
 * fewer annotation columns than the widest row are padded with NA.
 *
 * @param[in] filename  R character vector; element 0 is the file path.
 * @return A list with names, row.names and class "data.frame" set.
 *         An R error is raised when the file cannot be opened or a line has
 *         fewer than seven columns.
 */
SEXP c_read_biokit_exprs (SEXP filename)
{
  const int MAND_NCOL=7; // the first column is the row name, and column 2-7 are mandatory
  const char* fn=CHAR(STRING_ELT(filename, 0));
  LineStream ls;
  char* path;
  char* line;
  Texta it;
  Texta anno=NULL; // must have a NULL assigned; otherwise textCreateClear leads to memory error
  Texta rnames;
  Array mrpkms, mreads, srpkms, sreads, mprop, allmap, annos;
  Stringa str;
  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap;
  SEXP R_res=R_NilValue;
  SEXP R_colnames, R_class;
  int add_ncol=0;
  int nprot=0;
  int nrow=0;
  int too_few_cols=0;
  int i=0;
  int j=0;

  /* ls_createFromFile() wants a writable string; keep the copy so that it
     can be released once the stream is closed. */
  path = strdup(fn);
  if(path == NULL)
    error("Out of memory");
  ls = ls_createFromFile(path);
  if(ls == NULL) {
    free(path);
    error("Cannot open file %s", fn);
  }

  rnames=textCreate(128);
  mrpkms=arrayCreate(128, double);
  mreads=arrayCreate(128, int);
  srpkms=arrayCreate(128, double);
  sreads=arrayCreate(128, int);
  mprop=arrayCreate(128, double);
  allmap=arrayCreate(128, int);
  annos=arrayCreate(128, Texta);
  str=stringCreate(8);

  ls_nextLine(ls); // skip the first header line
  while((line = ls_nextLine(ls)) != NULL) {
    it = textFieldtokP(line, "\t");
    if(arrayMax(it)<MAND_NCOL) {
      /* error() does not return, so remember the problem and report it
         only after the native buffers have been released. */
      textDestroy(it);
      too_few_cols=1;
      break;
    }
    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    /* Collect the optional annotation columns of this row. */
    add_ncol = max(arrayMax(it)-MAND_NCOL, add_ncol);
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it); ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);
    textDestroy(it); /* the tokens of this line are no longer needed */
    nrow++;
  }
  /* Release the scratch annotation buffer before anno is reused below. */
  if(anno != NULL) {
    textDestroy(anno);
    anno=NULL;
  }
  ls_destroy(ls);
  free(path);

  if(!too_few_cols) {
    R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
    R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
    R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
    R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
    R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
    R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
    R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

    for(i=0; i<nrow; ++i) {
      SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
      REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
      INTEGER(R_mreads)[i]=arru(mreads, i, int);
      REAL(R_srpkms)[i]=arru(srpkms, i, double);
      INTEGER(R_sreads)[i]=arru(sreads, i, int);
      REAL(R_mprop)[i]=arru(mprop, i, double);
      INTEGER(R_allmap)[i]=arru(allmap, i, int);
    }

    R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
    SET_VECTOR_ELT(R_res, 0, R_mrpkms);
    SET_VECTOR_ELT(R_res, 1, R_mreads);
    SET_VECTOR_ELT(R_res, 2, R_srpkms);
    SET_VECTOR_ELT(R_res, 3, R_sreads);
    SET_VECTOR_ELT(R_res, 4, R_mprop);
    SET_VECTOR_ELT(R_res, 5, R_allmap);

    /* One character column per annotation position; short rows get NA. */
    for(i=0; i<add_ncol; ++i) {
      SEXP R_anno=NULL;
      R_anno=PROTECT(allocVector(STRSXP, nrow));
      for(j=0; j<nrow; ++j) {
        anno=array(annos, j, Texta);
        if(arrayMax(anno)>i) {
          SET_STRING_ELT(R_anno, j, mkChar(textItem(anno, i)));
        } else {
          SET_STRING_ELT(R_anno, j, R_NaString);
        }
      }
      SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); // -1 because the first column is row name
      UNPROTECT(1);
    }

    PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
    PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
    SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
    SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
    SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
    SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
    SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
    SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
    for(i=0; i<add_ncol; ++i) {
      stringPrintf(str, "Annotation%d", i+1);
      SET_STRING_ELT(R_colnames, i+MAND_NCOL-1, mkChar(string(str)));
    }
    SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
    setAttrib(R_res, install("names"), R_colnames);
    setAttrib(R_res, install("row.names"), R_rnames);
    setAttrib(R_res, install("class"), R_class);
  }

  /* Release all native buffers (also on the error path). */
  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  textDestroy(rnames); /* a Texta owns its strings, arrayDestroy would leak them */
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);

  if(too_few_cols)
    error("Input file must contain no less than %d columns", MAND_NCOL);

  UNPROTECT(nprot);
  return(R_res);
}
int main (int argc, char *argv[]) { Array kgXrefs; Stringa buffer; LineStream ls; int count=0; char* geneSymbolTranscript; char* descriptionTranscript; char* line; char* exonID = NULL; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; buffer = stringCreate (100); stringPrintf (buffer,"%s/%s", confp_get(conf, "ANNOTATION_DIR"), confp_get(conf, "KNOWN_GENE_XREF_FILENAME")); kgXrefs = util_readKnownGeneXrefs (string (buffer)); arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName); stringDestroy (buffer); // gfr_init ("-"); ls = ls_createFromFile("-"); while (line = ls_nextLine(ls)) { char *lineP = hlr_strdup(line); WordIter w = wordIterCreate( line, "\t", 0); char *nameTranscript = wordNext( w ); char *p = rindex(nameTranscript, '_'); if (p) { exonID = hlr_strdup( p+1 ); *p='\0'; } transcript2geneSymbolAndGeneDescription(kgXrefs, nameTranscript, &geneSymbolTranscript, &descriptionTranscript); if (exonID) { printf("%s_%s\t%s\t%s\t%s", nameTranscript, exonID, geneSymbolTranscript, exonID, descriptionTranscript); hlr_free(exonID); } else { printf("%s\t%s\t1\t%s", nameTranscript, geneSymbolTranscript, descriptionTranscript); } printf("%s\n", lineP+strlen(nameTranscript)); count++; hlr_free(lineP); wordIterDestroy(w); } ls_destroy (ls); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
static void generateOutput (char* prefix, char* typeSelected, int minNum) { GfrEntry *currGE; Stringa buffer; char *pos; puts ("<html>"); puts ("<head>"); puts ("<title>Results - Gene Fusions</title>"); html_printGenericStyleSheet (12); puts ("</head>"); puts ("<body>"); if (prefix[0] == '\0') { die ("Invalid prefix"); } printf ("<h1>Results - %s</h1><br><br><br>",prefix); buffer = stringCreate(50); //Chromosome expression, if present LineStream ls; char* chrSignal=NULL; stringPrintf(buffer, "ls -1 %s/BGRS/%s_chr*.bgr.gz 2> /dev/null", confp_get(Conf, "WEB_DATA_DIR"), prefix); ls = ls_createFromPipe(string(buffer)); int countCol = 0; puts ("Expression signal: "); fflush(stdout); while( chrSignal = ls_nextLine(ls)) { char* chrTmp = stringBetween( prefix, ".bgr.gz", chrSignal ); chrTmp++; printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target='blank'>%s</a>] ", htmlLinker_generateLinkToGenomeBrowserAtUCSC("hg18","vertebrate","human", chrTmp, confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"), 50000000 + confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), confp_get(Conf, "WEB_DATA_LINK"), prefix, chrTmp, chrTmp); if (countCol > 10) { puts( "<BR>" ); countCol=0; } countCol++; } if( countCol==0) puts( "No data available yet" ); ls_destroy(ls); puts ("<br><br>"); puts ("For a definition of SPER, DASPER and RESPER see <a href=http://rnaseq.gersteinlab.org/fusionseq/>FusionSeq</a>"); puts ("<br><br>"); puts ("<br><table border=0 width=100% align=center cellpadding=10>"); puts ("<tr align=left>"); puts ("<th>SPER</th>"); puts ("<th>DASPER</th>"); puts ("<th>RESPER</th>"); puts ("<th>Number of inter paired-end reads</th>"); puts ("<th>Type</th>"); puts ("<th>Genomic coordinates</th>"); puts ("<th>Gene symbol</th>"); puts ("<th>Description</th>"); puts ("<th>Genomic coordinates</th>"); puts ("<th>Gene symbol</th>"); puts ("<th>Description</th>"); puts ("<th></th>"); puts ("</tr>"); fflush(stdout); stringPrintf (buffer,"%s/%s.gfr", confp_get(Conf, 
"WEB_DATA_DIR"), prefix); gfr_init (string (buffer)); int countElements = 0; while (currGE = gfr_nextEntry ()) { if (currGE->numInter < minNum) { continue; } if (strEqual (typeSelected,"all") || strEqual (currGE->fusionType,typeSelected) || ( strEqual(currGE->fusionType,"cis") && strEqual( typeSelected,"same") ) || ( strEqual(currGE->fusionType,"read-through") && strEqual( typeSelected,"same") ) ) { if (pos = strchr (currGE->descriptionTranscript1,'|')) { *pos = '\0'; } if (pos = strchr (currGE->descriptionTranscript2,'|')) { *pos = '\0'; } puts ("<tr>"); printf ("<td align=left>%1.3f</td>\n",currGE->SPER); printf ("<td align=left>%1.3f</td>\n",currGE->DASPER); printf ("<td align=left>%1.3f</td>\n",currGE->RESPER); printf ("<td align=left>%d</td>\n",currGE->numInter); printf ("<td align=left>%s</td>\n",currGE->fusionType); printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human", currGE->chromosomeTranscript1, currGE->startTranscript1 - atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), currGE->endTranscript1 + atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"))), currGE->chromosomeTranscript1,currGE->startTranscript1,currGE->endTranscript1); printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript1)); printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript1); printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human", currGE->chromosomeTranscript2, currGE->startTranscript2 - atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION")), currGE->endTranscript2 + atoi(confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION"))), currGE->chromosomeTranscript2,currGE->startTranscript2,currGE->endTranscript2); printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript2)); printf ("<td 
align=left>%s</td>\n",currGE->descriptionTranscript2); printf ("<td align=left><a href=%s/showDetails_cgi?%s+%s>Details</a></td>\n", confp_get(Conf, "WEB_URL_CGI"), prefix,currGE->id); puts ("</tr>"); countElements++; } } gfr_deInit (); stringDestroy (buffer); puts ("</table><br><br>"); if( countElements == 0) puts("No fusion candidates can be found satisfying all specified criteria."); puts ("</body>"); puts ("</html>"); fflush (stdout); }
int main (int argc, char **argv) { LineStream ls; Texta tokens = NULL; char *line; int hasQual = 0; int hasSeqs = 0; int start=1; ls = ls_createFromFile ("-"); while (line = ls_nextLine (ls)) { // Put all the lines of the SAM header in comments if (line[0] == '@') { printf ("# %s\n", line); continue; } // Parse each SAM entry and store into array tokens = textFieldtokP (line, "\t"); if (arrayMax (tokens) < 11) { textDestroy( tokens ); ls_destroy (ls); die ("Invalid SAM entry: %s", line); } SamEntry *currSamE = NULL; SamEntry *mateSamE = NULL; AllocVar(currSamE ); int ret = generateSamEntry( tokens, currSamE, &hasSeqs, &hasQual ); textDestroy( tokens ); if ( ret==0 ) { if ( isPaired ( currSamE ) ) ls_nextLine( ls ); // discarding next entry too (the mate) destroySamEntry( currSamE ); freeMem( currSamE ); continue; } if ( isPaired( currSamE ) ) { int hasQual2, hasSeq2; AllocVar( mateSamE ); Texta secondEnd = NULL; secondEnd = textFieldtok (ls_nextLine( ls ) , "\t"); ret = generateSamEntry( secondEnd, mateSamE, &hasSeq2, &hasQual2 ); textDestroy( secondEnd ); if( ret == 0 ) { destroySamEntry( currSamE ); destroySamEntry( mateSamE ); freeMem( currSamE ); freeMem( mateSamE ); continue; } if (strcmp (currSamE->qname, mateSamE->qname) != 0) { die ("Please note that for paired-end data, sam2mrf requires the mate pairs to be on subsequent lines. 
You may want to sort the SAM file first.\nEx: sort -r file.sam | sam2mrf > file.mrf\n"); } } // Print MRF headers if( start ) { printf ("%s", MRF_COLUMN_NAME_BLOCKS); if (hasSeqs) printf("\t%s", MRF_COLUMN_NAME_SEQUENCE); if (hasQual) printf("\t%s", MRF_COLUMN_NAME_QUALITY_SCORES); printf ("\t%s\n", MRF_COLUMN_NAME_QUERY_ID); start=0; } // Print AlignmentBlocks printMrfAlignBlocks (currSamE, R_FIRST); if( isPaired ( currSamE ) ) { printf ("|"); printMrfAlignBlocks (mateSamE, R_SECOND); } seq_init(); // Print Sequence if (hasSeqs) { if (!currSamE->seq) die ("Entry missing sequence column\n"); if( currSamE->flags & S_QUERY_STRAND ) seq_reverseComplement( currSamE->seq, strlen(currSamE->seq)); printf ("\t%s", currSamE->seq); if (mateSamE) { if (!mateSamE->seq) die ("Entry missing sequence column\n"); if( mateSamE->flags & S_MATE_STRAND ) seq_reverseComplement( mateSamE->seq, strlen(mateSamE->seq)); printf ("|%s", mateSamE->seq); } } // Print quality scores if (hasQual) { if (!currSamE->qual) die ("Entry missing quality scores column\n"); printf ("\t%s", currSamE->qual); if (mateSamE) { if (!mateSamE->qual) die ("Entry missing quality scores column\n"); printf ("|%s", mateSamE->qual); } } // Print queryID if (mateSamE) { printf ("\t%s|%s", currSamE->qname,"2"); // No need to print out both IDs, but need the pipe symbol for consistency } else { printf ("\t%s", currSamE->qname); } printf("\n"); destroySamEntry( currSamE ); freeMem( currSamE ); if( isPaired( currSamE ) ) { destroySamEntry ( mateSamE ); freeMem( mateSamE ); } } // clean up ls_destroy (ls); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { GfrEntry *currGE; BLEntry *currBLE; BLEntry currQuery; FILE *fp; char *line; int count; int countRemoved; int index; WordIter w; Array blackList = arrayCreate(20, BLEntry); if (argc != 2) { usage ("%s <blackList.txt>",argv[0]); } fp = fopen( argv[1], "r" ); if( !fp ) die("Unable to open file: %s", argv[1]); // reading blacklist file LineStream ls = ls_createFromFile( argv[1] ); while( line = ls_nextLine(ls) ) { w = wordIterCreate( line, "\t", 1); currBLE = arrayp( blackList, arrayMax(blackList), BLEntry); currBLE->gene1 = hlr_strdup ( wordNext(w) ); currBLE->gene2 = hlr_strdup ( wordNext(w) ); wordIterDestroy(w); } fclose(fp); arraySort( blackList, (ARRAYORDERF) sortBlackListByName1); // beginFiltering count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { // reading the gfr // creating a new query to the black list currQuery.gene1 = currGE->geneSymbolTranscript1; currQuery.gene2 = currGE->geneSymbolTranscript2; // searching against read_1/read_2 int res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1); if( !res ) { // not found, then searching against read_2/read_1 currQuery.gene1 = currGE->geneSymbolTranscript2; currQuery.gene2 = currGE->geneSymbolTranscript1; res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1 ); if( !res ) { // not found, write the instance to stdout, update the counts puts (gfr_writeGfrEntry (currGE)); count++; } else { // found: read2/read1 countRemoved++; } } else { //found: read1/read2 countRemoved++; } } gfr_deInit (); arrayDestroy( blackList ); warn ("%s_BlackListFilter: %s",argv[0], argv[1]); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; BLEntry *currBLE; BLEntry currQuery; FILE *fp; char *line; int count; int countRemoved; int index; WordIter w; Array blackList = arrayCreate(20, BLEntry); config *Conf; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH")); return EXIT_FAILURE; } if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) { die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) { die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } Stringa buffer=stringCreate( 100 ); stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") ); /* fp = fopen( string( buffer ), "r" ); if( !fp ) die("Unable to open file: %s", string(buffer)); stringDestroy( buffer ); */ // reading blacklist file LineStream ls = ls_createFromFile( string(buffer) ); while( line = ls_nextLine(ls) ) { w = wordIterCreate( line, "\t", 1); currBLE = arrayp( blackList, arrayMax(blackList), BLEntry); currBLE->gene1 = hlr_strdup ( wordNext(w) ); currBLE->gene2 = hlr_strdup ( wordNext(w) ); wordIterDestroy(w); } //fclose(fp); ls_destroy( ls ); stringDestroy( buffer ); arraySort( blackList, (ARRAYORDERF) sortBlackListByName1); // beginFiltering count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { // reading the gfr if( currGE->geneSymbolTranscript1 == NULL ) { die("Gene symbols are not present in the GFR file. 
Please run gfrAddInfo before gfrBlackListFilter."); return EXIT_FAILURE; } // creating a new query to the black list currQuery.gene1 = currGE->geneSymbolTranscript1; currQuery.gene2 = currGE->geneSymbolTranscript2; if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) { countRemoved++; continue; } // searching against read_1/read_2 int res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1); if( !res ) { // not found, then searching against read_2/read_1 currQuery.gene1 = currGE->geneSymbolTranscript2; currQuery.gene2 = currGE->geneSymbolTranscript1; res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1 ); if( !res ) { // not found, write the instance to stdout, update the counts puts (gfr_writeGfrEntry (currGE)); count++; } else { // found: read2/read1 countRemoved++; } } else { //found: read1/read2 countRemoved++; } } gfr_deInit (); arrayDestroy( blackList ); warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME")); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close( Conf); return 0; }
/**
 * Emit the "file = ..." line for the exon highlight track.
 *
 * <sdata_dir>/tmp/exons.hlight_s.txt is always (re)created. If the first
 * region has chromosome 0, the line written to fp refers to
 * <sdata_dir>/exons.hlight.txt and the created file stays empty. Otherwise,
 * for every region the per-chromosome file <sdata_dir>/<chr>/exons.hlight.txt
 * (chromosome 23 maps to "X", 24 to "Y") is scanned, every line whose start
 * (field 2) or end (field 3) lies inside the region is copied to the created
 * file, and the line written to fp refers to that file.
 *
 * If a file cannot be opened a message is printed to stderr and the function
 * returns without writing the "file = ..." line.
 *
 * @param[in] fp         Output stream receiving the "file = ..." line
 * @param[in] regions    Array of SRegion_t
 * @param[in] sdata_dir  Base data directory
 */
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src = NULL;
  FILE *out;
  char *line;
  Texta entry;
  int i, astart, aend;
  SRegion_t *tmp;
  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
    fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
    stringDestroy (buffer);
    return;
  }

  tmp = arrayp (regions, 0, SRegion_t);
  if (tmp->chromosome == 0) {
    fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  }
  else {
    for (i = 0; i < arrayMax (regions); i++) {
      tmp = arrayp (regions, i, SRegion_t);
      if (tmp->chromosome == 23) {
        stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
      }
      else if (tmp->chromosome == 24) {
        stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
      }
      else {
        stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
      }
      if ((src = ls_createFromFile (string (buffer))) == NULL) {
        fprintf (stderr, "Cannot open exons.hlight.txt\n");
        stringDestroy (buffer);
        fclose (out);
        return;
      }
      while ((line = ls_nextLine (src)) != NULL) {
        entry = textFieldtokP (line, " ");
        /* Lines without start and end fields cannot be tested; skip them. */
        if (arrayMax (entry) >= 3) {
          astart = atoi (textItem (entry, 1));
          aend = atoi (textItem (entry, 2));
          if ((astart >= tmp->start && astart <= tmp->end) ||
              (aend >= tmp->start && aend <= tmp->end)) {
            fprintf (out, "%s\n", line);
          }
        }
        textDestroy (entry);
      }
      /* Close this region's stream before the next one is opened. */
      ls_destroy (src);
    }
    fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }
  stringDestroy (buffer);
  fclose (out);
}