Ejemplo n.º 1
0
/**
 * Plot the distribution of intra-read offsets.
 *
 * Reads one integer per line from argv[1], fills a 1000-bin histogram over
 * the range 0..1000 and prints the canvas to "<name>_intraDistribution.jpg",
 * where <name> is argv[1] cut at its first '.'.
 *
 * @param[in] argc - must be 2
 * @param[in] argv - argv[1] is the input file, expected to look like <file.intraOffsets>
 * @return 0 on success
 */
int main (int argc, char *argv[])
{
	LineStream ls;
	char *line;
	char *pos;
	Stringa buffer;

	if (argc != 2) {
		// the format string consumes one argument: the program name
		usage ("%s <file.intraOffsets>",argv[0]);
	}

	TH1 *his = new TH1D ("","Intra-read distribution",1000,0,1000);
	TCanvas *canv = new TCanvas("","canvas",1200,400);
	ls = ls_createFromFile (argv[1]);
	while ((line = ls_nextLine (ls)) != NULL) {
		his->Fill (atoi (line));
	}
	ls_destroy (ls);
	his->Draw();
	his->GetXaxis()->SetLabelSize (0.04);
	his->GetYaxis()->SetLabelSize (0.04);

	// derive the output name from the input name up to its first '.'
	// NOTE(review): strchr finds the FIRST '.', so a path such as "./x.intraOffsets"
	// is cut at the leading "./" - confirm whether strrchr is wanted
	pos = strchr (argv[1],'.');
	if (pos == NULL) {
		die ("Expected <file.intraOffsets>: %s",argv[1]);
	}
	*pos = '\0';
	buffer = stringCreate (100);
	stringPrintf (buffer,"%s_intraDistribution.jpg",argv[1]);
	canv->Print (string (buffer),"jpg");
	stringDestroy (buffer);
	return 0;
}
Ejemplo n.º 2
0
/**
 * Read the next FASTQ record from the module-level line stream lsFastq.
 *
 * A record starts at a line beginning with '@' and is followed by the
 * sequence line, a line beginning with '+', and the quality line. Lines
 * seen before the '@' line are skipped.
 *
 * @param[in] freeMemory - if non-zero, the previously returned record is
 *            released (fastq_freeFastq) before a new one is allocated and
 *            when the input is exhausted
 * @param[in] truncateName - if non-zero, the record name is reduced to its
 *            first word
 * @return the record just read, or NULL when no further record is available
 */
static Fastq* fastq_processNextSequence (int freeMemory, int truncateName)
{
  char *line;
  static Fastq* currFQ = NULL;
  int count;
  Seq* currSeq = NULL;

  if (ls_isEof (lsFastq)) {
    if (freeMemory) {
      fastq_freeFastq (currFQ);
    }
    // forget the record so that a further call at EOF cannot free it twice
    currFQ = NULL;
    return NULL;
  }
  count = 0;
  while ( ((line = ls_nextLine (lsFastq)) != NULL) && (count < 4) ) {
    if (line[0] == '\0') {
      continue;
    }
    if (line[0] == '@') {
      if (freeMemory) {
        fastq_freeFastq (currFQ);
      }
      count++;
      AllocVar (currFQ);
      AllocVar (currFQ->seq);
      currSeq = currFQ->seq;
      currSeq->name = hlr_strdup (line + 1);
      if (truncateName) {
        // NOTE(review): if skipLeadingSpaces advances the pointer, name no longer
        // points at the start of the hlr_strdup allocation - confirm that
        // fastq_freeFastq copes with that
        currSeq->name = firstWordInLine (skipLeadingSpaces (currSeq->name));
      }
      line = ls_nextLine (lsFastq); // reading sequence
      if (line == NULL) {
        die ("Truncated FASTQ record (missing sequence): %s", currSeq->name);
      }
      currSeq->sequence = hlr_strdup (line);
      currSeq->size = strlen (currSeq->sequence);
      count++;
      line = ls_nextLine (lsFastq); // reading quality ID
      if (line == NULL || line[0] != '+') {
        die("Expected quality ID: '+' or '+%s'", currSeq->name );
      }
      count++;
      line = ls_nextLine (lsFastq); // reading quality
      if (line == NULL) {
        die ("Truncated FASTQ record (missing quality): %s", currSeq->name);
      }
      currFQ->quality = hlr_strdup (line);
      count++;
    }
  }
  if (count == 0) {
    // the input ended without another '@' line (e.g. trailing blank lines):
    // report end of data instead of handing out the previous record again
    if (freeMemory) {
      fastq_freeFastq (currFQ);
    }
    currFQ = NULL;
    return NULL;
  }
  // the loop read one line past the record; hand it back to the stream
  ls_back (lsFastq,1);
  return currFQ;
}
Ejemplo n.º 3
0
/**
   Parse the primer3 output and call the registered functions.<br>
   Each hook is only invoked while all previously invoked hooks returned
   non-zero; parsing of the input nevertheless continues to the end.<br>
   Postcondition: the functions registered have been called
   @param[in] ls - input line stream with primer3 output
*/
void pr3p_run (LineStream ls) {
  int goOn = 1;
  char *line;

  while ((line = ls_nextLine (ls)) != NULL) {
    char *nameStart = NULL;

    // header lines carry the sequence name right after a fixed prefix;
    // the offsets are the lengths of the two prefixes
    if (strStartsWithC (line,"# EPRIMER3 RESULTS FOR "))
      nameStart = line + 23;
    else if (strStartsWithC (line,"# CLONINGPRIMERS RESULTS FOR "))
      nameStart = line + 29;

    if (nameStart != NULL) {
      char name[100];

      // without this check an empty name would pass an uninitialised buffer to the hook
      if (sscanf (nameStart,"%99s",name) != 1)
        die ("primer3parser: format error on line %s",line);
      if (goOn && sequence_hook != NULL)
        goOn = (*sequence_hook) (name);
      continue;
    }

    if (line[0] == '#' || line[0] == '\0')
      continue;

    if (strstr (line,"PRODUCT SIZE:")) {
      int num,len;

      if (sscanf (line,"%d %*s %*s %d",&num,&len) != 2)
        die ("primer3parser: format error on line %s",line);
      if (goOn && product_hook != NULL)
        goOn = (*product_hook) (num,len);
      continue;
    }

    if (strstr (line,"FORWARD PRIMER") || strstr (line,"REVERSE PRIMER") ||
        strstr (line,"INTERNAL OLIGO")) {
      int start,len;
      float tm,gc;
      char seq[101];

      if (sscanf (line,"%*s %*s %d %d %f %f %100s",
                  &start,&len,&tm,&gc,seq) != 5)
        die ("primer3parser: format error on line %s",line);
      // same precedence as the tests above: forward, then reverse, then internal
      if (strstr (line,"FORWARD PRIMER")) {
        if (goOn && forward_hook != NULL)
          goOn = (*forward_hook) (start,start+len-1,seq,gc,tm);
      }
      else if (strstr (line,"REVERSE PRIMER")) {
        if (goOn && reverse_hook != NULL)
          goOn = (*reverse_hook) (start,start+len-1,seq,gc,tm);
      }
      else {
        if (goOn && internal_hook != NULL)
          goOn = (*internal_hook) (start,start+len-1,seq,gc,tm);
      }
    }
  }
}
Ejemplo n.º 4
0
/**
 * Returns a pointer to next ElandQuery.
 * The returned entry is owned by the parser: it is freed on the next call.
 * Entries whose match code is "QC", and entries without a chromosome column,
 * are returned with only the leading fields filled in.
 * @pre The module has been initialized using elandParser_init().
 * @return the next query, or NULL when the input is exhausted
 * Parse entries of the following format:
   \verbatim
   >FC30H5TAA_100308:2:1:1647:1161	GCTTACATTTTTCCTCTCTACATTATC	U0	1	0	0	chr17.fa	8466296	F	..
   >FC30H5TAA_100308:2:1:1588:122	GAGTTAGCCTTGGGACCCCTACTTCTT	U0	1	0	0	chr3.fa	61525628	F	..
   >FC30H5TAA_100308:2:1:1642:123	GGTGAGAGCCGCGACGGGCTTTAGGCG	NM	0	0	0
   >FC30H5TAA_100308:2:1:1630:119	CCGCCATTGCCAGCCCCCAGCTGACGG	R2	0	0	2
   >FC30H5TAA_100308:2:1:1603:120	GCAAGATGAAGTGAAAGGTAAAGAATC	U1	0	1	1	chrM.fa	15277	R	..	26A
   \endverbatim
 */
ElandQuery* elandParser_nextQuery (void)
{
  WordIter w;
  char *line,*token,*pos;
  static ElandQuery *currElandQuery = NULL;
  int matchCounts[3];
  int i;

  while ((line = ls_nextLine (ls)) != NULL) {
    if (line[0] == '\0') {
      continue;
    }
    elandParser_freeQuery (currElandQuery);
    currElandQuery = NULL;
    AllocVar (currElandQuery);
    w = wordIterCreate (line,"\t",0);

    token = wordNext (w);
    if (token == NULL || token[0] == '\0') {
      die ("Invalid Eland entry: missing sequence name");
    }
    currElandQuery->sequenceName = hlr_strdup (token + 1); // remove the '>' character at beginning of the line

    token = wordNext (w);
    if (token == NULL) {
      die ("Invalid Eland entry: missing sequence for %s",currElandQuery->sequenceName);
    }
    currElandQuery->sequence = hlr_strdup (token);

    token = wordNext (w);
    if (token == NULL) {
      die ("Invalid Eland entry: missing match code for %s",currElandQuery->sequenceName);
    }
    currElandQuery->matchCode = hlr_strdup (token);
    if (strEqual (currElandQuery->matchCode,"QC")) {
      wordIterDestroy (w);
      return currElandQuery;
    }

    // exact, one-error and two-error match counts
    for (i = 0; i < 3; i++) {
      token = wordNext (w);
      if (token == NULL) {
        die ("Invalid Eland entry: missing match counts for %s",currElandQuery->sequenceName);
      }
      matchCounts[i] = atoi (token);
    }
    currElandQuery->exactMatches = matchCounts[0];
    currElandQuery->oneErrorMatches = matchCounts[1];
    currElandQuery->twoErrorMatches = matchCounts[2];

    // entries without a reported location end after the match counts
    token = wordNext (w);
    if (token == NULL) {
      wordIterDestroy (w);
      return currElandQuery;
    }
    if (!(pos = strchr (token,'.'))) {
      die ("Expected '.' in chromosome name: %s",token);
    }
    // keep the part BEFORE the '.' ("chr17" of "chr17.fa"), not the file extension
    *pos = '\0';
    currElandQuery->chromosome = hlr_strdup (token);

    token = wordNext (w);
    if (token == NULL) {
      die ("Invalid Eland entry: missing position for %s",currElandQuery->sequenceName);
    }
    currElandQuery->position = atoi (token);

    token = wordNext (w);
    if (token == NULL) {
      die ("Invalid Eland entry: missing strand for %s",currElandQuery->sequenceName);
    }
    if (token[0] == 'F') {
      currElandQuery->strand = '+';
    }
    else if (token[0] == 'R') {
      currElandQuery->strand = '-';
    }
    wordIterDestroy (w);
    return currElandQuery;
  }
  elandParser_freeQuery (currElandQuery);
  currElandQuery = NULL;
  return currElandQuery;
}
Ejemplo n.º 5
0
/**
 * Get the next BlastQuery.
 * Consecutive input lines sharing the same first tab-separated column (the
 * query name) are collected into one BlastQuery; the remainder of each line
 * is handed to blastParser_processLine().
 * The returned query is owned by the parser: it is freed on the next call.
 * @pre The module has been initialized using blastParser_init().
 * @return the next query, or NULL when no further query is available
 */
BlastQuery* blastParser_nextQuery (void)
{
    char *line,*pos;
    static char *queryName = NULL;
    static char *prevBlastQueryName = NULL;
    static BlastQuery *currBlastQuery = NULL;
    int first;

    if (ls_isEof (ls)) {
        blastParser_freeQuery (currBlastQuery);
        currBlastQuery = NULL;
        return NULL;
    }

    blastParser_freeQuery (currBlastQuery);
    currBlastQuery = NULL;
    AllocVar (currBlastQuery);
    currBlastQuery->entries = arrayCreate (5,BlastEntry);
    first = 1;
    while ((line = ls_nextLine (ls)) != NULL) {
        if (line[0] == '\0') {
            continue;
        }
        pos = strchr (line,'\t');
        if (pos == NULL) {
            // a line without a tab has no query name column to split off
            die ("Expected tab-separated BLAST entry: %s",line);
        }
        *pos = '\0';
        strReplace (&queryName,line);
        if (first != 1 && !strEqual (prevBlastQueryName,queryName)) {
            // first line of the NEXT query: push it back and finish this one
            ls_back (ls,1);
            return currBlastQuery;
        }
        blastParser_processLine (pos + 1,currBlastQuery);
        if (first == 1) {
            currBlastQuery->qName = hlr_strdup (queryName);
            first = 0;
        }
        strReplace (&prevBlastQueryName,queryName);
    }
    // input exhausted: only report a query if at least one line belonged to it
    if (first == 1) {
        return NULL;
    }
    return currBlastQuery;
}
Ejemplo n.º 6
0
/**
 * Read a tab-separated biokit expression file into an R data.frame.
 *
 * The first line of the file is skipped as a header. Every following line
 * must have at least 7 columns: the row name followed by six value columns
 * (RPKM_MultiMap, ReadCount_MultiMap, RPKM_UniqMap, ReadCount_UniqMap,
 * MultiProp, AllMappingReads). Further columns are returned as character
 * columns Annotation1, Annotation2, ...; rows that have fewer annotation
 * columns than the widest row get NA in the missing positions.
 *
 * @param[in] filename - R character vector; its first element is the file name
 * @return a list carrying the attributes names, row.names and class "data.frame"
 */
SEXP c_read_biokit_exprs (SEXP filename) {
  LineStream ls;
  char* line;
  const int MAND_NCOL=7; // the first column is the row name, and column 2-7 are mandatory
  int add_ncol=0;
  Texta it;
  Texta rnames=textCreate(128);
  Array mrpkms=arrayCreate(128, double);
  Array mreads=arrayCreate(128, int);
  Array srpkms=arrayCreate(128, double);
  Array sreads=arrayCreate(128, int);
  Array mprop=arrayCreate(128, double);
  Array allmap = arrayCreate(128, int);
  Array annos=arrayCreate(128, Texta);
  Texta anno=NULL; // must have a NULL assigned; otherwise textCreateClear leads to memory error
  Texta rowAnno;   // borrowed view onto one element of annos, never owned
  Stringa str=stringCreate(8);

  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap, R_res;
  SEXP R_colnames, R_class;

  int nprot=0;
  int i=0;
  int j=0;
  int nrow=0;
  const char* fn=CHAR(STRING_ELT(filename, 0));
  char* fnCopy=strdup(fn); // writable copy for ls_createFromFile; released after ls_destroy
  ls = ls_createFromFile(fnCopy);

  ls_nextLine(ls); // skip the first header line
  while((line = ls_nextLine(ls)) != NULL) {
    it = textFieldtokP(line, "\t");
    // NOTE(review): error() does not return to this function, so everything
    // allocated above stays allocated on this path
    if(arrayMax(it)<MAND_NCOL)
      error("Input file must contain no less than %d columns", MAND_NCOL);

    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    // collect the optional annotation columns of this row
    add_ncol = max(arrayMax(it)-MAND_NCOL, add_ncol);
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it);  ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);
    textDestroy(it); // tokens of this line are no longer needed
    nrow++;
  }
  // the scratch buffer was only a source for textClone
  if(anno != NULL) {
    textDestroy(anno);
  }

  R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
  R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

  for(i=0; i<nrow; ++i) {
    SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
    REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
    INTEGER(R_mreads)[i]=arru(mreads, i, int);
    REAL(R_srpkms)[i]=arru(srpkms, i, double);
    INTEGER(R_sreads)[i]=arru(sreads, i, int);
    REAL(R_mprop)[i]=arru(mprop, i, double);
    INTEGER(R_allmap)[i]=arru(allmap, i, int);
  }

  // one list element per column except the row name column, hence the -1
  R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
  SET_VECTOR_ELT(R_res, 0, R_mrpkms);
  SET_VECTOR_ELT(R_res, 1, R_mreads);
  SET_VECTOR_ELT(R_res, 2, R_srpkms);
  SET_VECTOR_ELT(R_res, 3, R_sreads);
  SET_VECTOR_ELT(R_res, 4, R_mprop);
  SET_VECTOR_ELT(R_res, 5, R_allmap);
  for(i=0; i<add_ncol; ++i) {
    SEXP R_anno=NULL;
    R_anno=PROTECT(allocVector(STRSXP, nrow));
    for(j=0; j<nrow; ++j) {
      rowAnno=array(annos, j, Texta);
      if(arrayMax(rowAnno)>i) {
         SET_STRING_ELT(R_anno, j, mkChar(textItem(rowAnno, i)));
      } else {
         SET_STRING_ELT(R_anno, j, R_NaString);
      }
    }
    SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); // -1 because the first column is row name
    UNPROTECT(1);
  }

  PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
  PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
  SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
  SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
  SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
  SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
  SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
  SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
  for(i=0; i<add_ncol; ++i) {
    stringPrintf(str, "Annotation%d", i+1);
    SET_STRING_ELT(R_colnames, i+MAND_NCOL-1,
                   mkChar(string(str)));
  }
  SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
  setAttrib(R_res, install("names"), R_colnames);
  setAttrib(R_res, install("row.names"), R_rnames);
  setAttrib(R_res, install("class"), R_class);

  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  textDestroy(rnames); // a Texta: release the strings too, not just the array
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);

  ls_destroy(ls);
  free(fnCopy);
  UNPROTECT(nprot);
  return(R_res);
}
Ejemplo n.º 7
0
/**
 * Annotate tab-separated lines read from stdin with gene symbol and description.
 *
 * The first column of every line is a transcript name, optionally followed by
 * "_<exonID>" (split at the last '_'). The line is written to stdout as
 * name[_exonID], gene symbol, exonID (or 1 when there is none), description,
 * followed by the remaining columns of the input line.
 *
 * The cross reference table is read from
 * ANNOTATION_DIR/KNOWN_GENE_XREF_FILENAME of the configuration named by the
 * environment variable FUSIONSEQ_CONFPATH.
 *
 * @return EXIT_SUCCESS, or EXIT_FAILURE if the configuration cannot be opened
 */
int main (int argc, char *argv[])
{
	Array kgXrefs;
	Stringa buffer;
	LineStream ls;
	int count=0;
	char* geneSymbolTranscript;
	char* descriptionTranscript;
	char* line;

	config *conf;

	if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL)
		return EXIT_FAILURE;

	buffer = stringCreate (100);

	stringPrintf (buffer,"%s/%s",
		      confp_get(conf, "ANNOTATION_DIR"),
		      confp_get(conf, "KNOWN_GENE_XREF_FILENAME"));
	kgXrefs = util_readKnownGeneXrefs (string (buffer));
	arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName);
	stringDestroy (buffer);

	ls = ls_createFromFile("-");

	while ((line = ls_nextLine(ls)) != NULL) {
		// the word iterator works on 'line'; keep an untouched copy for the tail
		char *lineP = hlr_strdup(line);
		WordIter w = wordIterCreate( line, "\t", 0);
		char *nameTranscript = wordNext( w );
		char *exonID = NULL;
		char *p;
		size_t nameLen;

		if (nameTranscript == NULL) {
			// nothing to annotate on this line
			hlr_free(lineP);
			wordIterDestroy(w);
			continue;
		}
		// length of the whole first column, taken BEFORE the exon suffix is
		// cut off, so that the tail below starts at the first tab
		nameLen = strlen(nameTranscript);
		p = strrchr(nameTranscript, '_');
		if (p) {
			exonID = hlr_strdup( p+1 );
			*p='\0';
		}
		transcript2geneSymbolAndGeneDescription(kgXrefs,
							nameTranscript,
							&geneSymbolTranscript,
							&descriptionTranscript);
		if (exonID) {
			printf("%s_%s\t%s\t%s\t%s",
				nameTranscript,
				exonID,
				geneSymbolTranscript,
				exonID,
				descriptionTranscript);
			hlr_free(exonID);
		} else {
			printf("%s\t%s\t1\t%s",
				nameTranscript,
				geneSymbolTranscript,
				descriptionTranscript);
		}
		printf("%s\n", lineP+nameLen);
		count++;
		hlr_free(lineP);
		wordIterDestroy(w);
	}
	ls_destroy (ls);
	warn ("%s_numGfrEntries: %d",argv[0],count);
	confp_close(conf);

	return EXIT_SUCCESS;
}
Ejemplo n.º 8
0
/**
 * Write the HTML result page for one data set to stdout.
 *
 * The page lists links to the per-chromosome expression signal files found
 * under WEB_DATA_DIR/BGRS, followed by a table of the fusion candidates read
 * from WEB_DATA_DIR/<prefix>.gfr.
 *
 * @param[in] prefix - data set name; must not be empty
 * @param[in] typeSelected - "all", a fusion type, or "same" (which selects
 *            the fusion types "cis" and "read-through")
 * @param[in] minNum - candidates with fewer inter paired-end reads are skipped
 */
static void generateOutput (char* prefix, char* typeSelected, int minNum)
{
  GfrEntry *currGE;
  Stringa buffer;
  LineStream ls;
  char *pos;
  char *chrSignal = NULL;
  const char *flankSetting;
  int flank;
  int countCol = 0;
  int countElements = 0;

  puts ("<html>");
  puts ("<head>");
  puts ("<title>Results - Gene Fusions</title>");
  html_printGenericStyleSheet (12);
  puts ("</head>");
  puts ("<body>");
  if (prefix[0] == '\0') {
    die ("Invalid prefix");
  }
  printf ("<h1>Results - %s</h1><br><br><br>",prefix);

  // the flanking region is configured as text; every coordinate below needs the number
  flankSetting = confp_get (Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION");
  flank = (flankSetting != NULL) ? atoi (flankSetting) : 0;

  buffer = stringCreate(50);
  //Chromosome expression, if present
  // NOTE(review): prefix is pasted into a shell command and into HTML without
  // escaping - confirm that callers validate it
  stringPrintf(buffer, "ls -1 %s/BGRS/%s_chr*.bgr.gz 2> /dev/null",
	       confp_get(Conf, "WEB_DATA_DIR"),
	       prefix);
  ls = ls_createFromPipe(string(buffer));
  puts ("Expression signal: &nbsp;");
  fflush(stdout);
  while ((chrSignal = ls_nextLine(ls)) != NULL) {
    char* chrTmp = stringBetween( prefix, ".bgr.gz", chrSignal );

    if (chrTmp == NULL || chrTmp[0] == '\0') {
      continue;
    }
    chrTmp++; // step over the character that follows the prefix in the file name
    printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target='blank'>%s</a>]&nbsp;",
            htmlLinker_generateLinkToGenomeBrowserAtUCSC("hg18","vertebrate","human", chrTmp,
                                                         flank,
                                                         50000000 + flank),
            confp_get(Conf, "WEB_DATA_LINK"),
            prefix,
            chrTmp,
            chrTmp);
    if (countCol > 10) {
      puts( "<BR>" );
      countCol=0;
    }
    countCol++;
  }
  if( countCol==0) puts( "No data available yet" );
  ls_destroy(ls);
  puts ("<br><br>");
  puts ("For a definition of SPER, DASPER and RESPER see <a href=http://rnaseq.gersteinlab.org/fusionseq/>FusionSeq</a>");
  puts ("<br><br>");
  puts ("<br><table border=0 width=100% align=center cellpadding=10>");
  puts ("<tr align=left>");
  puts ("<th>SPER</th>");
  puts ("<th>DASPER</th>");
  puts ("<th>RESPER</th>");
  puts ("<th>Number of inter paired-end reads</th>");
  puts ("<th>Type</th>");
  puts ("<th>Genomic coordinates</th>");
  puts ("<th>Gene symbol</th>");
  puts ("<th>Description</th>");
  puts ("<th>Genomic coordinates</th>");
  puts ("<th>Gene symbol</th>");
  puts ("<th>Description</th>");
  puts ("<th></th>");
  puts ("</tr>");
  fflush(stdout);

  stringPrintf (buffer,"%s/%s.gfr", confp_get(Conf, "WEB_DATA_DIR"), prefix);
  gfr_init (string (buffer));
  while ((currGE = gfr_nextEntry ()) != NULL) {
    if (currGE->numInter < minNum) {
      continue;
    }
    if (strEqual (typeSelected,"all") || strEqual (currGE->fusionType,typeSelected) ||
	( strEqual(currGE->fusionType,"cis") && strEqual( typeSelected,"same") ) ||
	( strEqual(currGE->fusionType,"read-through") && strEqual( typeSelected,"same") ) ) {
      // only the part of each description before the first '|' is shown
      if ((pos = strchr (currGE->descriptionTranscript1,'|')) != NULL) {
        *pos = '\0';
      }
      if ((pos = strchr (currGE->descriptionTranscript2,'|')) != NULL) {
        *pos = '\0';
      }
      puts ("<tr>");
      printf ("<td align=left>%1.3f</td>\n",currGE->SPER);
      printf ("<td align=left>%1.3f</td>\n",currGE->DASPER);
      printf ("<td align=left>%1.3f</td>\n",currGE->RESPER);
      printf ("<td align=left>%d</td>\n",currGE->numInter);
      printf ("<td align=left>%s</td>\n",currGE->fusionType);
      printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n",
              htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",
                                                            currGE->chromosomeTranscript1,
                                                            currGE->startTranscript1 - flank,
                                                            currGE->endTranscript1 + flank),
              currGE->chromosomeTranscript1,currGE->startTranscript1,currGE->endTranscript1);
      printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript1));
      printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript1);
      printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n",
              htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",
                                                            currGE->chromosomeTranscript2,
                                                            currGE->startTranscript2 - flank,
                                                            currGE->endTranscript2 + flank),
              currGE->chromosomeTranscript2,currGE->startTranscript2,currGE->endTranscript2);
      printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript2));
      printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript2);
      printf ("<td align=left><a href=%s/showDetails_cgi?%s+%s>Details</a></td>\n", confp_get(Conf, "WEB_URL_CGI"), prefix,currGE->id);
      puts ("</tr>");
      countElements++;
    }
  }
  gfr_deInit ();
  stringDestroy (buffer);
  puts ("</table><br><br>");
  if( countElements == 0) puts("No fusion candidates can be found satisfying all specified criteria.");
  puts ("</body>");
  puts ("</html>");
  fflush (stdout);
}
Ejemplo n.º 9
0
/**
 * Convert SAM entries read from stdin into MRF lines written to stdout.
 *
 * SAM header lines (starting with '@') are echoed as '#' comments. For
 * paired entries the mate is expected on the line directly following its
 * partner. Entries for which generateSamEntry() returns 0 are dropped,
 * together with their mate.
 *
 * @return EXIT_SUCCESS
 */
int main (int argc, char **argv)
{
  LineStream ls;
  Texta tokens = NULL;
  char *line;

  int hasQual = 0;
  int hasSeqs = 0;
  int start=1;

  ls = ls_createFromFile ("-");
  while ((line = ls_nextLine (ls)) != NULL) {
    SamEntry *currSamE = NULL;
    SamEntry *mateSamE = NULL;
    int ret;

    // Put all the lines of the SAM header in comments
    if (line[0] == '@') {
      printf ("# %s\n", line);
      continue;
    }
    // Parse each SAM entry and store into array
    tokens = textFieldtokP (line, "\t");
    if (arrayMax (tokens) < 11) {
      // report while the stream is still alive: 'line' is handed out by the stream
      die ("Invalid SAM entry: %s", line);
    }
    AllocVar (currSamE);

    ret = generateSamEntry (tokens, currSamE, &hasSeqs, &hasQual);
    textDestroy (tokens);
    if (ret == 0) {
      if (isPaired (currSamE)) {
        ls_nextLine (ls); // discarding next entry too (the mate)
      }
      destroySamEntry (currSamE);
      freeMem (currSamE);
      continue;
    }
    if (isPaired (currSamE)) {
      int hasQual2 = 0, hasSeq2 = 0;
      Texta secondEnd = NULL;
      char *mateLine = ls_nextLine (ls);

      if (mateLine == NULL) {
        die ("Missing mate entry for paired-end read: %s", currSamE->qname);
      }
      AllocVar (mateSamE);
      secondEnd = textFieldtok (mateLine, "\t");
      ret = generateSamEntry (secondEnd, mateSamE, &hasSeq2, &hasQual2);
      textDestroy (secondEnd);
      if (ret == 0) {
        destroySamEntry (currSamE);
        destroySamEntry (mateSamE);
        freeMem (currSamE);
        freeMem (mateSamE);
        continue;
      }
      if (strcmp (currSamE->qname, mateSamE->qname) != 0) {
        die ("Please note that for paired-end data, sam2mrf requires the mate pairs to be on subsequent lines. You may want to sort the SAM file first.\nEx: sort -r file.sam | sam2mrf > file.mrf\n");
      }
    }

    // Print MRF headers
    if (start) {
      printf ("%s", MRF_COLUMN_NAME_BLOCKS);
      if (hasSeqs) printf("\t%s", MRF_COLUMN_NAME_SEQUENCE);
      if (hasQual) printf("\t%s", MRF_COLUMN_NAME_QUALITY_SCORES);
      printf ("\t%s\n", MRF_COLUMN_NAME_QUERY_ID);
      start=0;
    }

    // Print AlignmentBlocks; mateSamE is set exactly when the entry is paired
    printMrfAlignBlocks (currSamE, R_FIRST);
    if (mateSamE != NULL) {
      printf ("|");
      printMrfAlignBlocks (mateSamE, R_SECOND);
    }

    seq_init();
    // Print Sequence
    if (hasSeqs) {
      if (!currSamE->seq)
        die ("Entry missing sequence column\n");
      if (currSamE->flags & S_QUERY_STRAND)
        seq_reverseComplement (currSamE->seq, strlen (currSamE->seq));
      printf ("\t%s", currSamE->seq);
      if (mateSamE) {
        if (!mateSamE->seq)
          die ("Entry missing sequence column\n");
        if (mateSamE->flags & S_MATE_STRAND)
          seq_reverseComplement (mateSamE->seq, strlen (mateSamE->seq));
        printf ("|%s", mateSamE->seq);
      }
    }
    // Print quality scores
    if (hasQual) {
      if (!currSamE->qual)
        die ("Entry missing quality scores column\n");
      printf ("\t%s", currSamE->qual);
      if (mateSamE) {
        if (!mateSamE->qual)
          die ("Entry missing quality scores column\n");
        printf ("|%s", mateSamE->qual);
      }
    }

    // Print queryID
    if (mateSamE) {
      printf ("\t%s|%s", currSamE->qname,"2"); // No need to print out both IDs, but need the pipe symbol for consistency
    }
    else {
      printf ("\t%s", currSamE->qname);
    }
    printf("\n");

    // release the mate first and decide on the pointer, so that currSamE is
    // never inspected after it has been freed
    if (mateSamE != NULL) {
      destroySamEntry (mateSamE);
      freeMem (mateSamE);
    }
    destroySamEntry (currSamE);
    freeMem (currSamE);
  }
  // clean up
  ls_destroy (ls);
  return EXIT_SUCCESS;
}
Ejemplo n.º 10
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  BLEntry *currBLE;
  BLEntry currQuery;
  FILE *fp;
  char *line;
  int count;
  int countRemoved;
  
  int index;
  WordIter w;
  Array blackList = arrayCreate(20, BLEntry);

  if (argc != 2) {
    usage ("%s <blackList.txt>",argv[0]);
  }  
  fp = fopen( argv[1], "r" );
  
  if( !fp )  die("Unable to open file: %s", argv[1]);
  // reading blacklist file
  LineStream ls = ls_createFromFile( argv[1] );
  while( line = ls_nextLine(ls) ) {
    w = wordIterCreate( line, "\t", 1);
    currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
    currBLE->gene1 = hlr_strdup ( wordNext(w) );
    currBLE->gene2 = hlr_strdup ( wordNext(w) );    
    wordIterDestroy(w);
  }
  fclose(fp);
  arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);

  // beginFiltering
  count = 0;
  countRemoved = 0;
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while (currGE = gfr_nextEntry ()) { // reading the gfr
    // creating a new query to the black list
    currQuery.gene1 = currGE->geneSymbolTranscript1;
    currQuery.gene2 = currGE->geneSymbolTranscript2;
    // searching against read_1/read_2
    int res = arrayFind( blackList, &currQuery, 
			 &index,  (ARRAYORDERF) sortBlackListByName1);  
    
    if( !res ) { // not found, then searching against read_2/read_1
      currQuery.gene1 = currGE->geneSymbolTranscript2;
      currQuery.gene2 = currGE->geneSymbolTranscript1;
      
      res =  arrayFind( blackList, &currQuery, 
			&index, (ARRAYORDERF) sortBlackListByName1 );
      
      if( !res ) { // not found, write the instance to stdout, update the counts
	puts (gfr_writeGfrEntry (currGE));
	count++;	
      } else { // found: read2/read1
	countRemoved++;
      }	
    } else { //found: read1/read2
      countRemoved++;
    }
  }	           
  gfr_deInit ();
  arrayDestroy( blackList );
  warn ("%s_BlackListFilter: %s",argv[0], argv[1]);
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  return 0;
}
Ejemplo n.º 11
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  BLEntry *currBLE;
  BLEntry currQuery;
  FILE *fp;
  char *line;
  int count;
  int countRemoved;
  
  int index;
  WordIter w;
  Array blackList = arrayCreate(20, BLEntry);
  config *Conf;

  if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) {
    die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH"));
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) {
    die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) {
    die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  Stringa buffer=stringCreate( 100 );
  stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") );
  /*  fp = fopen( string( buffer ), "r" );
  if( !fp )  die("Unable to open file: %s", string(buffer));
  stringDestroy( buffer );
  */ 
// reading blacklist file
  LineStream ls = ls_createFromFile( string(buffer) );
  while( line = ls_nextLine(ls) ) {
    w = wordIterCreate( line, "\t", 1);
    currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
    currBLE->gene1 = hlr_strdup ( wordNext(w) );
    currBLE->gene2 = hlr_strdup ( wordNext(w) );    
    wordIterDestroy(w);
  }
  //fclose(fp);
  ls_destroy( ls );
  stringDestroy( buffer );
  arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);

  // beginFiltering
  count = 0;
  countRemoved = 0;
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while (currGE = gfr_nextEntry ()) { // reading the gfr
    if( currGE->geneSymbolTranscript1 == NULL ) {
      die("Gene symbols are not present in the GFR file. Please run gfrAddInfo before gfrBlackListFilter.");
      return EXIT_FAILURE;
    }
	
    // creating a new query to the black list
    currQuery.gene1 = currGE->geneSymbolTranscript1;
    currQuery.gene2 = currGE->geneSymbolTranscript2;
    if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) {
	countRemoved++;
	continue;
      }
    // searching against read_1/read_2
    int res = arrayFind( blackList, &currQuery, 
			 &index,  (ARRAYORDERF) sortBlackListByName1);  
    
    if( !res ) { // not found, then searching against read_2/read_1
      currQuery.gene1 = currGE->geneSymbolTranscript2;
      currQuery.gene2 = currGE->geneSymbolTranscript1;
      
      res =  arrayFind( blackList, &currQuery, 
			&index, (ARRAYORDERF) sortBlackListByName1 );
      
      if( !res ) { // not found, write the instance to stdout, update the counts
	puts (gfr_writeGfrEntry (currGE));
	count++;	
      } else { // found: read2/read1
	countRemoved++;
      }	
    } else { //found: read1/read2
      countRemoved++;
    }
  }	           
  gfr_deInit ();
  arrayDestroy( blackList );
  warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME"));
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  confp_close( Conf);
  return 0;
}
Ejemplo n.º 12
0
/**
 * Emit the "file = ..." line for the exon highlight track.
 *
 * If the first region has chromosome 0, the line written to fp refers to
 * <sdata_dir>/exons.hlight.txt. Otherwise, for every region the highlight
 * file of its chromosome (23 is stored under X, 24 under Y) is scanned and
 * every line whose start (field 2) or end (field 3) lies inside the region
 * is copied to <sdata_dir>/tmp/exons.hlight_s.txt, which is then referenced.
 *
 * On failure to open a file a message is printed to stderr and the function
 * returns without writing the "file = ..." line.
 *
 * @param[in] fp - stream receiving the "file = ..." line
 * @param[in] regions - Array of SRegion_t
 * @param[in] sdata_dir - data directory
 */
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src = NULL;
  FILE *out;
  char *line;
  Texta entry;
  SRegion_t *tmp;
  int i, astart, aend;

  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
    fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
    stringDestroy (buffer);
    return;
  }

  tmp = arrayp (regions, 0, SRegion_t);

  if (tmp->chromosome == 0) {
    fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  }
  else {
    for (i = 0; i < arrayMax (regions); i++) {
      tmp = arrayp (regions, i, SRegion_t);

      if (tmp->chromosome == 23) {
        stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
      }
      else if (tmp->chromosome == 24) {
        stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
      }
      else {
        stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
      }

      if ((src = ls_createFromFile (string (buffer))) == NULL) {
        fprintf (stderr, "Cannot open exons.hlight.txt\n");
        stringDestroy (buffer);
        fclose (out);
        return;
      }

      while ((line = ls_nextLine (src)) != NULL) {
        entry = textFieldtokP (line, " ");

        // fields 2 and 3 hold start and end; lines with fewer fields are skipped
        if (arrayMax (entry) >= 3) {
          astart = atoi (textItem (entry, 1));
          aend   = atoi (textItem (entry, 2));

          // NOTE(review): an exon that spans the whole region (start before,
          // end after) is not selected - confirm that this is intended
          if ((astart >= tmp->start && astart <= tmp->end) ||
              (aend >= tmp->start && aend <= tmp->end)) {
            fprintf (out, "%s\n", line);
          }
        }
        textDestroy (entry);
      }
      // one stream per region: release it before the next one is opened
      ls_destroy (src);
    }

    fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }

  stringDestroy (buffer);
  fclose (out);
}