Exemplo n.º 1
0
int main (int argc, char *argv[])
{ 
	LineStream ls;
	char *line;
	char *pos;
	Stringa buffer;

	if (argc != 2) {
		usage ("%s <file.intraOffsets>");
	}

	TH1 *his = new TH1D ("","Intra-read distribution",1000,0,1000);
	TCanvas *canv = new TCanvas("","canvas",1200,400);
	ls = ls_createFromFile (argv[1]);
	while (line = ls_nextLine (ls)) {
		his->Fill (atoi (line));
	}
	ls_destroy (ls);
	his->Draw();
	his->GetXaxis()->SetLabelSize (0.04);
	his->GetYaxis()->SetLabelSize (0.04);
	buffer = stringCreate (100);
	pos = strchr (argv[1],'.');
	if (pos == NULL) {
		die ("Expected <file.intraOffsets>: %s",argv[1]);
	}
	*pos = '\0';
	stringPrintf (buffer,"%s_intraDistribution.jpg",argv[1]);
	canv->Print (string (buffer),"jpg");
	stringDestroy (buffer);
	return 0;
}
Exemplo n.º 2
0
SEXP c_read_biokit_exprs (SEXP filename) {
  LineStream ls;
  char* line;
  const int MAND_NCOL=7; // the first column is the row name, and column 2-7 are mandatory
  int add_ncol=0;
  Texta it;
  Texta rnames=textCreate(128);
  Array mrpkms=arrayCreate(128, double);
  Array mreads=arrayCreate(128, int);
  Array srpkms=arrayCreate(128, double);
  Array sreads=arrayCreate(128, int);
  Array mprop=arrayCreate(128, double);
  Array allmap = arrayCreate(128, int);
  Array annos=arrayCreate(128, Texta);
  Texta anno=NULL; // must have a NULL assigned; otherwise textCreateClear leads to memory error
  Stringa str=stringCreate(8);

  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap, R_res;
  SEXP R_colnames, R_class;
  
  int nprot=0;
  int i=0;
  int j=0;
  int nrow=0;
  const char* fn=CHAR(STRING_ELT(filename, 0));
  ls = ls_createFromFile(strdup(fn));

  ls_nextLine(ls); // skip the first header line
  while(line = ls_nextLine(ls)) {
    it = textFieldtokP(line, "\t");
    if(arrayMax(it)<MAND_NCOL)
      error("Input file must contain no less than %d columns", MAND_NCOL);

    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    add_ncol = max(arrayMax(it)-MAND_NCOL, add_ncol);
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it);  ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);
    nrow++;
  }

  R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
  R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

  for(i=0; i<nrow; ++i) {
    SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
    REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
    INTEGER(R_mreads)[i]=arru(mreads, i, int);
    REAL(R_srpkms)[i]=arru(srpkms, i, double);
    INTEGER(R_sreads)[i]=arru(sreads, i, int);
    REAL(R_mprop)[i]=arru(mprop, i, double);
    INTEGER(R_allmap)[i]=arru(allmap, i, int);
  }

  R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
  SET_VECTOR_ELT(R_res, 0, R_mrpkms);
  SET_VECTOR_ELT(R_res, 1, R_mreads);
  SET_VECTOR_ELT(R_res, 2, R_srpkms);
  SET_VECTOR_ELT(R_res, 3, R_sreads);
  SET_VECTOR_ELT(R_res, 4, R_mprop);
  SET_VECTOR_ELT(R_res, 5, R_allmap);
  for(i=0; i<add_ncol; ++i) {
    SEXP R_anno=NULL;
    R_anno=PROTECT(allocVector(STRSXP, nrow));
    for(j=0; j<nrow; ++j) {
      anno=array(annos, j, Texta);
      if(arrayMax(anno)>i) {
         SET_STRING_ELT(R_anno, j, mkChar(textItem(anno, i)));
      } else {
         SET_STRING_ELT(R_anno, j, R_NaString);
      }
    }
    SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); // -1 because the first column is row name
    UNPROTECT(1);
  }

  PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
  PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
  SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
  SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
  SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
  SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
  SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
  SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
  for(i=0; i<add_ncol; ++i) {
    stringPrintf(str, "Annotation%d", i+1);
    SET_STRING_ELT(R_colnames, i+MAND_NCOL-1,
                   mkChar(string(str)));
  }
  SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
  setAttrib(R_res, install("names"), R_colnames);
  setAttrib(R_res, install("row.names"), R_rnames);
  setAttrib(R_res, install("class"), R_class);

  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  arrayDestroy(rnames);
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);

  ls_destroy(ls);
  UNPROTECT(nprot);
  return(R_res);
}
Exemplo n.º 3
0
/**
 * Initialize the FASTQ module using a file name.
 * @note Use "-" to denote stdin.
 * @post fastq_nextSequence(), fastq_readAllSequences() can be called.
 */
void fastq_initFromFile (char* fileName) 
{
  lsFastq = ls_createFromFile (fileName);
  ls_bufferSet (lsFastq,1);
}
Exemplo n.º 4
0
int main (int argc, char *argv[])
{
	Array kgXrefs;
	Stringa buffer;
	LineStream ls;
	int count=0;
	char* geneSymbolTranscript;
	char* descriptionTranscript;
	char* line;
	char* exonID = NULL;

	config *conf;

	if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL)
		return EXIT_FAILURE;

	buffer = stringCreate (100);

	stringPrintf (buffer,"%s/%s",
		      confp_get(conf, "ANNOTATION_DIR"),
		      confp_get(conf, "KNOWN_GENE_XREF_FILENAME"));
	kgXrefs = util_readKnownGeneXrefs (string (buffer));
	arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName);
	stringDestroy (buffer);

	//  gfr_init ("-");
	 ls = ls_createFromFile("-");
  
	while (line = ls_nextLine(ls)) {
		char *lineP = hlr_strdup(line);
		WordIter w = wordIterCreate( line, "\t", 0);
		char *nameTranscript = wordNext( w );
		char *p = rindex(nameTranscript, '_');
		if (p) {
			exonID = hlr_strdup( p+1 );
			*p='\0';
		}
		transcript2geneSymbolAndGeneDescription(kgXrefs,
							nameTranscript,
							&geneSymbolTranscript,
							&descriptionTranscript);
		if (exonID) {
			printf("%s_%s\t%s\t%s\t%s", 
				nameTranscript, 
				exonID,
				geneSymbolTranscript, 
				exonID, 
				descriptionTranscript);
			hlr_free(exonID);
		} else {
			printf("%s\t%s\t1\t%s", 
				nameTranscript, 
			
	geneSymbolTranscript, 
				descriptionTranscript);
		}
		printf("%s\n", lineP+strlen(nameTranscript));
		count++;
		hlr_free(lineP);
		wordIterDestroy(w);
	}
	ls_destroy (ls);
	warn ("%s_numGfrEntries: %d",argv[0],count);
	confp_close(conf);

	return EXIT_SUCCESS;
}
Exemplo n.º 5
0
int main (int argc, char **argv)
{
  LineStream ls;
  Texta tokens = NULL;
  char *line;

  int hasQual = 0;
  int hasSeqs = 0;
  int start=1;
 
  ls = ls_createFromFile ("-");
  while (line = ls_nextLine (ls)) {
    // Put all the lines of the SAM header in comments
    if (line[0] == '@') {
      printf ("# %s\n", line);
      continue;
    }
    // Parse each SAM entry and store into array   
    tokens = textFieldtokP (line, "\t");
    if (arrayMax (tokens) < 11) {
      textDestroy( tokens );
      ls_destroy (ls);
      die ("Invalid SAM entry: %s", line);
    }
    SamEntry *currSamE = NULL;
    SamEntry *mateSamE = NULL;
    AllocVar(currSamE ); 

    int ret = generateSamEntry( tokens, currSamE, &hasSeqs, &hasQual );
    textDestroy( tokens );
    if ( ret==0 ) {
      if ( isPaired ( currSamE ) )
	ls_nextLine( ls ); // discarding next entry too (the mate)
      destroySamEntry( currSamE );
      freeMem( currSamE );
      continue;
    }   
    if ( isPaired( currSamE ) )   {
      int hasQual2, hasSeq2;
      AllocVar( mateSamE );
      Texta secondEnd = NULL;
      secondEnd = textFieldtok (ls_nextLine( ls ) , "\t");
      ret = generateSamEntry( secondEnd, mateSamE, &hasSeq2, &hasQual2 );
      textDestroy( secondEnd );
      if( ret == 0 ) {
	destroySamEntry( currSamE );
	destroySamEntry( mateSamE );
	freeMem( currSamE );
	freeMem( mateSamE );
	continue;
      }
      if (strcmp (currSamE->qname, mateSamE->qname) != 0) {
        die ("Please note that for paired-end data, sam2mrf requires the mate pairs to be on subsequent lines. You may want to sort the SAM file first.\nEx: sort -r file.sam | sam2mrf > file.mrf\n");
      }
    } 

    // Print MRF headers
    if( start ) {
      printf ("%s", MRF_COLUMN_NAME_BLOCKS);
      if (hasSeqs) printf("\t%s", MRF_COLUMN_NAME_SEQUENCE);
      if (hasQual) printf("\t%s", MRF_COLUMN_NAME_QUALITY_SCORES);
      printf ("\t%s\n", MRF_COLUMN_NAME_QUERY_ID);
      start=0;
    }
    
    // Print AlignmentBlocks   
    printMrfAlignBlocks (currSamE, R_FIRST);
    if( isPaired ( currSamE ) ) {  
      printf ("|");
      printMrfAlignBlocks (mateSamE, R_SECOND);
    }

    seq_init();
    // Print Sequence
    if (hasSeqs) {
      if (!currSamE->seq)
        die ("Entry missing sequence column\n");
      if( currSamE->flags & S_QUERY_STRAND )
	seq_reverseComplement( currSamE->seq, strlen(currSamE->seq));
      printf ("\t%s", currSamE->seq);
      if (mateSamE) {
        if (!mateSamE->seq)
          die ("Entry missing sequence column\n");
        if( mateSamE->flags & S_MATE_STRAND )
	  seq_reverseComplement( mateSamE->seq, strlen(mateSamE->seq));
	printf ("|%s", mateSamE->seq);
      }
    }
    // Print quality scores
    if (hasQual) {
      if (!currSamE->qual)
        die ("Entry missing quality scores column\n");
      printf ("\t%s", currSamE->qual);
      if (mateSamE) {
        if (!mateSamE->qual)
          die ("Entry missing quality scores column\n");
        printf ("|%s", mateSamE->qual);
      }
    }

    // Print queryID

    if (mateSamE) {
      printf ("\t%s|%s", currSamE->qname,"2"); // No need to print out both IDs, but need the pipe symbol for consistency
    }
    else {
      printf ("\t%s", currSamE->qname);
    }
    printf("\n");
    
    destroySamEntry( currSamE );
    freeMem( currSamE ); 
    if( isPaired( currSamE ) ) {
      destroySamEntry ( mateSamE );
      freeMem( mateSamE );
    }
  }
  // clean up
  ls_destroy (ls);
  return EXIT_SUCCESS;
}
Exemplo n.º 6
0
/**
 * Initialize the blastParser module from file.
 * @param[in] fileName File name, use "-" to denote stdin
 */
void blastParser_initFromFile (char* fileName)
{
    ls = ls_createFromFile (fileName);
    ls_bufferSet (ls,1);
}
Exemplo n.º 7
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  BLEntry *currBLE;
  BLEntry currQuery;
  FILE *fp;
  char *line;
  int count;
  int countRemoved;
  
  int index;
  WordIter w;
  Array blackList = arrayCreate(20, BLEntry);

  if (argc != 2) {
    usage ("%s <blackList.txt>",argv[0]);
  }  
  fp = fopen( argv[1], "r" );
  
  if( !fp )  die("Unable to open file: %s", argv[1]);
  // reading blacklist file
  LineStream ls = ls_createFromFile( argv[1] );
  while( line = ls_nextLine(ls) ) {
    w = wordIterCreate( line, "\t", 1);
    currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
    currBLE->gene1 = hlr_strdup ( wordNext(w) );
    currBLE->gene2 = hlr_strdup ( wordNext(w) );    
    wordIterDestroy(w);
  }
  fclose(fp);
  arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);

  // beginFiltering
  count = 0;
  countRemoved = 0;
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while (currGE = gfr_nextEntry ()) { // reading the gfr
    // creating a new query to the black list
    currQuery.gene1 = currGE->geneSymbolTranscript1;
    currQuery.gene2 = currGE->geneSymbolTranscript2;
    // searching against read_1/read_2
    int res = arrayFind( blackList, &currQuery, 
			 &index,  (ARRAYORDERF) sortBlackListByName1);  
    
    if( !res ) { // not found, then searching against read_2/read_1
      currQuery.gene1 = currGE->geneSymbolTranscript2;
      currQuery.gene2 = currGE->geneSymbolTranscript1;
      
      res =  arrayFind( blackList, &currQuery, 
			&index, (ARRAYORDERF) sortBlackListByName1 );
      
      if( !res ) { // not found, write the instance to stdout, update the counts
	puts (gfr_writeGfrEntry (currGE));
	count++;	
      } else { // found: read2/read1
	countRemoved++;
      }	
    } else { //found: read1/read2
      countRemoved++;
    }
  }	           
  gfr_deInit ();
  arrayDestroy( blackList );
  warn ("%s_BlackListFilter: %s",argv[0], argv[1]);
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  return 0;
}
Exemplo n.º 8
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  BLEntry *currBLE;
  BLEntry currQuery;
  FILE *fp;
  char *line;
  int count;
  int countRemoved;
  
  int index;
  WordIter w;
  Array blackList = arrayCreate(20, BLEntry);
  config *Conf;

  if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) {
    die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH"));
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) {
    die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) {
    die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  Stringa buffer=stringCreate( 100 );
  stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") );
  /*  fp = fopen( string( buffer ), "r" );
  if( !fp )  die("Unable to open file: %s", string(buffer));
  stringDestroy( buffer );
  */ 
// reading blacklist file
  LineStream ls = ls_createFromFile( string(buffer) );
  while( line = ls_nextLine(ls) ) {
    w = wordIterCreate( line, "\t", 1);
    currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
    currBLE->gene1 = hlr_strdup ( wordNext(w) );
    currBLE->gene2 = hlr_strdup ( wordNext(w) );    
    wordIterDestroy(w);
  }
  //fclose(fp);
  ls_destroy( ls );
  stringDestroy( buffer );
  arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);

  // beginFiltering
  count = 0;
  countRemoved = 0;
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while (currGE = gfr_nextEntry ()) { // reading the gfr
    if( currGE->geneSymbolTranscript1 == NULL ) {
      die("Gene symbols are not present in the GFR file. Please run gfrAddInfo before gfrBlackListFilter.");
      return EXIT_FAILURE;
    }
	
    // creating a new query to the black list
    currQuery.gene1 = currGE->geneSymbolTranscript1;
    currQuery.gene2 = currGE->geneSymbolTranscript2;
    if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) {
	countRemoved++;
	continue;
      }
    // searching against read_1/read_2
    int res = arrayFind( blackList, &currQuery, 
			 &index,  (ARRAYORDERF) sortBlackListByName1);  
    
    if( !res ) { // not found, then searching against read_2/read_1
      currQuery.gene1 = currGE->geneSymbolTranscript2;
      currQuery.gene2 = currGE->geneSymbolTranscript1;
      
      res =  arrayFind( blackList, &currQuery, 
			&index, (ARRAYORDERF) sortBlackListByName1 );
      
      if( !res ) { // not found, write the instance to stdout, update the counts
	puts (gfr_writeGfrEntry (currGE));
	count++;	
      } else { // found: read2/read1
	countRemoved++;
      }	
    } else { //found: read1/read2
      countRemoved++;
    }
  }	           
  gfr_deInit ();
  arrayDestroy( blackList );
  warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME"));
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  confp_close( Conf);
  return 0;
}
Exemplo n.º 9
0
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src;
  FILE *out;
  char *line;
  Texta entry;
  int i, astart, aend;

  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
	fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
	return;
  }

  SRegion_t *tmp;
  tmp = arrayp (regions, 0, SRegion_t);

  if (tmp->chromosome == 0) {
	fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  }
  else {
	for (i = 0; i < arrayMax (regions); i++) {
	  tmp = arrayp (regions, i, SRegion_t);

	  if (tmp->chromosome == 23) {
		stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
	  }
	  else if (tmp->chromosome == 24) {
		stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
	  }
	  else {
		stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
	  }

	  if ((src = ls_createFromFile (string (buffer))) == NULL) {
		fprintf (stderr, "Cannot open exons.hlight.txt\n");
		return;
	  }

	  while ((line = ls_nextLine (src)) != NULL) {
		entry = textFieldtokP (line, " ");

		astart = atoi (textItem (entry, 1));
		aend   = atoi (textItem (entry, 2));

		if ((astart >= tmp->start && astart <= tmp->end) ||
			(aend >= tmp->start && aend <= tmp->end)) {
		  fprintf (out, "%s\n", line);
		}
		textDestroy (entry);
	  }
	}

	fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }

  stringDestroy (buffer);
  ls_destroy (src);
  fclose (out);
}
Exemplo n.º 10
0
/**
 * Initialize the elandParser module.
 * @param[in] fileName File name, use "-" to denote stdin
 */
void elandParser_init (char *fileName)
{
  ls = ls_createFromFile (fileName);
}