コード例 #1
0
ファイル: cgi.c プロジェクト: Accio/ribios
/*
 * Collects all CGI parameters of the current request into an R character
 * vector. The values become the vector elements and the parameter names are
 * attached through setNames(), in the order cgiGetNext() delivers them.
 */
SEXP r_cgiParameters() {
  Stringa currValue = stringCreate(16);
  Texta paramNames = textCreate(8);
  Texta paramValues = textCreate(8);
  SEXP r_keys, r_values;
  char *currName;
  int idx, total;

  cgiGetInit();

  /* Drain the CGI iterator; both Textas grow in lock-step. */
  for (currName = cgiGetNext(currValue);
       currName;
       currName = cgiGetNext(currValue)) {
    textAdd(paramNames, currName);
    textAdd(paramValues, string(currValue));
  }

  total = arrayMax(paramNames);
  PROTECT(r_keys = allocVector(STRSXP, total));
  PROTECT(r_values = allocVector(STRSXP, total));

  /* Copy the collected C strings into the two protected R vectors. */
  idx = 0;
  while (idx < total) {
    SET_STRING_ELT(r_keys, idx, mkChar(textItem(paramNames, idx)));
    SET_STRING_ELT(r_values, idx, mkChar(textItem(paramValues, idx)));
    ++idx;
  }
  setNames(r_values, r_keys);

  stringDestroy(currValue);
  textDestroy(paramNames);
  textDestroy(paramValues);
  UNPROTECT(2);
  return r_values;
}
コード例 #2
0
ファイル: cgi.c プロジェクト: Accio/ribios
/*
 * Looks up a single CGI parameter by name.
 *
 * r_param      name to search for; R_NilValue makes the function return
 *              R_NilValue immediately.
 * ignore_case  converted with cBool(); a true value selects myStrCaseEqual,
 *              otherwise myStrEqual is used for the name comparison.
 * r_default    returned unchanged when no parameter name matches.
 *
 * Returns a character vector holding the value of the first matching
 * parameter, or r_default.
 */
SEXP r_cgiParam(SEXP r_param, SEXP ignore_case, SEXP r_default) {
  if(r_param == R_NilValue) return(R_NilValue);

  char *name;
  Stringa value=stringCreate(16);
  char *param=cStr(r_param);
  SEXP res=r_default;

  int (*fPtr)(char*, char*);
  fPtr=cBool(ignore_case) ? &myStrCaseEqual : &myStrEqual;

  cgiGetInit();

  while((name = cgiGetNext(value)) != NULL) {
    if((*fPtr)(name, param)) {
      /* mkString() copies the text into an R object, so the result is built
         straight from the Stringa; the former hlr_strdup() copy was never
         released. */
      res=mkString(string(value));
      break;
    }
  }

  stringDestroy(value);
  return res;
}
コード例 #3
0
/*
 * Plots the distribution of intra-read offsets.
 *
 * Reads one integer per line from argv[1], fills a 1000-bin histogram over
 * the range 0..1000 and prints the canvas to <name>_intraDistribution.jpg,
 * where <name> is argv[1] cut at its first '.'.
 */
int main (int argc, char *argv[])
{ 
	LineStream ls;
	char *line;
	char *pos;
	Stringa buffer;

	if (argc != 2) {
		// the format contains a %s conversion, so the program name must be passed
		usage ("%s <file.intraOffsets>",argv[0]);
	}
	// reject a name without extension before any work is done; the name is
	// only truncated after the file has been read, because argv[1] is still
	// needed to open it
	pos = strchr (argv[1],'.');
	if (pos == NULL) {
		die ("Expected <file.intraOffsets>: %s",argv[1]);
	}

	TH1 *his = new TH1D ("","Intra-read distribution",1000,0,1000);
	TCanvas *canv = new TCanvas("","canvas",1200,400);
	ls = ls_createFromFile (argv[1]);
	while ((line = ls_nextLine (ls)) != NULL) {
		his->Fill (atoi (line));
	}
	ls_destroy (ls);
	his->Draw();
	his->GetXaxis()->SetLabelSize (0.04);
	his->GetYaxis()->SetLabelSize (0.04);

	buffer = stringCreate (100);
	*pos = '\0';
	stringPrintf (buffer,"%s_intraDistribution.jpg",argv[1]);
	canv->Print (string (buffer),"jpg");
	stringDestroy (buffer);
	return 0;
}
コード例 #4
0
ファイル: gfrPCRFilter.c プロジェクト: asboner/fusionseq
int main (int argc, char *argv[])
{
	GfrEntry *currGE;
	int count;
	int countRemoved;
	int i;

	if (argc != 3) {
		usage ("%s <offsetCutoff> <minNumUniqueReads>",argv[0]);
	}
	count = 0;
	countRemoved = 0;

	int offsetCutOff = atoi (argv[1]);
	int minNumUniqueReads = atoi (argv[2]);

	gfr_init ("-");
	puts (gfr_writeHeader ());
	while (currGE = gfr_nextEntry ()) {
		Array starts = arrayCreate( 100, int);
		for (i = 0; i < arrayMax( currGE->interReads ); i++) {
			int currStart = arrp(currGE->interReads, i, GfrInterRead)->readStart1 + arrp(currGE->interReads, i, GfrInterRead)->readStart2;
			array(starts, arrayMax(starts), int) = currStart; 
		}
		arraySort( starts, (ARRAYORDERF) arrayIntcmp );
		arrayUniq( starts, NULL, (ARRAYORDERF) arrayIntcmp ) ;
		int numUniqeOffsets = arrayMax( starts );
		arrayDestroy( starts );

	if (arrayMax( currGE->readsTranscript1 ) != arrayMax( currGE->readsTranscript2 ) )
		die( "The two ends have a different number of reads");
	Texta reads = textCreate(arrayMax(currGE->readsTranscript1));
	for (i = 0; i < arrayMax(currGE->readsTranscript1); i++) {
		Stringa strA = stringCreate( strlen(textItem( currGE->readsTranscript1, i) ) * 2 + 1);
		stringAppendf( strA, textItem( currGE->readsTranscript1,i));
		stringAppendf( strA, textItem( currGE->readsTranscript2,i)); 
		textAdd( reads, string(strA));
		stringDestroy( strA );
	}
	textUniqKeepOrder( reads );
	int numRemaining = arrayMax( reads );
	textDestroy ( reads );

	if (numRemaining <= minNumUniqueReads || numUniqeOffsets <= offsetCutOff) {
		countRemoved++;
		continue;
	} 
	puts (gfr_writeGfrEntry (currGE));
	count++;
	}
	gfr_deInit ();
	warn("%s_PCRFilter: offset=%d minNumUniqueReads=%d",
	     argv[0],offsetCutOff, minNumUniqueReads);
	warn("%s_numRemoved: %d",argv[0],countRemoved);
	warn("%s_numGfrEntries: %d",argv[0],count);
	return 0;
}
コード例 #5
0
ファイル: bp2alignment.c プロジェクト: arsenijae/fusionseq
/*
 * Retrieves the sequences of two tile coordinates and returns them
 * concatenated in the order the extraction tool reports them.
 *
 * The two coordinates are written to a temporary file targets_<pid>.txt in
 * the current directory, which is passed as -seqList to the command built
 * from the BLAT_TWO_BIT_TO_FA, BLAT_DATA_DIR and BLAT_TWO_BIT_DATA_FILENAME
 * configuration values. Exactly two sequences are expected; otherwise die()
 * is called.
 *
 * The returned pointer refers to a function-static Stringa that is cleared
 * and refilled on every call.
 */
static char* getBreakPointSequence (char *tileCoordinate1, char *tileCoordinate2)
{
	Stringa buffer;
	Stringa targetsFile;
	FILE *fp;
	Array targetSeqs;
	int i;
	Seq *currSeq;
	static Stringa sequence = NULL;

	buffer = stringCreate (100);
	targetsFile = stringCreate (100);
	stringPrintf (targetsFile,"targets_%d.txt",getpid ());
	if (!(fp = fopen (string (targetsFile),"w")) ){
		die ("Unable to open target file: %s",string (targetsFile));
	}
	fprintf (fp,"%s\n%s",tileCoordinate1,tileCoordinate2);
	fclose (fp);

	stringPrintf (buffer,"%s %s/%s stdout -noMask -seqList=%s",
		      confp_get(Conf, "BLAT_TWO_BIT_TO_FA"),
		      confp_get(Conf, "BLAT_DATA_DIR"),
		      confp_get(Conf, "BLAT_TWO_BIT_DATA_FILENAME"),
		      string (targetsFile));
	fasta_initFromPipe (string (buffer));
	targetSeqs = fasta_readAllSequences (0);
	fasta_deInit ();

	/* The pipe is finished, so the list file is no longer needed. Remove it
	   before validating the result so that a die() below does not leave the
	   temporary file behind. */
	stringPrintf (buffer,"rm -rf %s",string (targetsFile));
	hlr_system (string (buffer),0);

	if (arrayMax (targetSeqs) != 2) {
		die ("Expected only two target sequences");
	} 
	stringCreateClear (sequence,100);
	for (i = 0; i < arrayMax (targetSeqs); i++) {
		currSeq = arrp (targetSeqs,i,Seq);
		stringAppendf (sequence,"%s",currSeq->sequence);
		hlr_free (currSeq->name);
		hlr_free (currSeq->sequence);
	}
	arrayDestroy (targetSeqs);
	stringDestroy (targetsFile);
	stringDestroy (buffer);
	return string (sequence);
}
コード例 #6
0
ファイル: seqViz_cgi.c プロジェクト: arsenijae/fusionseq
/**
 * Writes the circos configuration file for one locus.
 *
 * The file is created as
 * <WEB_CIRCOS_DIR>/test/circos_<prefix>_<chromosome>_<start>_<end>.conf and
 * receives, in order: header, units, data tracks (only when the scale
 * returned by getScale() is at most 10000), links and footer. The scale is
 * also printed to stdout inside an <h2> element.
 *
 * @return 0 after the file has been written; the -1 path follows a die()
 *         call and is only reached if die() returns
 */
int write_circosConf (char* prefix,
		              Locus locus,
		              Array regions,
		              Chrdata_t *chromosomes,
		              SVCfg_t *settings)
{
  float trackRadius = 0.99;
  Stringa confPath = stringCreate(50);
  int unitScale = getScale (regions);
  FILE *confFile;

  stringPrintf (confPath, "%s/test/circos_%s_%s_%d_%d.conf",
		confp_get(Conf, "WEB_CIRCOS_DIR"),
		prefix,
		locus.chromosome,
		locus.start,
		locus.end);

  confFile = fopen (string (confPath), "w");
  if (confFile == NULL) {
    die ("Unable to open target file");
    return -1;
  }

  printf ("<h2>%i</h2>", unitScale);

  /* header and unit definitions are always emitted */
  conf_printHeader (confFile,
		    confp_get(Conf, "WEB_CIRCOS_DIR"),
		    confp_get(Conf, "WEB_DATA_DIR"),
		    confp_get(Conf, "WEB_SDATA_DIR"),
		    chromosomes,
		    prefix,
		    locus,
		    unitScale);
  conf_printUnits (confFile, regions, chromosomes, unitScale);

  /* data tracks are skipped once the scale exceeds 10000 */
  if (!(unitScale > 10000)) {
    conf_printDataTracks (confFile,
			  prefix,
			  locus,
			  confp_get(Conf, "WEB_SDATA_DIR"),
			  confp_get(Conf, "WEB_DATA_DIR"),
			  &trackRadius,
			  regions,
			  chromosomes,
			  settings);
  }

  conf_printLinks (confFile, confp_get(Conf, "WEB_DATA_DIR"), &trackRadius, prefix, locus, settings->readlim);
  conf_printFooter (confFile);

  fclose (confFile);
  stringDestroy (confPath);
  return 0;
}
コード例 #7
0
ファイル: symtable.c プロジェクト: mystal/pj-compiler
//Releases a symbol table together with every block still on its block list.
//Exactly one block is expected to remain at this point; any other count is
//reported on stderr, but all remaining blocks are freed regardless.
//A NULL st is ignored.
void stDestroy(symtable *st)
{
    if (st == NULL)
        return;
    if (listSize(st->blockList) != 1)
        fprintf(stderr, "Symbol table block leak!\n");
    //Pop until empty: handles both leftover blocks and an already empty list
    while (listSize(st->blockList) > 0)
    {
        block *b = (block *) listRemoveBack(st->blockList);
        if (b == NULL)
            break;
        stringDestroy(b->name);
        bstDestroy(b->symbols, bstDelSymbol); //Cleanup memory for symbols in bst
        free(b);
    }
    listDestroy(st->blockList);
    free(st);
}
コード例 #8
0
ファイル: rdbu.c プロジェクト: Accio/ribios
void rdbu_initLoginInfo (char *filename) {
  /**
     Determines user, password and database for a database login.
     The following sources are consulted in this order:
     1. the file .login_db in the user's home directory ($HOME)
     2. the file .login_db in the current directory
     3. the file 'filename' (optional; skipped if NULL or empty)
     4. the command line arguments 'dbuser', 'dbpassword', 'dbname'
     Syntax of the files:
     - only the first line is read
     - this line must have the form user/password\@dbname
     - each of these fields can be '-'; such a field leaves the current
       value untouched. Example: $HOME/.login_db contains
       -/-\@testdb
       'filename' contains
       scott/-@-
       and the command line has the argument
       -dbpassword tiger
       The result is user scott, password tiger, database testdb.
     The command line is only considered if arg_init() was called before;
     otherwise it is ignored.
     Calling this function a second time is a fatal error.
     Postcondition: rdbu_user() etc can be called
  */
  char *homeDir = getenv ("HOME");

  if (user != NULL)
    die ("rdbu_getLoginInfo() twice");
  user = stringCreate(10);
  password = stringCreate(10);
  database = stringCreate(10);

  if (homeDir != NULL) {
    Stringa homeLogin = stringCreate (100);
    stringPrintf (homeLogin,"%s/.login_db",homeDir);
    readUserInfoFile (string (homeLogin));
    stringDestroy (homeLogin);
  }
  readUserInfoFile (".login_db");
  if (filename != NULL && filename[0] != '\0')
    readUserInfoFile (filename);

  if (!arg_isInit ())
    return;
  if (arg_present ("dbuser"))
    stringCpy (user,arg_get ("dbuser"));
  if (arg_present ("dbpassword"))
    stringCpy (password,arg_get ("dbpassword"));
  if (arg_present ("dbname"))
    stringCpy (database,arg_get ("dbname"));
}
コード例 #9
0
ファイル: mrf2gff.c プロジェクト: sbonerlab/rseqtools
int main (int argc, char *argv[])
{
  int i,j,groupNumber;
  MrfEntry *currEntry;
  GffEntry *currGffEntry,*nextGffEntry;
  Array gffEntries;
  FILE *fp;
  Stringa buffer;
  short int paired;

  if (argc != 2) {
    usage ("%s <prefix>",argv[0]);
  }
  buffer = stringCreate (1000);
  groupNumber = 0;
  mrf_init ("-");
  gffEntries = arrayCreate (100000,GffEntry);
  while (currEntry = mrf_nextEntry ()) {
    processRead (gffEntries, currEntry, &groupNumber);
  }
  mrf_deInit ();

  arraySort (gffEntries,(ARRAYORDERF)sortGffEntriesByTargetNameAndGroupNumber);
  i = 0; 
  while (i < arrayMax (gffEntries)) {
    currGffEntry = arrp (gffEntries,i,GffEntry);
    stringPrintf (buffer,"%s_%s.gff",argv[1],currGffEntry->targetName);
    fp = fopen (string (buffer),"w");
    if (fp == NULL) {
      die ("Unable to open file: %s",string (buffer));
    }
    fprintf (fp,"browser hide all\n");
    fprintf (fp,"track name=\"%s_%s\" visibility=2\n",argv[1],currGffEntry->targetName);
    fprintf (fp,"%s\n",currGffEntry->line);
    j = i + 1;
    while (j < arrayMax (gffEntries)) {
      nextGffEntry = arrp (gffEntries,j,GffEntry);
      if (!strEqual (currGffEntry->targetName,nextGffEntry->targetName)) {
        break;
      } 
      fprintf (fp,"%s\n",nextGffEntry->line);
      j++;
    }
    i = j;
    fclose (fp);
  }
  stringDestroy (buffer);
  return 0;
}
コード例 #10
0
ファイル: sam2mrf.c プロジェクト: gersteinlab/RSEQtools
/*
 * Fills *currSamE from the tokens of one SAM alignment line.
 *
 * tokens    fields of the line; indices 0..10 are read as the fixed columns
 *           (qname, flags, rname, pos, mapq, cigar, mrnm, mpos, isize, seq,
 *           qual), everything from index 11 on is joined with tabs into
 *           currSamE->tags.
 * currSamE  entry to fill; string members are strdup() copies.
 * hasSeqs   set to 1 when the sequence field is not "*"; otherwise untouched.
 * hasQual   set to 1 when the quality field is not "*"; otherwise untouched.
 *
 * Returns 1 when the entry was filled completely. Returns 0 when the line
 * has fewer than 11 fields (only the string members are set, all to NULL),
 * or when the flags mark the query or mate as unmapped or the read as
 * failing checks (seq, qual and tags are then left NULL).
 */
int generateSamEntry ( Texta tokens, SamEntry *currSamE, 
		       int* hasSeqs, 
		       int* hasQual)
{
  int j;

  /* Give every string member a defined value before any early return. */
  currSamE->qname = NULL;
  currSamE->rname = NULL;
  currSamE->cigar = NULL;
  currSamE->mrnm  = NULL;
  currSamE->seq   = NULL;
  currSamE->qual  = NULL;
  currSamE->tags  = NULL;

  /* Fields 0..10 are accessed below; refuse shorter lines instead of
     reading past the end of the token array. */
  if (arrayMax (tokens) < 11)
    return 0;

  currSamE->qname = strdup (textItem (tokens, 0));
  currSamE->flags = atoi (textItem (tokens, 1));
  currSamE->rname = strdup (textItem (tokens, 2));
  currSamE->pos   = atoi (textItem (tokens, 3));
  currSamE->mapq  = atoi (textItem (tokens, 4));
  currSamE->cigar = strdup (textItem (tokens, 5));
  currSamE->mrnm  = strdup (textItem (tokens, 6));
  currSamE->mpos  = atoi (textItem (tokens, 7));
  currSamE->isize = atoi (textItem (tokens, 8));
  
  // Skip if unmapped or fails platform/vendor checks
  if (currSamE->flags & S_QUERY_UNMAPPED ||
      currSamE->flags & S_MATE_UNMAPPED ||
      currSamE->flags & S_FAILS_CHECKS)
    return 0;
  
  // Join the optional tag fields (index 11 and up) with tabs
  if (arrayMax (tokens) > 11) {
    Stringa tags = stringCreate (10);
    for (j = 11; j < arrayMax (tokens); j++) {
      if (j > 11)
	stringAppendf (tags, "\t");
      stringAppendf (tags, "%s", textItem (tokens, j));
    }
    currSamE->tags = strdup (string(tags));
    stringDestroy (tags);
  }
  
  if (strcmp (textItem (tokens, 9),  "*") != 0) {
    *hasSeqs = 1;
    currSamE->seq = strdup (textItem (tokens, 9));
  }
  if (strcmp (textItem (tokens, 10), "*") != 0) {
    *hasQual = 1;
    currSamE->qual = strdup (textItem (tokens, 10));
  }
  return 1;
}
コード例 #11
0
/*
 * Removes GFR entries whose two transcripts are reported as homologous.
 *
 * The configuration named by FUSIONSEQ_CONFPATH supplies ANNOTATION_DIR and
 * KNOWN_GENE_TREE_FAM_FILENAME; that file is loaded and sorted by transcript
 * name. Each entry from gfr_nextEntry() is tested with isHomologous():
 * homologous entries are only counted, all others are printed. Both counts
 * are reported through warn().
 */
int main (int argc, char *argv[])
{
  config *conf = confp_open(getenv("FUSIONSEQ_CONFPATH"));
  Stringa treeFamPath;
  Array kgTreeFams;
  GfrEntry *currGE;
  int numKept = 0;
  int numRemoved = 0;

  if (conf == NULL)
    return EXIT_FAILURE;

  /* load the tree-family table */
  treeFamPath = stringCreate (100);
  stringPrintf (treeFamPath,"%s/%s",
                confp_get(conf, "ANNOTATION_DIR"),
                confp_get(conf, "KNOWN_GENE_TREE_FAM_FILENAME"));
  kgTreeFams = util_readKnownGeneTreeFams (string (treeFamPath));
  arraySort (kgTreeFams,(ARRAYORDERF)sortKgTreeFamsByTranscriptName);
  stringDestroy (treeFamPath);

  gfr_init ("-");
  puts (gfr_writeHeader ());
  for (currGE = gfr_nextEntry (); currGE; currGE = gfr_nextEntry ()) {
    if (!isHomologous (kgTreeFams,currGE->nameTranscript1,currGE->nameTranscript2)) {
      puts (gfr_writeGfrEntry (currGE));
      numKept++;
    }
    else {
      numRemoved++;
    }
  }
  gfr_deInit ();
  warn ("%s_numRemoved: %d",argv[0],numRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],numKept);

  confp_close(conf);

  return EXIT_SUCCESS;
}
コード例 #12
0
ファイル: gfr2bed.c プロジェクト: asboner/fusionseq
/*
 * Writes two BED files per GFR entry and echoes the entries to stdout.
 *
 * For every entry, <id>_1.bed and <id>_2.bed are created in the current
 * directory. Each starts with a browser line and a track line and then lists
 * one line per inter read: chromosome, start and end of the first read in
 * file 1, of the second read in file 2. The number of entries processed is
 * reported through warn().
 */
int main (int argc, char *argv[])
{
	GfrEntry *currGE;
	Stringa fileName;
	int numEntries = 0;

	fileName = stringCreate (100);
	gfr_init ("-");
	puts (gfr_writeHeader ());
	for (currGE = gfr_nextEntry (); currGE; currGE = gfr_nextEntry ()) {
		FILE *bed1;
		FILE *bed2;
		int readIdx;

		stringPrintf (fileName,"%s_1.bed",currGE->id);
		bed1 = fopen (string (fileName),"w");
		stringPrintf (fileName,"%s_2.bed",currGE->id);
		bed2 = fopen (string (fileName),"w");
		if (!bed1 || !bed2) {
			die ("Unable to open BED files");
		}

		/* file headers */
		fprintf (bed1,"browser full knownGene\n");
		fprintf (bed1,"track name=\"Inter paird-ends: %s_1\" visibility=2\n",currGE->id);
		fprintf (bed2,"browser full knownGene\n");
		fprintf (bed2,"track name=\"Inter paird-ends: %s_2\" visibility=2\n",currGE->id);

		/* one interval per inter read and side */
		readIdx = 0;
		while (readIdx < arrayMax (currGE->interReads)) {
			GfrInterRead *currGIR = arrp (currGE->interReads,readIdx,GfrInterRead);
			fprintf (bed1,"%s\t%d\t%d\n",currGE->chromosomeTranscript1,currGIR->readStart1,currGIR->readEnd1);
			fprintf (bed2,"%s\t%d\t%d\n",currGE->chromosomeTranscript2,currGIR->readStart2,currGIR->readEnd2);
			readIdx++;
		}
		fclose (bed1);
		fclose (bed2);
		puts (gfr_writeGfrEntry (currGE));
		numEntries++;
	}
	gfr_deInit ();
	stringDestroy (fileName);
	warn ("%s_numGfrEntries: %d",argv[0],numEntries);
	return 0;
}
コード例 #13
0
ファイル: sqvCircos.c プロジェクト: arsenijae/fusionseq
/*
 * Writes the chromosome unit and display settings of a circos configuration.
 *
 * If the first region has chromosome 0, the default display is enabled with
 * a fixed unit of 1000000. Otherwise the unit is 'scale', the default
 * display is switched off and a "chromosomes = ..." line is written that
 * lists, separated by ';', every region whose own 'show' flag and whose
 * chromosome's 'show' flag are both set, as <name>:<start/scale>-<end/scale>.
 */
void conf_printUnits (FILE *fp, Array regions, Chrdata_t *chromosomes, int scale)
{
  Stringa shown = stringCreate (50);
  SRegion_t *region = arrayp (regions, 0, SRegion_t);
  int needSeparator = 0;
  int idx;

  if (region->chromosome != 0) {
    stringPrintf (shown, "chromosomes = ");

    fprintf (fp, "chromosomes_units = %i\n", scale);
    fprintf (fp, "chromosomes_display_default = no\n");

    for (idx = 0; idx < arrayMax (regions); idx++) {
      char *name;

      region = arrayp (regions, idx, SRegion_t);
      if (!(chromosomes[region->chromosome].show && region->show)) {
        continue;
      }
      name = getHchrname (region->chromosome);

      /* entries after the first one are preceded by ';' */
      if (needSeparator) {
        stringAppendf (shown, ";");
      }
      stringAppendf (shown, "%s:%i-%i", name, region->start/scale, region->end/scale);
      needSeparator = 1;

      free (name);
    }
    fprintf (fp, "%s\n", string (shown));
  }
  else {
    fprintf (fp, "chromosomes_display_default = yes\n");
    fprintf (fp, "chromosomes_units = 1000000\n");
  }

  stringDestroy (shown);
}
コード例 #14
0
ファイル: symtable.c プロジェクト: mystal/pj-compiler
symtable *stCreate()
{
    symtable *st = (symtable *) malloc(sizeof(symtable));
    st->blockList = listCreate();
    //Add block0
    block *b = (block *) malloc(sizeof(block));
    b->name = stringCreate();
    stringAppendCharArray(b->name, "block0", 6*sizeof(char));
    b->symbols = bstCreate(bstCompareSymbol);
    b->nextLoc = 0;
    listAddBack(st->blockList, b);
    //Initialize block0 with PJ's builtin procedures and input/output files
    for (unsigned int i = 0; i < builtin_num; i++)
    {
        const char *builtinName = pjbuiltinString(i);
        string *name = stringCreate();
        stringAppendCharArray(name, builtinName, strlen(builtinName));
        symbol *sym = symbolCreate(name);
        stringDestroy(name);
        symbolSetBuiltin(sym, i);
        stAddSymbol(st, sym);
    }
    return st;
}
コード例 #15
0
ファイル: read_biokit_exprs.c プロジェクト: Accio/ribios
/*
 * Reads a tab-separated biokit expression file into an R list with class
 * "data.frame".
 *
 * filename  character vector; its first element is the path to read.
 *
 * The first line of the file is skipped. Every further line must have at
 * least 7 fields, otherwise error() is raised: field 1 becomes the row name,
 * fields 2-7 become the columns RPKM_MultiMap, ReadCount_MultiMap,
 * RPKM_UniqMap, ReadCount_UniqMap, MultiProp and AllMappingReads. Any
 * further fields become character columns Annotation1, Annotation2, ...;
 * rows with fewer extra fields than the widest row get NA there.
 *
 * Returns the list with "names", "row.names" and "class" attributes set.
 */
SEXP c_read_biokit_exprs (SEXP filename) {
  LineStream ls;
  char* line;
  const int MAND_NCOL=7; // the first column is the row name, and column 2-7 are mandatory
  int add_ncol=0;
  Texta it;
  Texta rnames=textCreate(128);
  Array mrpkms=arrayCreate(128, double);
  Array mreads=arrayCreate(128, int);
  Array srpkms=arrayCreate(128, double);
  Array sreads=arrayCreate(128, int);
  Array mprop=arrayCreate(128, double);
  Array allmap = arrayCreate(128, int);
  Array annos=arrayCreate(128, Texta);
  Texta anno=NULL; // must have a NULL assigned; otherwise textCreateClear leads to memory error
  Texta rowAnno; // annotation fields of the row currently copied into R
  Stringa str=stringCreate(8);

  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap, R_res;
  SEXP R_colnames, R_class;
  
  int nprot=0;
  int i=0;
  int j=0;
  int nrow=0;
  const char* fn=CHAR(STRING_ELT(filename, 0));
  // NOTE(review): this strdup() copy is never released; presumably it only
  // exists to drop the const qualifier - confirm ownership rules of
  // ls_createFromFile() before freeing it
  ls = ls_createFromFile(strdup(fn));

  ls_nextLine(ls); // skip the first header line
  while((line = ls_nextLine(ls)) != NULL) {
    it = textFieldtokP(line, "\t");
    if(arrayMax(it)<MAND_NCOL)
      error("Input file must contain no less than %d columns", MAND_NCOL);

    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    add_ncol = max(arrayMax(it)-MAND_NCOL, add_ncol);
    // anno is a scratch buffer reused for every line; each row keeps a clone
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it);  ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);
    // every field has been copied, so the per-line token list can go
    textDestroy(it);
    nrow++;
  }

  R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
  R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

  for(i=0; i<nrow; ++i) {
    SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
    REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
    INTEGER(R_mreads)[i]=arru(mreads, i, int);
    REAL(R_srpkms)[i]=arru(srpkms, i, double);
    INTEGER(R_sreads)[i]=arru(sreads, i, int);
    REAL(R_mprop)[i]=arru(mprop, i, double);
    INTEGER(R_allmap)[i]=arru(allmap, i, int);
  }

  R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
  SET_VECTOR_ELT(R_res, 0, R_mrpkms);
  SET_VECTOR_ELT(R_res, 1, R_mreads);
  SET_VECTOR_ELT(R_res, 2, R_srpkms);
  SET_VECTOR_ELT(R_res, 3, R_sreads);
  SET_VECTOR_ELT(R_res, 4, R_mprop);
  SET_VECTOR_ELT(R_res, 5, R_allmap);
  for(i=0; i<add_ncol; ++i) {
    SEXP R_anno=NULL;
    R_anno=PROTECT(allocVector(STRSXP, nrow));
    for(j=0; j<nrow; ++j) {
      // a separate variable keeps the scratch buffer 'anno' reachable for cleanup
      rowAnno=array(annos, j, Texta);
      if(arrayMax(rowAnno)>i) {
         SET_STRING_ELT(R_anno, j, mkChar(textItem(rowAnno, i)));
      } else {
         SET_STRING_ELT(R_anno, j, R_NaString);
      }
    }
    SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); // -1 because the first column is row name
    UNPROTECT(1);
  }

  PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
  PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
  SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
  SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
  SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
  SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
  SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
  SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
  for(i=0; i<add_ncol; ++i) {
    stringPrintf(str, "Annotation%d", i+1);
    SET_STRING_ELT(R_colnames, i+MAND_NCOL-1,
                   mkChar(string(str)));
  }
  SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
  setAttrib(R_res, install("names"), R_colnames);
  setAttrib(R_res, install("row.names"), R_rnames);
  setAttrib(R_res, install("class"), R_class);

  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  // the scratch buffer only exists if at least one data line was read
  if(anno != NULL) {
    textDestroy(anno);
  }
  // rnames is a Texta: textDestroy releases the strings as well as the array
  textDestroy(rnames);
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);

  ls_destroy(ls);
  UNPROTECT(nprot);
  return(R_res);
}
コード例 #16
0
/*
 * Annotates tab-separated lines with gene symbol and description.
 *
 * The cross-reference table named by ANNOTATION_DIR and
 * KNOWN_GENE_XREF_FILENAME (configuration from FUSIONSEQ_CONFPATH) is loaded
 * and sorted by transcript name. For every line of the stream opened with
 * ls_createFromFile("-"), the first tab-delimited field is taken as
 * transcript name; if it contains '_', the part after the last '_' is split
 * off as exon id. The line is printed as name[_exon], gene symbol, exon id
 * (or 1), description, followed by the original line from the offset equal
 * to the length of the (possibly shortened) name. The line count is reported
 * through warn().
 */
int main (int argc, char *argv[])
{
	config *conf = confp_open(getenv("FUSIONSEQ_CONFPATH"));
	Array kgXrefs;
	Stringa xrefPath;
	LineStream ls;
	char *line;
	char *geneSymbolTranscript;
	char *descriptionTranscript;
	char *exonID = NULL;
	int numLines = 0;

	if (conf == NULL)
		return EXIT_FAILURE;

	xrefPath = stringCreate (100);
	stringPrintf (xrefPath,"%s/%s",
		      confp_get(conf, "ANNOTATION_DIR"),
		      confp_get(conf, "KNOWN_GENE_XREF_FILENAME"));
	kgXrefs = util_readKnownGeneXrefs (string (xrefPath));
	arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName);
	stringDestroy (xrefPath);

	ls = ls_createFromFile("-");

	for (line = ls_nextLine(ls); line; line = ls_nextLine(ls)) {
		/* the copy is taken before the word iterator is created on 'line' */
		char *lineCopy = hlr_strdup(line);
		WordIter fields = wordIterCreate( line, "\t", 0);
		char *nameTranscript = wordNext( fields );
		char *underscore = rindex(nameTranscript, '_');

		if (underscore) {
			exonID = hlr_strdup( underscore+1 );
			*underscore = '\0';
		}
		transcript2geneSymbolAndGeneDescription(kgXrefs,
							nameTranscript,
							&geneSymbolTranscript,
							&descriptionTranscript);
		if (!exonID) {
			printf("%s\t%s\t1\t%s",
				nameTranscript,
				geneSymbolTranscript,
				descriptionTranscript);
		} else {
			printf("%s_%s\t%s\t%s\t%s",
				nameTranscript,
				exonID,
				geneSymbolTranscript,
				exonID,
				descriptionTranscript);
			hlr_free(exonID);
		}
		printf("%s\n", lineCopy+strlen(nameTranscript));
		numLines++;
		hlr_free(lineCopy);
		wordIterDestroy(fields);
	}
	ls_destroy (ls);
	warn ("%s_numGfrEntries: %d",argv[0],numLines);
	confp_close(conf);

	return EXIT_SUCCESS;
}
コード例 #17
0
ファイル: sqvCircos.c プロジェクト: arsenijae/fusionseq
/*
 * Emits the "file = ..." line for the exon highlight track into 'fp'.
 *
 * <sdata_dir>/tmp/exons.hlight_s.txt is always opened for writing. If the
 * first region has chromosome 0, 'fp' is pointed at the complete file
 * <sdata_dir>/exons.hlight.txt. Otherwise, for every region the
 * per-chromosome file (<sdata_dir>/X, /Y or /<number>) is scanned and each
 * line whose second or third space-separated field, read as an integer,
 * lies within [region start, region end] is copied to the tmp file; 'fp' is
 * then pointed at the tmp file.
 *
 * On failure to open a file a message is written to stderr and the function
 * returns without writing the "file = ..." line.
 */
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src = NULL;
  FILE *out;
  char *line;
  Texta entry;
  int i, astart, aend;
  SRegion_t *tmp;

  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
    fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
    stringDestroy (buffer);
    return;
  }

  tmp = arrayp (regions, 0, SRegion_t);

  if (tmp->chromosome == 0) {
    fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  }
  else {
    for (i = 0; i < arrayMax (regions); i++) {
      tmp = arrayp (regions, i, SRegion_t);

      if (tmp->chromosome == 23) {
        stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
      }
      else if (tmp->chromosome == 24) {
        stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
      }
      else {
        stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
      }

      if ((src = ls_createFromFile (string (buffer))) == NULL) {
        fprintf (stderr, "Cannot open exons.hlight.txt\n");
        /* release what has been acquired so far */
        fclose (out);
        stringDestroy (buffer);
        return;
      }

      while ((line = ls_nextLine (src)) != NULL) {
        entry = textFieldtokP (line, " ");

        /* fields 1 and 2 are needed; ignore lines that do not have them */
        if (arrayMax (entry) > 2) {
          astart = atoi (textItem (entry, 1));
          aend   = atoi (textItem (entry, 2));

          if ((astart >= tmp->start && astart <= tmp->end) ||
              (aend >= tmp->start && aend <= tmp->end)) {
            fprintf (out, "%s\n", line);
          }
        }
        textDestroy (entry);
      }
      /* one stream per region: close it before the next one is opened */
      ls_destroy (src);
    }

    fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }

  stringDestroy (buffer);
  fclose (out);
}
コード例 #18
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  int count;
  int countRemoved;
  int mitochondrialCount; 
  unsigned int minReadSize;
  int  i;
  Stringa cmd;
  BlatQuery *blQ=NULL;
  config *conf = NULL; /**< Pointer to configuration file .fusionseqrc  */

  if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) {
    die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH"));
    return EXIT_FAILURE;
  }
  if( confp_get( conf,"MAX_OVERLAP_ALLOWED")==NULL ) {
    die("%s:\tCannot find MAX_OVERLAP_ALLOWED in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( conf,"MAX_FRACTION_HOMOLOGOUS")==NULL ) {
    die("%s:\tCannot find MAX_FRACTION_HOMOLOGOUS in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
 if( confp_get( conf, "MITOCHONDRIAL_DIR")==NULL ) {
    die("%s:\tCannot find MITOCHONDRIAL_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( conf, "MITOCHONDRIAL_FILENAME")==NULL ) {
    die("%s:\tCannot find MITOCHONDRIAL_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
if( confp_get( conf, "TMP_DIR")==NULL ) {
    die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
 if( confp_get( conf, "BLAT_GFSERVER")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
 if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) {
    die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }

  count = 0;
  countRemoved = 0;
  
  cmd = stringCreate (100);
  // initializing the gfServers
  stringPrintf( cmd, "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"),  confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2);
  int ret = hlr_system( string(cmd), 1 );
   if( ret != 0 ) { // not initialized
    stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_mitochondrial.log start %s %d %s/%s  &", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "TMP_DIR"),  confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf,"MITOCHONDRIAL_FILENAME"));
    hlr_system( string( cmd ), 0 );
    long int startTime = time(0);
    stringPrintf( cmd , "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2);
    while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ;
    if( hlr_system( string(cmd), 1 ) != 0 )  {
      die("gfServer for %s/%s not initialized: %s %s %s", confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf, "MITOCHONDRIAL_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); 
      return EXIT_FAILURE;
    }
  } 

 
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while (currGE = gfr_nextEntry ()) {
    if (strEqual(currGE->chromosomeTranscript1, "chrM") || 
	strEqual(currGE->chromosomeTranscript2, "chrM")) {
      countRemoved++;
      continue;
    } else {
      mitochondrialCount = 0;
      minReadSize=1000;
      writeFasta( currGE, &minReadSize, confp_get( conf, "TMP_DIR") ); // in util.c
      stringPrintf(cmd, "cd %s;%s %s %d / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.mito.psl &>/dev/null", confp_get( conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, minReadSize - 5 > 20 ? minReadSize - 5 : 20 , currGE->id, currGE->id);
      int attempts=0;
      ret = hlr_system( string(cmd), 1 );
      while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++;
      if( attempts == 5000 ) {
	die("Cannot map the reads %s", string( cmd ));
	return EXIT_FAILURE;
      }

      // reading the results of blast from File
      stringPrintf(cmd,  "%s/%s.mito.psl", confp_get( conf, "TMP_DIR"), currGE->id);
      blatParser_initFromFile( string(cmd) );
      while( blQ = blatParser_nextQuery() ) {
	//warn("iter %d\tquery %s", iter, blQ->qName );iter++; 
	int nucleotideOverlap = getNucleotideOverlap ( blQ );
	if (nucleotideOverlap > (((double) minReadSize) * strtod(confp_get( conf, "MAX_OVERLAP_ALLOWED"), NULL))) {
	  char* value = strchr( blQ->qName,'/' );
	  if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName );
	  int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry
	  GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead );
	  currGIR->flag = 1;
	  mitochondrialCount++;
	} 
      }
      blatParser_deInit();
      if ( ( (double) mitochondrialCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) ) <= strtod(confp_get( conf, "MAX_FRACTION_HOMOLOGOUS"), NULL)) {   
	if( mitochondrialCount > 0 ) updateStats( currGE );
	// writing the gfrEntry
	puts (gfr_writeGfrEntry (currGE));
	count++;
      } else {
	countRemoved++;
      }
      // removing temporary files
      stringPrintf (cmd,"rm -rf %s/%s_reads.fa %s/%s.mito.psl", confp_get( conf, "TMP_DIR"),  currGE->id, confp_get( conf, "TMP_DIR"),  currGE->id );
      hlr_system( string(cmd) , 1);      
    } 
    
  }
  gfr_deInit ();
 
  stringDestroy( cmd );
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  confp_close(conf);
  return 0;
}
コード例 #19
0
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  int i,j,k,l, h,index;
  Stringa buffer,cmd,fnSequencesToAlign;
  FILE *fp;
  FILE *fp1;
  FILE *fp2;
  FILE *freads1;
  FILE *freads2;
  Array gfrEntries;
  BowtieQuery *currBQ,testBQ;
  BowtieEntry *currBE;
  Texta seqNames;
  int readSize1, readSize2, minReadSize;
  Array bowtieQueries;
  char transcriptNumber;
  int isHomologous,homologousCount;
  int count;
  int countRemoved;
  unsigned short int tooMany;
  BlatQuery *blQ;

  config *conf;

  if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) {
    die("%s:\tCannot find .fusionseqrc", argv[0]);
    return EXIT_FAILURE;
  } 
  if ( (confp_get( conf, "BLAT_TWO_BIT_TO_FA")) == NULL) {
    die("%s:\tCannot find BLAT_TWO_BIT_TO_FA in the configuration file: %s", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  } 
  if ( (confp_get( conf,"BLAT_DATA_DIR")) == NULL) {
    die("%s:\tCannot find BLAT_DATA_DIR in the configuration file: %sc", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  } 
 if( confp_get( conf, "TMP_DIR")==NULL ) {
    die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( conf, "BLAT_GFSERVER")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
 if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) {
    die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) {
    die("%s:\tCannot find BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
 if( confp_get( conf, "PSEUDOGENE_DIR")==NULL ) {
   die("%s:\tCannot find PSEUDOGENE_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
   return EXIT_FAILURE;
 }
 if( confp_get( conf, "PSEUDOGENE_FILENAME")==NULL ) {
   die("%s:\tCannot find PSEUDOGENE_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
   return EXIT_FAILURE;
 }
 
  cmd = stringCreate (100);
  // initializing the gfServers
  stringPrintf( cmd, "%s status %s %s &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT") );
  int ret = hlr_system( string(cmd), 1 );
  if( ret != 0 ) { // not initialized
    stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_genome.log start %s %s %s/%s  &", confp_get( conf, "BLAT_GFSERVER"), confp_get(conf, "TMP_DIR"),confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"));
    hlr_system( string( cmd ), 0 );
    long int startTime = time(0);
    stringPrintf( cmd , "%s status %s %s &2> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"));
    while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ;
    if( hlr_system( string(cmd), 1 ) != 0 )  {
      die("gfServer for %s/%s not initialized: %s %s %s", confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); 
      return EXIT_FAILURE;
    }
  } 
  // end initialization

  
  gfr_init ("-");
  gfrEntries =  gfr_parse ();
  if (arrayMax (gfrEntries) == 0){
    puts (gfr_writeHeader ());
    gfr_deInit ();
    return 0;
  }
  seqNames = textCreate (10000); 
  buffer = stringCreate (100);
  fnSequencesToAlign = stringCreate (100);
  count = 0;
  countRemoved = 0;

  stringPrintf( buffer, "%s/%s", confp_get( conf, "PSEUDOGENE_DIR"), confp_get( conf, "PSEUDOGENE_FILENAME") );
  intervalFind_addIntervalsToSearchSpace (string(buffer),0);

  puts (gfr_writeHeader ());
 
  for (i = 0; i < arrayMax (gfrEntries); i++) {
    currGE = arrp (gfrEntries,i,GfrEntry);
    homologousCount = 0;
    minReadSize=10000;
    // creating two fasta files with the two genes
    
    stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript1.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA") , confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript1, currGE->startTranscript1, currGE->endTranscript1, confp_get(conf, "TMP_DIR"), currGE->id);
    hlr_system( string(cmd) , 0);   
    stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript2.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA"),  confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript2, currGE->startTranscript2, currGE->endTranscript2, confp_get(conf, "TMP_DIR"), currGE->id);
    hlr_system( string(cmd) , 0);   
    
    Stringa fa1 = stringCreate( 100 ); 
    Stringa fa2 = stringCreate( 100 );
    
    // creating the two fasta files with the reads
    stringPrintf( fa1, "%s/%s_reads1.fa", confp_get(conf, "TMP_DIR"), currGE->id);
    if (!(freads1 = fopen ( string(fa1) ,"w"))) {
      die ("Unable to open file: %s",string (fa1));
    }   
    // writing the reads of the first end into file
    
    for (l = 0; l < arrayMax (currGE->readsTranscript1); l++) {
      char* currRead1 = hlr_strdup( textItem (currGE->readsTranscript1,l)); // read1
      readSize1 = strlen( currRead1 );
      if( readSize1 == 0 ) die("Read size cannot be zero: read1[ %s ]", currRead1);
      if( readSize1 < minReadSize ) minReadSize = readSize1;
      fprintf( freads1, ">%d\n%s\n", l, currRead1 );
      hlr_free( currRead1 );
    }
    fclose( freads1 );
    
    stringPrintf( fa2, "%s/%s_reads2.fa", confp_get(conf, "TMP_DIR"), currGE->id);
    if (!(freads2 = fopen ( string(fa2) ,"w"))) {
      die ("Unable to open file: %s",string (fa2));
    } 
    // writing the reads of the second end into file
    for (l = 0; l < arrayMax (currGE->readsTranscript2); l++) {
      char* currRead2 = hlr_strdup( textItem (currGE->readsTranscript2,l)); // read2
      readSize2 = strlen( currRead2 );
      if( readSize2 == 0 ) die("Read size cannot be zero: read2[ %s ]", currRead2);
      if( readSize2 < minReadSize ) minReadSize = readSize2;
      fprintf( freads2, ">%d\n%s\n", l, currRead2 );
      hlr_free( currRead2 );
    }
    fclose( freads2 );      
    
    // collapse the reads 2  ## requires the FASTX package
    stringPrintf( cmd, "%s -i %s/%s_reads2.fa -o %s/%s_reads2.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id  );
    hlr_system (string (cmd),0);
    
    //blat of reads2 against the first transcript
    stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript1.fa %s/%s_reads2.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id );
    
    // reading the results of blast from Pipe
    blatParser_initFromPipe( string(cmd) );
    while( blQ = blatParser_nextQuery() ) {
      int nucleotideOverlap = getNucleotideOverlap ( blQ );
      if ( nucleotideOverlap > ( ((double)readSize2)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) {
	char* value = strchr(blQ->qName,'-');
	homologousCount+=atoi(value+1);
      }
    }
    blatParser_deInit();
    
    // collapse the reads 1 ## requires the FASTX package on the path
    stringPrintf( cmd, "%s -i %s/%s_reads1.fa -o %s/%s_reads1.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id  );
    hlr_system (string (cmd),0);
    
    //blat of reads1 against the second transcript
    stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript2.fa %s/%s_reads1.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id  );
    
    blatParser_initFromPipe( string(cmd) );
    while( blQ = blatParser_nextQuery() ) {		
      int nucleotideOverlap = getNucleotideOverlap ( blQ );
      if ( nucleotideOverlap > ( ((double)readSize1)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) {
	char* value = strchr(blQ->qName,'-');
	homologousCount+=atoi(value+1);
      }
    }
    blatParser_deInit();
    stringPrintf (cmd,"cd %s;rm -rf %s_reads?.fa %s_reads?.collapsed.fa %s_transcript?.fa", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id);
    hlr_system( string(cmd) , 0);      
    if (((double)homologousCount / (double)arrayMax(currGE->readsTranscript1)) <= atof(confp_get(conf, "MAX_FRACTION_HOMOLOGOUS")) ) { 
      homologousCount = 0;
      // there is no homology between the two genes, but what about the rest of the genome
      writeFasta( currGE, &minReadSize,  confp_get(conf, "TMP_DIR") );
      stringPrintf(cmd, "cd %s; %s %s %s / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.smallhomology.psl &>/dev/null", confp_get(conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), minReadSize - (int)(0.1 * minReadSize) > 20 ? minReadSize - (int) (0.1 * minReadSize) : 20 ,  currGE->id,  currGE->id);
      int attempts=0;
      ret = hlr_system( string(cmd), 1 );
      while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++;
      if( attempts == 5000 ) {
	die("Cannot map the reads %s", string( cmd ));
	return EXIT_FAILURE;
      }
      // reading the results of blast from File
      stringPrintf(cmd,  "%s/%s.smallhomology.psl", confp_get( conf, "TMP_DIR"), currGE->id);
      blatParser_initFromFile( string(cmd) );
      tooMany = 1;
      while( blQ = blatParser_nextQuery() ) {
	tooMany = 0;
	checkPseudogeneOverlap( blQ );
	if( arrayMax( blQ->entries ) > 1 ) {
	  homologousCount+= arrayMax( blQ->entries ) - 1;
	  char* value = strchr( blQ->qName,'/' );
	  if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName );
	  int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry
	  GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead );
	  currGIR->flag = 1;
	}
      }
      blatParser_deInit();
      if (  tooMany == 1 || ( ( (double) homologousCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) )  > atof(confp_get(conf, "MAX_FRACTION_HOMOLOGOUS")) ) ) {
	countRemoved++;
	stringPrintf (cmd,"cd %s; rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id);
	hlr_system( string(cmd), 1 );
	continue;
      }
      // writing the gfrEntry, if everthing else didn't stop 
      if( homologousCount > 0 ) updateStats( currGE );
      puts (gfr_writeGfrEntry (currGE));
      count++;
      // removing temporary files
      stringPrintf (cmd,"cd %s;rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa  %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id);
      hlr_system( string(cmd) , 1);      
    } else {
      countRemoved++;
    }
    
  }

  gfr_deInit ();

  stringDestroy (fnSequencesToAlign);
  stringDestroy (cmd);
  stringDestroy (buffer);
  warn ("%s_numRemoved: %d",argv[0],countRemoved);  
  warn ("%s_numGfrEntries: %d",argv[0],count);

  confp_close(conf);

  return EXIT_SUCCESS;
}
コード例 #20
0
ファイル: geneFusions_cgi.c プロジェクト: asboner/fusionseq
/*
 * Prints the HTML result page for one data set.
 *
 * prefix       - data-set name; used to locate <WEB_DATA_DIR>/<prefix>.gfr and
 *                the <WEB_DATA_DIR>/BGRS/<prefix>_chr*.bgr.gz signal files, and
 *                echoed into the page. An empty prefix aborts through die().
 * typeSelected - fusion type to show: "all", an exact fusionType value, or
 *                "same", which matches both "cis" and "read-through".
 * minNum       - entries with fewer than minNum inter paired-end reads are
 *                skipped.
 *
 * The page is written to standard output: a row of per-chromosome expression
 * links followed by one table row per matching GFR entry.
 *
 * NOTE(review): prefix is interpolated unescaped into a shell command line and
 * into HTML. If it can come from an untrusted request it must be validated by
 * the caller - confirm.
 */
static void generateOutput (char* prefix, char* typeSelected, int minNum)
{
  GfrEntry *currGE;
  Stringa buffer;
  LineStream ls;
  char *pos;
  char *chrSignal = NULL;
  char *flankSetting;
  int flank;
  int countCol = 0;
  int countElements = 0;

  puts ("<html>");
  puts ("<head>");
  puts ("<title>Results - Gene Fusions</title>");
  html_printGenericStyleSheet (12);
  puts ("</head>");
  puts ("<body>");
  if (prefix[0] == '\0') {
    die ("Invalid prefix");
  }
  printf ("<h1>Results - %s</h1><br><br><br>",prefix);

  // The flanking region is stored as text in the configuration; it has to be
  // converted before it can be used as a coordinate offset.
  flankSetting = confp_get(Conf, "UCSC_GENOME_BROWSER_FLANKING_REGION");
  flank = flankSetting ? atoi (flankSetting) : 0;

  buffer = stringCreate(50);
  //Chromosome expression, if present
  stringPrintf(buffer, "ls -1 %s/BGRS/%s_chr*.bgr.gz 2> /dev/null",
	       confp_get(Conf, "WEB_DATA_DIR"),
	       prefix);
  ls = ls_createFromPipe(string(buffer));
  puts ("Expression signal: &nbsp;");
  fflush(stdout);
  while ((chrSignal = ls_nextLine(ls)) != NULL) {
    // text between the prefix and ".bgr.gz"; the first character is skipped
    // below (presumably the '_' separator - confirm)
    char *chrTmp = stringBetween( prefix, ".bgr.gz", chrSignal );
    if (chrTmp == NULL || chrTmp[0] == '\0') {
      continue;
    }
    chrTmp++;
    printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target='blank'>%s</a>]&nbsp;",
	    htmlLinker_generateLinkToGenomeBrowserAtUCSC("hg18","vertebrate","human", chrTmp,
			flank,
			50000000 + flank),
	    confp_get(Conf, "WEB_DATA_LINK"),
	    prefix,
	    chrTmp,
	    chrTmp);
    // line break after a full row of links
    if (countCol > 10) {
      puts( "<BR>" );
      countCol=0;
    }
    countCol++;
  }
  if( countCol==0) puts( "No data available yet" );
  ls_destroy(ls);
  puts ("<br><br>");
  puts ("For a definition of SPER, DASPER and RESPER see <a href=http://rnaseq.gersteinlab.org/fusionseq/>FusionSeq</a>");
  puts ("<br><br>");
  puts ("<br><table border=0 width=100% align=center cellpadding=10>");
  puts ("<tr align=left>");
  puts ("<th>SPER</th>");
  puts ("<th>DASPER</th>");
  puts ("<th>RESPER</th>");
  puts ("<th>Number of inter paired-end reads</th>");
  puts ("<th>Type</th>");
  puts ("<th>Genomic coordinates</th>");
  puts ("<th>Gene symbol</th>");
  puts ("<th>Description</th>");
  puts ("<th>Genomic coordinates</th>");
  puts ("<th>Gene symbol</th>");
  puts ("<th>Description</th>");
  puts ("<th></th>");
  puts ("</tr>");
  fflush(stdout);

  stringPrintf (buffer,"%s/%s.gfr", confp_get(Conf, "WEB_DATA_DIR"), prefix);
  gfr_init (string (buffer));
  while ((currGE = gfr_nextEntry ()) != NULL) {
    if (currGE->numInter < minNum) {
      continue;
    }
    if (strEqual (typeSelected,"all") || strEqual (currGE->fusionType,typeSelected) ||
	( strEqual(currGE->fusionType,"cis") && strEqual( typeSelected,"same") ) ||
	( strEqual(currGE->fusionType,"read-through") && strEqual( typeSelected,"same") ) ) {
      // only the part of each description before the first '|' is shown
      if ((pos = strchr (currGE->descriptionTranscript1,'|')) != NULL) {
        *pos = '\0';
      }
      if ((pos = strchr (currGE->descriptionTranscript2,'|')) != NULL) {
        *pos = '\0';
      }
      puts ("<tr>");
      printf ("<td align=left>%1.3f</td>\n",currGE->SPER);
      printf ("<td align=left>%1.3f</td>\n",currGE->DASPER);
      printf ("<td align=left>%1.3f</td>\n",currGE->RESPER);
      printf ("<td align=left>%d</td>\n",currGE->numInter);
      printf ("<td align=left>%s</td>\n",currGE->fusionType);
      printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n",
              htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",
			currGE->chromosomeTranscript1,
			currGE->startTranscript1 - flank,
			currGE->endTranscript1 + flank),
     	      currGE->chromosomeTranscript1,currGE->startTranscript1,currGE->endTranscript1);
      printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript1));
      printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript1);
      printf ("<td align=left><a href=%s target=blank>%s:%d-%d</a></td>\n",
              htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",
		     	currGE->chromosomeTranscript2,
			currGE->startTranscript2 - flank,
			currGE->endTranscript2 + flank),
              currGE->chromosomeTranscript2,currGE->startTranscript2,currGE->endTranscript2);
      printf ("<td align=left>%s</td>\n",processString (currGE->geneSymbolTranscript2));
      printf ("<td align=left>%s</td>\n",currGE->descriptionTranscript2);
      printf ("<td align=left><a href=%s/showDetails_cgi?%s+%s>Details</a></td>\n", confp_get(Conf, "WEB_URL_CGI"), prefix,currGE->id);
      puts ("</tr>");
      countElements++;
    }
  }
  gfr_deInit ();
  stringDestroy (buffer);
  puts ("</table><br><br>");
  if( countElements == 0) puts("No fusion candidates can be found satisfying all specified criteria.");
  puts ("</body>");
  puts ("</html>");
  fflush (stdout);
}
コード例 #21
0
/*
 * Blacklist filter for gene-fusion candidates.
 *
 * Loads the gene pairs listed in <ANNOTATION_DIR>/<BLACKLIST_FILENAME> (two
 * tab-separated gene symbols per line), then reads GFR entries from standard
 * input and writes to standard output the header plus every entry that is kept.
 * An entry is removed when both transcripts carry the same gene symbol or when
 * its pair of symbols, in either order, is found in the blacklist.
 * The blacklist file name and the numbers of removed and kept entries are
 * reported through warn().
 *
 * Returns 0 on completion and EXIT_FAILURE when the configuration is
 * incomplete or the GFR input carries no gene symbols.
 */
int main (int argc, char *argv[])
{
  GfrEntry *currGE;
  BLEntry *currBLE;
  BLEntry currQuery;
  char *line;
  int count;
  int countRemoved;
  int lineNumber = 0;

  int index;
  WordIter w;
  Array blackList = arrayCreate(20, BLEntry);
  config *Conf;

  if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) {
    die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH"));
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) {
    die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) {
    die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") );
    return EXIT_FAILURE;
  }
  Stringa buffer=stringCreate( 100 );
  stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") );

  // reading blacklist file
  LineStream ls = ls_createFromFile( string(buffer) );
  while( (line = ls_nextLine(ls)) != NULL ) {
    char *gene1;
    char *gene2;

    lineNumber++;
    w = wordIterCreate( line, "\t", 1);
    gene1 = wordNext(w);
    gene2 = gene1 ? wordNext(w) : NULL;
    if( gene1 == NULL || gene2 == NULL ) {
      // blank or incomplete line: there is no pair to store, and duplicating
      // a missing field must be avoided
      warn ("%s: skipping blacklist line %d: expected two tab-separated gene symbols", argv[0], lineNumber);
      wordIterDestroy(w);
      continue;
    }
    // append a new entry; the symbols are duplicated before the iterator
    // they came from is destroyed
    currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
    currBLE->gene1 = hlr_strdup ( gene1 );
    currBLE->gene2 = hlr_strdup ( gene2 );
    wordIterDestroy(w);
  }
  ls_destroy( ls );
  stringDestroy( buffer );
  // sorted with the same comparator that arrayFind is given below
  arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);

  // beginFiltering
  count = 0;
  countRemoved = 0;
  gfr_init ("-");
  puts (gfr_writeHeader ());
  while( (currGE = gfr_nextEntry ()) != NULL ) { // reading the gfr
    if( currGE->geneSymbolTranscript1 == NULL || currGE->geneSymbolTranscript2 == NULL ) {
      die("Gene symbols are not present in the GFR file. Please run gfrAddInfo before gfrBlackListFilter.");
      return EXIT_FAILURE;
    }

    // creating a new query to the black list
    currQuery.gene1 = currGE->geneSymbolTranscript1;
    currQuery.gene2 = currGE->geneSymbolTranscript2;
    if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) {
      // both ends carry the same gene symbol: always removed
      countRemoved++;
      continue;
    }
    // searching against read_1/read_2
    int res = arrayFind( blackList, &currQuery,
			 &index,  (ARRAYORDERF) sortBlackListByName1);

    if( !res ) { // not found, then searching against read_2/read_1
      currQuery.gene1 = currGE->geneSymbolTranscript2;
      currQuery.gene2 = currGE->geneSymbolTranscript1;

      res =  arrayFind( blackList, &currQuery,
			&index, (ARRAYORDERF) sortBlackListByName1 );
    }

    if( res ) { // found in either orientation
      countRemoved++;
    } else { // not found, write the instance to stdout, update the counts
      puts (gfr_writeGfrEntry (currGE));
      count++;
    }
  }
  gfr_deInit ();
  arrayDestroy( blackList );
  warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME"));
  warn ("%s_numRemoved: %d",argv[0],countRemoved);
  warn ("%s_numGfrEntries: %d",argv[0],count);
  confp_close( Conf);
  return 0;
}