Пример #1
0
int main (int argc, char *argv[])
{
  int i,j;
  Array intervals;
  Interval *currInterval;
  SubInterval *currSubInterval;

  if (argc != 2) {
    usage ("%s <trackName>",argv[0]);
  }
  intervalFind_addIntervalsToSearchSpace ("-",0);
  intervals = intervalFind_getAllIntervals ();
  puts ("browser hide all");
  printf ("track name=\"%s\" visibility=2\n",argv[1]);
  for (i = 0; i < arrayMax (intervals); i++) {
    currInterval = arrp (intervals,i,Interval);
    for (j = 0; j < arrayMax (currInterval->subIntervals); j++) {
      currSubInterval = arrp (currInterval->subIntervals,j,SubInterval);
       printf ("%s\tannotation\texon\t%d\t%d\t.\t%c\t.\tgroup%d\n",
               currInterval->chromosome,
	       currSubInterval->start + 1,
	       currSubInterval->end,
	       currInterval->strand,
	       i);
    }
  }
  return 0;
}
Пример #2
0
static int containedLocus ( MrfEntry* query, Locus* target )
{
  int overlap=0;
  
  if( query->isPairedEnd ) { // do this only if paired end
    int i;
    for( i=0; i<arrayMax( (query->read1).blocks ); i++) {
      MrfBlock* qBlock = arrp( (query->read1).blocks, i, MrfBlock);
      if( !strcmp( qBlock->targetName, target->chromosome) ) {
        if( positiveRangeIntersection( qBlock->targetStart, qBlock->targetEnd, target->start, target->end ) > 0 ) {
          overlap = 1; // found
          i = arrayMax( (query->read1).blocks ); // found, then stop
        }
      }
    }
    if( overlap == 0 ) { // not found in read 1, let's check read 2
      for( i=0; i<arrayMax( (query->read2).blocks ); i++) {
        MrfBlock* qBlock = arrp( (query->read2).blocks, i, MrfBlock);
        if( !strcmp( qBlock->targetName, target->chromosome) ) {
          if( positiveRangeIntersection( qBlock->targetStart, qBlock->targetEnd, target->start, target->end ) > 0 ) {
            overlap = 1; // found
            i = arrayMax( (query->read2).blocks ); // found, then stop
          }
        }
      }
    }
  }
  
  return overlap;
}
Пример #3
0
/*
 * Builds currGE->pairCounts from currGE->interReads.
 *
 * The inter-reads are sorted in place with sortGfrInterReads; every run of
 * consecutive reads sharing pairType, number1 and number2 then becomes one
 * GfrPairCount whose count is the length of the run.
 */
static void obtainPairCounts (GfrEntry *currGE)
{
	int runStart;

	currGE->pairCounts = arrayCreate (100,GfrPairCount);
	arraySort (currGE->interReads,(ARRAYORDERF)sortGfrInterReads);

	runStart = 0;
	while (runStart < arrayMax (currGE->interReads)) {
		GfrInterRead *first = arrp (currGE->interReads,runStart,GfrInterRead);
		GfrPairCount *tally = arrayp (currGE->pairCounts,arrayMax (currGE->pairCounts),GfrPairCount);
		int runEnd;

		tally->number1 = first->number1;
		tally->number2 = first->number2;
		tally->pairType = first->pairType;
		tally->count = 1;

		/* Extend the run for as long as the following reads match the first one. */
		for (runEnd = runStart + 1; runEnd < arrayMax (currGE->interReads); runEnd++) {
			GfrInterRead *candidate = arrp (currGE->interReads,runEnd,GfrInterRead);

			if (first->pairType != candidate->pairType ||
			    first->number1 != candidate->number1 ||
			    first->number2 != candidate->number2) {
				break;
			}
			tally->count++;
		}
		runStart = runEnd;
	}
}
Пример #4
0
/*
 * Tells whether any block of the read overlaps an exon of an annotated
 * transcript.
 *
 * For each block the overlapping intervals are fetched from intervalFind
 * and every sub-interval of every returned interval is intersected with the
 * block's target range.
 *
 * Returns 1 on the first positive intersection, 0 if there is none.
 */
static int isContained (MrfRead *currRead)
{
  int blockIdx;

  for (blockIdx = 0; blockIdx < arrayMax (currRead->blocks); blockIdx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIdx,MrfBlock);
    Array transcripts = intervalFind_getOverlappingIntervals (block->targetName,block->targetStart,block->targetEnd);
    int transcriptIdx;

    for (transcriptIdx = 0; transcriptIdx < arrayMax (transcripts); transcriptIdx++) {
      /* The returned array holds pointers to intervals, hence arru. */
      Interval *transcript = arru (transcripts,transcriptIdx,Interval*);
      int exonIdx;

      for (exonIdx = 0; exonIdx < arrayMax (transcript->subIntervals); exonIdx++) {
        SubInterval *exon = arrp (transcript->subIntervals,exonIdx,SubInterval);
        int shared = rangeIntersection (block->targetStart,block->targetEnd,exon->start,exon->end);

        if (shared > 0) {
          return 1;
        }
      }
    }
  }
  return 0;
}
Пример #5
0
int main (int argc, char *argv[]) 
{
  if( argc != 3 ) {
    usage( "%s <errors|noerrors> <gaps|nogaps>", argv[0]);
    return -1;
  }
  MrfEntry *currEntry;
  MrfBlock *currBlock;
  int i, error, errorCode, numErrors;
  int gaps = 0;
  if( strEqual( "gaps", argv[2] ) )
    gaps = 1;
  seq_init();
  mrf_init ("-");
  printf( "%s\n", mrf_writeHeader());
  numErrors = 0;
  while (currEntry = mrf_nextEntry ()) {
    error = 0;
    for( i=0; i<arrayMax(currEntry->read1.blocks); i++) {   
      currBlock=arrp( currEntry->read1.blocks, i, MrfBlock);
      if( i==0 ) {
	errorCode = processBlock ( currBlock, NULL, gaps );
      } else {
	errorCode = processBlock ( currBlock, arrp( currEntry->read1.blocks, i-1, MrfBlock ), gaps );
      }
      if(  errorCode != 0 ) {
	warn( "Error code (read1): %d\t%s:%c:%d:%d:%d:%d", errorCode, currBlock->targetName, currBlock->strand, currBlock->targetStart, 
	      currBlock->targetEnd, currBlock->queryStart, currBlock->queryEnd);	
	error++;
      }
    }
    if (currEntry->isPairedEnd & error == 0) {
      for( i=0; i<arrayMax(currEntry->read2.blocks); i++) {
	currBlock=arrp( currEntry->read2.blocks, i, MrfBlock);
	if( i==0 ) {
	  errorCode = processBlock ( currBlock, NULL , gaps ); 
	} else {
	  errorCode = processBlock (currBlock, arrp( currEntry->read2.blocks, i-1, MrfBlock) , gaps);
	}
	if( errorCode !=0 ) {
	  warn( "Error code (read2): %d\t%s:%c:%d:%d:%d:%d", errorCode, currBlock->targetName, currBlock->strand, currBlock->targetStart, 
		currBlock->targetEnd, currBlock->queryStart, currBlock->queryEnd);
	  error++;
	}
      }
    }
    if( error > 0 && strEqual(argv[1], "errors")) {
      printf( "%s\n", mrf_writeEntry( currEntry ));
      error=0;
      numErrors++;
    } 
    if( error == 0 && strEqual(argv[1], "noerrors")) 
      printf( "%s\n", mrf_writeEntry( currEntry ));
  }
  mrf_deInit ();
  warn("%s: done", argv[0]);
  return 0;

}
Пример #6
0
/*
 * Adds a contig to a scaffold array.
 * Gap size, type and evidence refer to the gap between this and the
 * "previous" contig - ie the last in the scaffold. More complex
 * scaffold manipulations will be handled elsewhere.
 *
 * Set these fields to 0 if you do not know them.
 *
 * If the contig currently belongs to a scaffold it is removed from it
 * first.  If it is (still) listed in the target scaffold afterwards,
 * nothing is changed.  The first member of a scaffold always gets a gap
 * size of 0.
 *
 * Returns 0 on success
 *        -1 on failure (contig or scaffold could not be loaded, could not
 *           be made writable, or the member array could not be extended)
 */
int scaffold_add(GapIO *io, tg_rec scaffold, tg_rec contig,
		 int gap_size, int gap_type, int evidence) {
    scaffold_t *f;
    contig_t *c;
    scaffold_member_t *m;
    int i;

    /* Check if this contig is in a scaffold, if so remove now */
    if (!(c = cache_search(io, GT_Contig, contig)))
	return -1;
    if (c->scaffold)
	scaffold_remove(io, c->scaffold, contig);

    if (!(f = cache_search(io, GT_Scaffold, scaffold)))
	return -1;

    /* Check if it already exists */
    for (i = 0; i < ArrayMax(f->contig); i++) {
	m = arrp(scaffold_member_t, f->contig, i);
	if (m->rec == contig)
	    return 0;
    }

    /* Append */
    if (!(f = cache_rw(io, f)))
	return -1;
    /* Referencing one past the current end extends the array */
    if (!(m = ArrayRef(f->contig, ArrayMax(f->contig))))
	return -1;
    m->rec = contig;
    m->gap_size = ArrayMax(f->contig) > 1 ? gap_size : 0;
    m->gap_type = gap_type;
    m->evidence = evidence;

    /* Update the contig record too */
    if (!(c = cache_search(io, GT_Contig, contig)))
	return -1;
    if (!(c = cache_rw(io, c)))
	return -1;
    c->scaffold = scaffold;

#if 0
    /* Add a scaffold link to the contig graph too */
    if (ArrayMax(f->contig) >= 2) {
	m = arrp(scaffold_member_t, f->contig, ArrayMax(f->contig)-2);
	contig_link_t lnk;

	lnk.rec1 = contig;
	lnk.rec2 = m->rec;
	/* Best guess */
	lnk.pos1 = 0; lnk.end1 = 1;
	lnk.pos2 = 0; lnk.end2 = 0;
	lnk.orientation = 0;
	lnk.size = 100;
	lnk.type = CLINK_TYPE_SCAFFOLD;
	lnk.score = 0;

	contig_add_link(io, &lnk);
    }
#endif

    return 0;
}
Пример #7
0
int main (int argc, char *argv[])
{
	GfrEntry *currGE;
	int count;
	int countRemoved;
	int i;

	if (argc != 3) {
		usage ("%s <offsetCutoff> <minNumUniqueReads>",argv[0]);
	}
	count = 0;
	countRemoved = 0;

	int offsetCutOff = atoi (argv[1]);
	int minNumUniqueReads = atoi (argv[2]);

	gfr_init ("-");
	puts (gfr_writeHeader ());
	while (currGE = gfr_nextEntry ()) {
		Array starts = arrayCreate( 100, int);
		for (i = 0; i < arrayMax( currGE->interReads ); i++) {
			int currStart = arrp(currGE->interReads, i, GfrInterRead)->readStart1 + arrp(currGE->interReads, i, GfrInterRead)->readStart2;
			array(starts, arrayMax(starts), int) = currStart; 
		}
		arraySort( starts, (ARRAYORDERF) arrayIntcmp );
		arrayUniq( starts, NULL, (ARRAYORDERF) arrayIntcmp ) ;
		int numUniqeOffsets = arrayMax( starts );
		arrayDestroy( starts );

	if (arrayMax( currGE->readsTranscript1 ) != arrayMax( currGE->readsTranscript2 ) )
		die( "The two ends have a different number of reads");
	Texta reads = textCreate(arrayMax(currGE->readsTranscript1));
	for (i = 0; i < arrayMax(currGE->readsTranscript1); i++) {
		Stringa strA = stringCreate( strlen(textItem( currGE->readsTranscript1, i) ) * 2 + 1);
		stringAppendf( strA, textItem( currGE->readsTranscript1,i));
		stringAppendf( strA, textItem( currGE->readsTranscript2,i)); 
		textAdd( reads, string(strA));
		stringDestroy( strA );
	}
	textUniqKeepOrder( reads );
	int numRemaining = arrayMax( reads );
	textDestroy ( reads );

	if (numRemaining <= minNumUniqueReads || numUniqeOffsets <= offsetCutOff) {
		countRemoved++;
		continue;
	} 
	puts (gfr_writeGfrEntry (currGE));
	count++;
	}
	gfr_deInit ();
	warn("%s_PCRFilter: offset=%d minNumUniqueReads=%d",
	     argv[0],offsetCutOff, minNumUniqueReads);
	warn("%s_numRemoved: %d",argv[0],countRemoved);
	warn("%s_numGfrEntries: %d",argv[0],count);
	return 0;
}
Пример #8
0
// Moves the multi-PBWT reader on to the next site.
//
// Each of the reader->n PBWTs has a cursor in reader->cpos[]. Every cursor is
// advanced past the sites that compare as not greater than the previously
// returned site (reader->mpos / reader->mals); the smallest position found at
// the cursors, and the smallest allele string at that position, are then
// stored back into reader->mpos and reader->mals.
//
// The nshared parameter is not used in this function.
//
// Return value: 0 if all PBWTs finished, otherwise the current position is returned
static int pbwt_reader_next(pbwt_reader_t *reader, int nshared)
{
	int i, min_pos = INT_MAX;
	char *min_als  = NULL;

	// advance all readers, first looking at coordinates only
	for (i=0; i<reader->n; i++)
	{  
		PBWT *p = reader->pbwt[i];
		int j		= reader->cpos[i];
		if ( j>=p->N ) continue;		// no more sites in this pbwt

		Site *site = arrp(p->sites, j, Site);
		char *als  = dictName(variationDict, site->varD);

		// assuming:
		//	- one chromosome only (no checking sequence name)
		//	- sorted alleles (strcmp() on als)
		//
		// The condition tests the site loaded last; the body then loads
		// site j, records j as the cursor and post-increments j, so on exit
		// cpos[i] indexes the site currently held in site/als.
		while ( j < p->N && site->x <= reader->mpos && (!reader->mals || strcmp(als,reader->mals)<=0) )
		{
			site = arrp(p->sites, j, Site);
			als  = dictName(variationDict, site->varD);
			reader->cpos[i] = j++;
		}
		if ( reader->cpos[i]+1 >= p->N && site->x == reader->mpos && (!reader->mals || !strcmp(als,reader->mals)) )
		{
			// this pbwt is positioned on the last site which has been read before
			// (setting the cursor to N makes the "no more sites" test above
			// skip this pbwt on later calls)
			reader->cpos[i] = p->N;
			continue;
		}

		// track the smallest position over all pbwts and its allele string
		if ( reader->cpos[i] < p->N && site->x < min_pos )
		{
			min_pos = site->x;
			min_als = als;
		}
		// same position as the current minimum: keep the smaller allele string
		if ( site->x==min_pos && (!min_als || strcmp(als,min_als)<0) ) min_als = als;
	}
	if ( min_pos==INT_MAX )
	{
		// no pbwt contributed a site: reset the reader state
		reader->mpos = 0;
		reader->mals = NULL;
	}
	else
	{
		reader->mpos = min_pos;
		reader->mals = min_als;
	}
	return reader->mpos;
}
Пример #9
0
/*
 * Prints a text alignment of the reads spanning each breakpoint read from
 * the source "-".
 *
 * For every breakpoint the two tile coordinates and the number of spanning
 * reads are printed, followed by each read indented by its offset, and
 * finally the first 2 * tileSize characters of the breakpoint sequence.
 * TILE_SEPARATOR is inserted at every non-zero column that is a multiple of
 * the tile size.
 *
 * The configuration is opened from the path in FUSIONSEQ_CONFPATH.
 * Returns EXIT_FAILURE if that fails, EXIT_SUCCESS otherwise.
 */
int main (int argc, char *argv[])
{
	Array breakPoints;
	int bpIdx;

	if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL)
		return EXIT_FAILURE;

	bp_init ("-");
	breakPoints = bp_getBreakPoints ();
	for (bpIdx = 0; bpIdx < arrayMax (breakPoints); bpIdx++) {
		BreakPoint *bp = arrp (breakPoints,bpIdx,BreakPoint);
		int tileSize = getTileSize (bp->tileCoordinate1,bp->tileCoordinate2);
		char *junction = getBreakPointSequence (bp->tileCoordinate1,bp->tileCoordinate2);
		int readIdx;
		int col;

		printf ("Tile 1: %s\n",bp->tileCoordinate1);
		printf ("Tile 2: %s\n",bp->tileCoordinate2);
		printf ("Number of reads spanning breakpoint: %d\n\n\n",arrayMax (bp->breakPointReads));

		for (readIdx = 0; readIdx < arrayMax (bp->breakPointReads); readIdx++) {
			BreakPointRead *spanning = arrp (bp->breakPointReads,readIdx,BreakPointRead);
			int readLength = strlen (spanning->read);

			/* Left padding so the read starts at its offset column. */
			for (col = 0; col < spanning->offset; col++) {
				putchar (' ');
			}
			for (col = 0; col < readLength; col++) {
				if (((spanning->offset + col) % tileSize) == 0 && (spanning->offset + col) != 0) {
					printf ("%s",TILE_SEPARATOR);
				}
				putchar (spanning->read[col]);
			}
			putchar ('\n');
		}

		/* Reference line: both tiles with the separator between them. */
		for (col = 0; col < (2 * tileSize); col++) {
			if ((col % tileSize) == 0 && col != 0) {
				printf ("%s",TILE_SEPARATOR);
			}
			putchar (junction[col]);
		}
		printf ("\n\n\n\n\n");
	}
	bp_deInit ();

	confp_close(Conf);

	return EXIT_SUCCESS;
}
Пример #10
0
int main (int argc, char *argv[])
{
  int i,j,groupNumber;
  MrfEntry *currEntry;
  GffEntry *currGffEntry,*nextGffEntry;
  Array gffEntries;
  FILE *fp;
  Stringa buffer;
  short int paired;

  if (argc != 2) {
    usage ("%s <prefix>",argv[0]);
  }
  buffer = stringCreate (1000);
  groupNumber = 0;
  mrf_init ("-");
  gffEntries = arrayCreate (100000,GffEntry);
  while (currEntry = mrf_nextEntry ()) {
    processRead (gffEntries, currEntry, &groupNumber);
  }
  mrf_deInit ();

  arraySort (gffEntries,(ARRAYORDERF)sortGffEntriesByTargetNameAndGroupNumber);
  i = 0; 
  while (i < arrayMax (gffEntries)) {
    currGffEntry = arrp (gffEntries,i,GffEntry);
    stringPrintf (buffer,"%s_%s.gff",argv[1],currGffEntry->targetName);
    fp = fopen (string (buffer),"w");
    if (fp == NULL) {
      die ("Unable to open file: %s",string (buffer));
    }
    fprintf (fp,"browser hide all\n");
    fprintf (fp,"track name=\"%s_%s\" visibility=2\n",argv[1],currGffEntry->targetName);
    fprintf (fp,"%s\n",currGffEntry->line);
    j = i + 1;
    while (j < arrayMax (gffEntries)) {
      nextGffEntry = arrp (gffEntries,j,GffEntry);
      if (!strEqual (currGffEntry->targetName,nextGffEntry->targetName)) {
        break;
      } 
      fprintf (fp,"%s\n",nextGffEntry->line);
      j++;
    }
    i = j;
    fclose (fp);
  }
  stringDestroy (buffer);
  return 0;
}
Пример #11
0
/*
 * Given ranges contained within a bin this makes sure that all sequences
 * referred to in these ranges have their parent listed as the new bin.
 *
 * Ranges flagged as unused are skipped.  Ranges whose type bits identify
 * an annotation update the anno_ele_t record; all others update the seq_t
 * record, whose bin_index is also set to the range's index in the bin.
 * Records that already point at this bin are left untouched.
 *
 * Returns 0 on success
 *        -1 on failure (a record could not be loaded or made writable)
 */
static int break_contig_reparent_seqs(GapIO *io, bin_index_t *bin) {
    int i, nr = bin->rng ? ArrayMax(bin->rng) : 0;

    for (i = 0; i < nr; i++) {
	range_t *r = arrp(range_t, bin->rng, i);
	if (r->flags & GRANGE_FLAG_UNUSED)
	    continue;

	if ((r->flags & GRANGE_FLAG_ISMASK) == GRANGE_FLAG_ISANNO) {
	    anno_ele_t *a = (anno_ele_t *)cache_search(io, GT_AnnoEle, r->rec);
	    if (!a)
		return -1;
	    if (a->bin != bin->rec) {
		if (!(a = cache_rw(io, a)))
		    return -1;
		a->bin = bin->rec;
	    }
	} else {
	    seq_t *seq = (seq_t *)cache_search(io, GT_Seq, r->rec);
	    if (!seq)
		return -1;
	    if (seq->bin != bin->rec) {
		if (!(seq = cache_rw(io, seq)))
		    return -1;
		seq->bin = bin->rec;
		seq->bin_index = i;
	    }
	}
    }

    return 0;
}
Пример #12
0
/*
 * Reads BLAT queries from the source "-" and prints one tab-separated line
 * (target name, target start, target end, query name) per retained query.
 *
 * Queries with a single hit are printed directly.  Queries with several
 * hits are handed to processBlatQuery() together with the overlap cutoff
 * from argv[1]; they are skipped when it returns 1, otherwise the hit it
 * designated as the original is printed.
 *
 * Returns 0.
 */
int main (int argc, char *argv[])
{
  BlatQuery *query = NULL;
  PslEntry *hit = NULL;

  if( argc < 2 ) {
    usage("%s <overlap> < file.psl", argv[0]);
  }
  blatParser_initFromFile( "-" );

  while( (query = blatParser_nextQuery()) != NULL ) {
    int origIdx = 0;
    int discard = 0;

    if( arrayMax( query->entries ) > 1 ) {
      origIdx = -1;
      discard = processBlatQuery( query, &origIdx, atof( argv[1] ) );
    }
    if( discard == 1 )
      continue;

    if( origIdx == -1 )
      die( "Error");
    hit = arrp( query->entries, origIdx, PslEntry );
    printf( "%s\t%d\t%d\t%s\n", hit->tName, hit->tStart, hit->tEnd, query->qName);
  }
  blatParser_deInit();
  return 0;
}
Пример #13
0
/*
 * Prints the breakpoint sequences of all breakpoints from the source "-"
 * as FASTA-like records ("><tile1>|<tile2>" followed by the sequence) on
 * stdout, after sorting them with sortBreakPointsByTargetAndOffset.
 *
 * For every record a second copy is sent through warn(), with the sequence
 * replaced by subString(sequence, 10, length - 10).
 *
 * The configuration is opened from the path in FUSIONSEQ_CONFPATH.
 * Returns EXIT_FAILURE if that fails, EXIT_SUCCESS otherwise.
 */
int main (int argc, char *argv[])
{
  Array breakPoints;
  int idx;

  Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"));
  if (Conf == NULL)
    return EXIT_FAILURE;

  bp_init ("-");
  breakPoints = bp_getBreakPoints ();
  arraySort (breakPoints,(ARRAYORDERF)sortBreakPointsByTargetAndOffset);

  idx = 0;
  while (idx < arrayMax (breakPoints)) {
    BreakPoint *bp = arrp (breakPoints,idx,BreakPoint);
    char *junction = getBreakPointSequence (bp->tileCoordinate1,bp->tileCoordinate2);

    printf( ">%s|%s\n%s\n", bp->tileCoordinate1, bp->tileCoordinate2, junction);
    warn(">%s|%s\n%s",
         bp->tileCoordinate1,
         bp->tileCoordinate2,
         subString(junction, 10, strlen(junction)-10));
    idx++;
  }
  bp_deInit();
  confp_close(Conf);

  return EXIT_SUCCESS;
}
Пример #14
0
int main (int argc, char *argv[])
{
  int i,j;
  Array intervals;
  Interval *currInterval;
  SubInterval *currSubInterval;
  Stringa sizes =NULL;
  Stringa starts=NULL;

  if (argc < 2) {
    usage ("%s <trackName> [simple]",argv[0]);
  }
  if( (argc==3) && !strEqual( argv[2],"simple") ) {
    usage("%s <trackName> [simple]",argv[0]);
  }
  intervalFind_addIntervalsToSearchSpace ("-",0);
  intervals = intervalFind_getAllIntervals ();
  puts ("browser hide all");
  printf ("track name=\"%s\" visibility=2\n",argv[1]);
  for (i = 0; i < arrayMax (intervals); i++) {
    currInterval = arrp (intervals,i,Interval);
    if( argc == 3 ) {
      for (j = 0; j < arrayMax (currInterval->subIntervals); j++) {
	currSubInterval = arrp (currInterval->subIntervals,j,SubInterval);
	printf ("%s\t%d\t%d\t%s_%d\t900\t%c\t%d\t%d\t.\t1\t%d\t0\n",
		currInterval->chromosome,currSubInterval->start,currSubInterval->end,currInterval->name,j+1,currInterval->strand, currSubInterval->start, currSubInterval->end,  currSubInterval->end - currSubInterval->start  );
      }
    } else {
      stringCreateClear( starts, 10);
      stringCreateClear( sizes,   10);
      for( j = 0; j < arrayMax (currInterval->subIntervals); j++) {
	currSubInterval = arrp (currInterval->subIntervals,j,SubInterval);
	stringAppendf( sizes, "%d", currSubInterval->end - currSubInterval->start );
        stringAppendf( starts, "%d", currSubInterval->start - currInterval->start );
	if( j<arrayMax( currInterval->subIntervals) ) {
	  stringAppendf( sizes, "," );
	  stringAppendf(starts, "," );
	}
      }
      printf ("%s\t%d\t%d\t%s\t900\t%c\t%d\t%d\t.\t%d\t%s\t%s\n",
	      currInterval->chromosome,currInterval->start,currInterval->end, currInterval->name,currInterval->strand, currInterval->start, currInterval->end, currInterval->subIntervalCount, string(sizes), string(starts) );
    }
  }
  return 0;
}
Пример #15
0
/*
 * Debugging aid: prints two newlines and then one line per contig_region_t
 * in gaps, giving its index, start, end, rnum and deleted fields.
 */
void dump_gaps(Array gaps) {
    int idx = 0;

    puts("\n");
    while (idx < ArrayMax(gaps)) {
	contig_region_t *region = arrp(contig_region_t, gaps, idx);

	printf("Gap %d\t%d %d %d %d\n", idx,
	       region->start, region->end, region->rnum, region->deleted);
	idx++;
    }
}
Пример #16
0
/*
 * Exports Scaffold information to an AGP file
 *
 * For every scaffold one "W" line is written per member contig, preceded
 * (for all but the first member) by an "N" line describing the gap to the
 * previous contig.  Coordinates within the scaffold start at 1.
 *
 * The file fn is created or truncated.
 *
 * Returns 0 on success
 *        -1 on failure (file could not be opened or closed, or a scaffold
 *           or contig record could not be loaded)
 */
int scaffold_to_agp(GapIO *io, char *fn) {
    FILE *fp;
    int i, j;

    if (NULL == (fp = fopen(fn, "w+"))) {
	verror(ERR_WARN, "scaffold_to_agp", "%s: %s", fn, strerror(errno));
	return -1;
    }

    for (i = 0; io->scaffold && i < ArrayMax(io->scaffold); i++) {
	scaffold_t *f = cache_search(io, GT_Scaffold,
				     arr(tg_rec, io->scaffold, i));
	int start = 1;
	int k = 1;	/* running line number within this scaffold */

	if (!f) {
	    verror(ERR_WARN, "scaffold_to_agp", "Failed to load scaffold\n");
	    fclose(fp);
	    return -1;
	}

	/* Hold a reference on f while other records are being loaded */
	cache_incr(io, f);

	for (j = 0; f->contig && j < ArrayMax(f->contig); j++) {
	    scaffold_member_t *m = arrp(scaffold_member_t, f->contig, j);
	    contig_t *c = cache_search(io, GT_Contig, m->rec);
	    int ustart, uend;
	    int len;

	    if (!c) {
		verror(ERR_WARN, "scaffold_to_agp",
		       "Failed to load contig\n");
		cache_decr(io, f);
		fclose(fp);
		return -1;
	    }

	    /* Get the unpadded clipped contig length */
	    consensus_valid_range(io, m->rec, &ustart, &uend);
	    consensus_unpadded_pos(io, m->rec, uend, &uend);
	    len = uend - ustart + 1;

	    if (j) {
		int gap = m->gap_size;
		fprintf(fp, "%s\t%d\t%d\t%d\tN\t%d\tfragment\tyes\n",
			f->name, start, start+gap-1, k++, gap);
		start += gap;
	    }
	    fprintf(fp, "%s\t%d\t%d\t%d\tW\t%s\t%d\t%d\t+\n",
		    f->name, start, start + len-1,
		    k++, c->name, ustart, uend);
	    start += len;
	}

	cache_decr(io, f);
    }

    if (0 != fclose(fp)) {
	verror(ERR_WARN, "scaffold_to_agp", "%s: %s", fn, strerror(errno));
	return -1;
    }

    return 0;
}
Пример #17
0
/**
 * Prints every Fastq record in seqs, in array order, by handing each one
 * to fastq_printOneSequence().
 * @param[in] seqs Array of Fastq records
 */
void fastq_printSequences (Array seqs)
{
  int idx = 0;

  while (idx < arrayMax (seqs)) {
    fastq_printOneSequence (arrp (seqs,idx,Fastq));
    idx++;
  }
}
Пример #18
0
/*
 * Computes the region covered by the inter-reads of a GFR entry on each
 * side: the smallest readStart and the largest readEnd, separately for
 * side 1 and side 2.  The results are written through the four output
 * pointers.
 *
 * The first inter-read is read unconditionally, so the entry is expected
 * to hold at least one inter-read.
 */
static void findCoordinates( GfrEntry *gfrE, int *start1, int *end1, int *start2, int *end2 )
{
  GfrInterRead *read = arrp( gfrE->interReads, 0, GfrInterRead );
  int idx;

  /* Seed the extremes with the first inter-read. */
  *start1 = read->readStart1;
  *end1 = arrp( gfrE->interReads, 0, GfrInterRead )->readEnd1;
  *start2 = arrp( gfrE->interReads, 0, GfrInterRead )->readStart2;
  *end2 = arrp( gfrE->interReads, 0, GfrInterRead )->readEnd2;

  for( idx = 1; idx < arrayMax( gfrE->interReads ); idx++ ) {
    read = arrp( gfrE->interReads, idx, GfrInterRead );

    if( *start1 > read->readStart1 ) {
      *start1 = read->readStart1;
    }
    if( *start2 > read->readStart2 ) {
      *start2 = read->readStart2;
    }
    if( *end1 < read->readEnd1 ) {
      *end1 = read->readEnd1;
    }
    if( *end2 < read->readEnd2 ) {
      *end2 = read->readEnd2;
    }
  }
}
/*
 * Removes from blQ->entries every PSL entry whose target range overlaps at
 * least one interval known to intervalFind.
 *
 * The index only advances when the current entry is kept: after a removal
 * the following entry occupies the same slot and must be examined too
 * (assumes arrayRemoveD closes the gap left by the removed element --
 * confirm against the array library).
 */
void checkPseudogeneOverlap( BlatQuery* blQ ) 
{
  PslEntry* blE;
  Array intervals;
  int i;

  i = 0;
  while( i < arrayMax( blQ->entries ) ) {
    blE = arrp( blQ->entries, i, PslEntry );
    intervals = intervalFind_getOverlappingIntervals ( blE->tName, blE->tStart, blE->tEnd );
    if( arrayMax( intervals ) > 0 ) {
      arrayRemoveD( blQ->entries, i );
    } else {
      i++;
    }
  }
}
Пример #20
0
/*
 * Sets the annotation type.  The type is given as a string, of which at
 * most the first four characters are used, and is stored as an int via
 * str2type().
 *
 * If the annotation lives in a bin, the copy of the type cached in the
 * bin's matching range entry (its mqual field) is updated as well and the
 * bin is flagged BIN_RANGE_UPDATED.
 *
 * On success *e is replaced by the writable annotation pointer.
 *
 * Returns 0 on success
 *        -1 on failure (annotation or bin not writable, bin not found, or
 *           no range entry in the bin refers to this annotation)
 */
int anno_ele_set_type(GapIO *io, anno_ele_t **e, char *str) {
    anno_ele_t *ae;
    char stype[5] = {0};
    int type;

    ae = cache_rw(io, *e);
    if (!ae)
	return -1;

    /* Get integer type from a NUL-padded copy of the first 4 characters */
    strncpy(stype, str, 4);
    type = str2type(stype);

    /* Update annotation */
    ae->tag_type = type;

    /* Also update range_t cached copy of type */
    if (ae->bin) {
	bin_index_t *bin;
	range_t *r = NULL;
	int idx, nranges;

	bin = (bin_index_t *)cache_search(io, GT_Bin, ae->bin);
	if (!bin)
	    return -1;
	bin = cache_rw(io, bin);
	if (!bin)
	    return -1;

	/*
	 * Find the index into the bin range.
	 * FIXME: we should add a bin_index element, as seen in seq_t,
	 * to avoid the brute force loop. This doesn't have to be
	 * permanently stored - a cached copy would suffice.
	 */
	nranges = bin->rng ? ArrayMax(bin->rng) : 0;
	for (idx = 0; idx < nranges; idx++) {
	    r = arrp(range_t, bin->rng, idx);
	    if (!(r->flags & GRANGE_FLAG_UNUSED) && r->rec == ae->rec)
		break;
	}
	if (idx == nranges)
	    return -1;

	bin->flags |= BIN_RANGE_UPDATED;
	r->mqual = type;
    }

    *e = ae;

    return 0;
}
Пример #21
0
/*
 * Segments a signal track into regions (Tar) and appends them to tars.
 *
 * A region is seeded at a Wig element whose value is not below threshold.
 * The scan then continues over the following elements until maxGap
 * consecutive elements below threshold have been seen or the array ends,
 * remembering the index of the last element that was not below threshold.
 * The region is recorded when
 *   (lastIndex - 1 - seedPosition + 1) >= minRun.
 *
 * NOTE(review): the length test and the stored end mix an array index with
 * the seed's position value; this looks as if it assumes index == position
 * -- confirm with the callers.
 *
 * targetName is duplicated into every recorded Tar.
 */
void performSegmentation (Array tars, Array wigs, char* targetName, double threshold, int maxGap, int minRun)
{
  int seedIdx = 0;

  while (seedIdx < arrayMax (wigs)) {
    Wig *seed = arrp (wigs,seedIdx,Wig);
    int scanIdx;
    int lastAboveIdx;
    int belowRun;

    if (seed->value < threshold) {
      seedIdx++;
      continue;
    }

    lastAboveIdx = seedIdx + 1;
    belowRun = 0;
    for (scanIdx = seedIdx + 1; scanIdx < arrayMax (wigs); scanIdx++) {
      Wig *probe = arrp (wigs,scanIdx,Wig);

      if (probe->value < threshold) {
        belowRun++;
        if (belowRun >= maxGap) {
          break;
        }
      }
      else {
        belowRun = 0;
        lastAboveIdx = scanIdx;
      }
    }

    if ((lastAboveIdx - 1 - seed->position + 1) >= minRun) {
      Tar *region = arrayp (tars,arrayMax (tars),Tar);

      region->start = seed->position;
      region->end = lastAboveIdx + 1;
      region->targetName = hlr_strdup (targetName);
    }
    /* Resume after the scanned stretch. */
    seedIdx = scanIdx;
  }
}
Пример #22
0
/*
 * Removes a contig from a scaffold.
 *
 * The contig's scaffold reference is cleared and the first member entry of
 * the scaffold that refers to the contig is deleted, moving all later
 * entries down by one.
 *
 * Returns 0 on success
 *        -1 on failure (contig or scaffold not found, or the contig is not
 *           recorded as a member of this scaffold)
 */
int scaffold_remove(GapIO *io, tg_rec scaffold, tg_rec contig) {
    contig_t *c = cache_search(io, GT_Contig, contig);
    scaffold_t *f = cache_search(io, GT_Scaffold, scaffold);
    int pos, src;

    if (!c || !f)
	return -1;

    if (c->scaffold != scaffold) {
	verror(ERR_WARN, "scaffold_remove", "Attempted to remove contig #%"
	       PRIrec" from a scaffold #%"PRIrec" it is not a member of",
	       contig, scaffold);
	return -1;
    }

    c = cache_rw(io, c);
    c->scaffold = 0;

    f = cache_rw(io, f);

    /* Locate the first member entry referring to the contig */
    for (pos = 0; pos < ArrayMax(f->contig); pos++) {
	if (arrp(scaffold_member_t, f->contig, pos)->rec == contig)
	    break;
    }

    if (pos < ArrayMax(f->contig)) {
	/* Shuffle array down over the removed slot */
	for (src = pos + 1; src < ArrayMax(f->contig); src++) {
	    *arrp(scaffold_member_t, f->contig, src - 1) =
		*arrp(scaffold_member_t, f->contig, src);
	}
	ArrayMax(f->contig)--;
    }

    return 0;
}
/*
 * Looks up the TreeFam id recorded for a transcript.
 *
 * kgTreeFams is searched with arrayFind() using
 * sortKgTreeFamsByTranscriptName as the ordering (so it is presumably
 * expected to be sorted by that ordering -- confirm with the caller).
 *
 * Returns the treeFamId of the matching entry (a pointer into the array,
 * not a copy) or NULL if the transcript is not found.
 */
static char* lookUpTreeFam (Array kgTreeFams, char *transcript) 
{
  KgTreeFam probe;
  int pos;
  int found;

  /* The search key only needs the transcript name filled in. */
  probe.transcriptName = hlr_strdup (transcript);
  found = arrayFind (kgTreeFams,&probe,&pos,(ARRAYORDERF)sortKgTreeFamsByTranscriptName);
  hlr_free (probe.transcriptName);

  return found ? arrp (kgTreeFams,pos,KgTreeFam)->treeFamId : NULL;
}
Пример #24
0
/*
 * Tells whether any block of the read lies on targetName and has a
 * positive intersection with the range targetStart..targetEnd.
 *
 * Returns 1 on the first such block, 0 if there is none.
 */
static int isContained (MrfRead *currRead, char *targetName, int targetStart, int targetEnd)
{
  int idx = 0;

  while (idx < arrayMax (currRead->blocks)) {
    MrfBlock *block = arrp (currRead->blocks,idx,MrfBlock);

    if (strEqual (block->targetName,targetName) &&
        rangeIntersection (block->targetStart,block->targetEnd,targetStart,targetEnd) > 0) {
      return 1;
    }
    idx++;
  }
  return 0;
}
Пример #25
0
/*
 * Decides whether a BLAT query with several hits should be discarded.
 *
 * Each hit is given a size: the sum of its block sizes minus its
 * mismatches.  The hit for which checkOriginal() returns 1 is the
 * "original" (the last such hit wins) and its index is stored in *idxOrig;
 * the function dies if no hit qualifies.  Every hit is logged through
 * warn().  The query is discarded as soon as a hit other than the original
 * has a size ratio (hit size / original size) above cutoff.
 *
 * Returns 1 if the query should be discarded, 0 otherwise.
 */
int processBlatQuery( BlatQuery* blQ, int *idxOrig , float cutoff) {
  int sizes[ arrayMax( blQ->entries ) ];
  PslEntry *hit;
  int hitIdx, blockIdx;
  int sizeOrig;

  *idxOrig = -1;
  for( hitIdx = 0; hitIdx < arrayMax( blQ->entries ); hitIdx++ ) {
    int total = 0;

    hit = arrp( blQ->entries, hitIdx, PslEntry );
    for( blockIdx = 0; blockIdx < arrayMax( hit->blockSizes ); blockIdx++ ) {
      total += arru( hit->blockSizes, blockIdx, int );
    }
    sizes[hitIdx] = total - hit->misMatches;
    if( checkOriginal ( blQ, hit ) == 1 ) {
      *idxOrig = hitIdx;
    }
  }
  if( *idxOrig < 0 ) {
    die("Cannot find exact match: %s", blQ->qName);
  }

  sizeOrig = sizes[ *idxOrig ];
  for( hitIdx = 0; hitIdx < arrayMax( blQ->entries ); hitIdx++ ) {
    float ratio = (float)sizes[hitIdx] / (float)sizeOrig;

    hit = arrp( blQ->entries, hitIdx, PslEntry );
    warn( "%s\t%s\t%d\t%d\t[ %d, %d - %f]\t%d\t--\t%d\t%d\t%d", blQ->qName, hit->tName, hit->tStart, hit->tEnd, sizes[hitIdx], sizeOrig, ratio, hit->blockCount, hit->misMatches, hit->qNumInsert, hit->tNumInsert);
    if( ratio > cutoff && hitIdx != *idxOrig ) {
      return 1;
    }
  }
  return 0;
}
Пример #26
0
/*
 * Releases a BlastQuery: its query name, the target name of every entry,
 * the entries array and finally the query structure itself.
 * A NULL argument is ignored.
 */
static void blastParser_freeQuery (BlastQuery *currBlastQuery)
{
    int idx;

    if (!currBlastQuery) {
        return;
    }

    hlr_free (currBlastQuery->qName);
    idx = 0;
    while (idx < arrayMax (currBlastQuery->entries)) {
        BlastEntry *entry = arrp (currBlastQuery->entries,idx,BlastEntry);

        hlr_free (entry->tName);
        idx++;
    }
    arrayDestroy (currBlastQuery->entries);
    freeMem (currBlastQuery);
}
Пример #27
0
/*
 * Fetches the sequences of two tiles and returns them concatenated.
 *
 * The two tile coordinates are written to a temporary list file
 * ("targets_<pid>.txt" in the current directory), which is handed through
 * -seqList to the external program configured as BLAT_TWO_BIT_TO_FA; its
 * FASTA output is read back through a pipe.  The list file is deleted as
 * soon as the program's output has been consumed, so it does not survive
 * a failure of the sequence-count check.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call; the caller must not free it.  Dies if the list file cannot be
 * created or removed, or if the program does not deliver exactly two
 * sequences.
 */
static char* getBreakPointSequence (char *tileCoordinate1, char *tileCoordinate2)
{
	Stringa buffer;
	Stringa targetsFile;
	FILE *fp;
	Array targetSeqs;
	int i;
	Seq *currSeq;
	static Stringa sequence = NULL;

	buffer = stringCreate (100);
	targetsFile = stringCreate (100);
	stringPrintf (targetsFile,"targets_%d.txt",getpid ());
	if (!(fp = fopen (string (targetsFile),"w")) ){
		die ("Unable to open target file: %s",string (targetsFile));
	}
	fprintf (fp,"%s\n%s",tileCoordinate1,tileCoordinate2);
	fclose (fp);

	stringPrintf (buffer,"%s %s/%s stdout -noMask -seqList=%s",
		      confp_get(Conf, "BLAT_TWO_BIT_TO_FA"),
		      confp_get(Conf, "BLAT_DATA_DIR"),
		      confp_get(Conf, "BLAT_TWO_BIT_DATA_FILENAME"),
		      string (targetsFile));
	fasta_initFromPipe (string (buffer));
	targetSeqs = fasta_readAllSequences (0);
	fasta_deInit ();

	/* Delete the list file directly instead of spawning a shell for
	   "rm -rf", and do it before any check that may die. */
	if (remove (string (targetsFile)) != 0) {
		die ("Unable to remove target file: %s",string (targetsFile));
	}

	if (arrayMax (targetSeqs) != 2) {
		die ("Expected only two target sequences");
	} 
	stringCreateClear (sequence,100);
	for (i = 0; i < arrayMax (targetSeqs); i++) {
		currSeq = arrp (targetSeqs,i,Seq);
		stringAppendf (sequence,"%s",currSeq->sequence);
		hlr_free (currSeq->name);
		hlr_free (currSeq->sequence);
	}
	arrayDestroy (targetSeqs);
	stringDestroy (targetsFile);
	stringDestroy (buffer);
	return string (sequence);
}
Пример #28
0
/*
 * Appends one GffEntry per block of the read to gffEntries.
 *
 * Each entry gets a copy of the block's target name and a tab-separated
 * line with source "MRF", feature "exon", the block's target start and
 * end, and the group label "TG<groupNumber>".  The strand column is
 * written as "." (the block's strand is not used).
 */
static void createGffEntry( Array gffEntries, MrfRead *currRead, int groupNumber ) {
  static Stringa line = NULL;
  int idx;

  stringCreateClear (line,100);
  for (idx = 0; idx < arrayMax (currRead->blocks); idx++) {
    MrfBlock *block = arrp (currRead->blocks,idx,MrfBlock);
    GffEntry *entry = arrayp (gffEntries,arrayMax (gffEntries),GffEntry);

    stringPrintf (line,"%s\tMRF\texon\t%d\t%d\t.\t.\t.\tTG%d",
                  block->targetName,
                  block->targetStart,
                  block->targetEnd,
                  groupNumber);
    entry->targetName = hlr_strdup (block->targetName);
    entry->line = hlr_strdup (string (line));
  }
}
Пример #29
0
/*
 * Passes the GFR entries from the source "-" through to stdout and, for
 * each entry, writes two BED files "<id>_1.bed" and "<id>_2.bed" in the
 * current directory.  File 1 lists readStart1/readEnd1 of every inter-read
 * on chromosomeTranscript1, file 2 the corresponding values for side 2.
 *
 * The number of entries is reported through warn().  Returns 0; dies if
 * either BED file cannot be opened.
 */
int main (int argc, char *argv[])
{
	GfrEntry *entry;
	Stringa fileName = stringCreate (100);
	int numEntries = 0;

	gfr_init ("-");
	puts (gfr_writeHeader ());
	while ((entry = gfr_nextEntry ()) != NULL) {
		FILE *bed1;
		FILE *bed2;
		int idx;

		stringPrintf (fileName,"%s_1.bed",entry->id);
		bed1 = fopen (string (fileName),"w");
		stringPrintf (fileName,"%s_2.bed",entry->id);
		bed2 = fopen (string (fileName),"w");
		if (bed1 == NULL || bed2 == NULL) {
			die ("Unable to open BED files");
		}

		/* Track headers */
		fprintf (bed1,"browser full knownGene\n");
		fprintf (bed1,"track name=\"Inter paird-ends: %s_1\" visibility=2\n",entry->id);
		fprintf (bed2,"browser full knownGene\n");
		fprintf (bed2,"track name=\"Inter paird-ends: %s_2\" visibility=2\n",entry->id);

		/* One line per inter-read in each file */
		for (idx = 0; idx < arrayMax (entry->interReads); idx++) {
			GfrInterRead *pair = arrp (entry->interReads,idx,GfrInterRead);

			fprintf (bed1,"%s\t%d\t%d\n",entry->chromosomeTranscript1,pair->readStart1,pair->readEnd1);
			fprintf (bed2,"%s\t%d\t%d\n",entry->chromosomeTranscript2,pair->readStart2,pair->readEnd2);
		}
		fclose (bed1);
		fclose (bed2);

		puts (gfr_writeGfrEntry (entry));
		numEntries++;
	}
	gfr_deInit ();
	stringDestroy (fileName);
	warn ("%s_numGfrEntries: %d",argv[0],numEntries);
	return 0;
}
Пример #30
0
/*
 * Removes an anno_ele from the gap database.
 * FIXME: need to deallocate storage too. (See docs/TODO)
 *
 * The bin range entry referring to the annotation is flagged as unused and
 * pushed onto the bin's free list (its rec field takes the previous
 * rng_free value and rng_free becomes the entry's index).  The bin's
 * annotation count is decremented, and the bin's used range is recomputed
 * when the removed entry touched either end of it.
 *
 * Returns 0 on success
 *        -1 on failure (bin missing, empty or not writable, or no range
 *           entry refers to the annotation)
 */
int anno_ele_destroy(GapIO *io, anno_ele_t *e) {
    bin_index_t *bin;
    range_t *r = NULL;
    int idx;

    /* Find the bin range pointing to this object */
    bin = (bin_index_t *)cache_search(io, GT_Bin, e->bin);
    if (!bin || !bin->rng || ArrayMax(bin->rng) == 0)
	return -1;
    bin = cache_rw(io, bin);
    if (!bin)
	return -1;

    for (idx = 0; idx < ArrayMax(bin->rng); idx++) {
	range_t *candidate = arrp(range_t, bin->rng, idx);

	if (!(candidate->flags & GRANGE_FLAG_UNUSED) &&
	    candidate->rec == e->rec) {
	    r = candidate;
	    break;
	}
    }
    if (!r)
	return -1;

    /* Mark this bin range as unused */
    r->rec = bin->rng_free;
    r->flags |= GRANGE_FLAG_UNUSED;

    bin->rng_free = idx;
    bin->flags |= BIN_RANGE_UPDATED | BIN_BIN_UPDATED;

    bin_incr_nanno(io, bin, -1);

    if (bin->start_used == r->start || bin->end_used == r->end)
	bin_set_used_range(io, bin);

    return 0;
}