int main (int argc, char *argv[]) { int i,j; Array intervals; Interval *currInterval; SubInterval *currSubInterval; if (argc != 2) { usage ("%s <trackName>",argv[0]); } intervalFind_addIntervalsToSearchSpace ("-",0); intervals = intervalFind_getAllIntervals (); puts ("browser hide all"); printf ("track name=\"%s\" visibility=2\n",argv[1]); for (i = 0; i < arrayMax (intervals); i++) { currInterval = arrp (intervals,i,Interval); for (j = 0; j < arrayMax (currInterval->subIntervals); j++) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); printf ("%s\tannotation\texon\t%d\t%d\t.\t%c\t.\tgroup%d\n", currInterval->chromosome, currSubInterval->start + 1, currSubInterval->end, currInterval->strand, i); } } return 0; }
/*
 * Tests whether any alignment block of a paired-end entry overlaps a locus.
 *
 * Only paired-end entries are examined; for a single-end entry the result
 * is always 0.  Blocks of read 1 are checked first, then blocks of read 2.
 * A block matches when its target name equals the locus chromosome and
 * positiveRangeIntersection() of the two ranges is greater than zero.
 *
 * Returns 1 if a matching block is found, 0 otherwise.
 */
static int containedLocus (MrfEntry *query, Locus *target)
{
  MrfBlock *block;
  int idx;

  if (!query->isPairedEnd) {
    return 0;
  }

  for (idx = 0; idx < arrayMax (query->read1.blocks); idx++) {
    block = arrp (query->read1.blocks,idx,MrfBlock);
    if (strcmp (block->targetName,target->chromosome) != 0) {
      continue;
    }
    if (positiveRangeIntersection (block->targetStart,block->targetEnd,
                                   target->start,target->end) > 0) {
      return 1;
    }
  }

  /* nothing found in read 1, so try read 2 */
  for (idx = 0; idx < arrayMax (query->read2.blocks); idx++) {
    block = arrp (query->read2.blocks,idx,MrfBlock);
    if (strcmp (block->targetName,target->chromosome) != 0) {
      continue;
    }
    if (positiveRangeIntersection (block->targetStart,block->targetEnd,
                                   target->start,target->end) > 0) {
      return 1;
    }
  }
  return 0;
}
/*
 * Builds currGE->pairCounts from currGE->interReads.
 *
 * The inter-reads are sorted in place with sortGfrInterReads, then every
 * run of consecutive entries that share pairType, number1 and number2 is
 * collapsed into one GfrPairCount whose count is the length of the run.
 */
static void obtainPairCounts (GfrEntry *currGE)
{
  int runStart;
  int next;

  currGE->pairCounts = arrayCreate (100,GfrPairCount);
  arraySort (currGE->interReads,(ARRAYORDERF)sortGfrInterReads);

  for (runStart = 0; runStart < arrayMax (currGE->interReads); runStart = next) {
    GfrInterRead *head = arrp (currGE->interReads,runStart,GfrInterRead);
    GfrPairCount *tally = arrayp (currGE->pairCounts,arrayMax (currGE->pairCounts),GfrPairCount);

    tally->number1 = head->number1;
    tally->number2 = head->number2;
    tally->pairType = head->pairType;
    tally->count = 1;

    /* extend the run while the following entries carry the same key */
    for (next = runStart + 1; next < arrayMax (currGE->interReads); next++) {
      GfrInterRead *candidate = arrp (currGE->interReads,next,GfrInterRead);

      if (head->pairType != candidate->pairType ||
          head->number1 != candidate->number1 ||
          head->number2 != candidate->number2) {
        break;
      }
      tally->count++;
    }
  }
}
/*
 * Tests whether any block of a read overlaps an exon of an annotated
 * transcript.
 *
 * For every block the overlapping transcripts are fetched with
 * intervalFind_getOverlappingIntervals(); the block is then compared
 * against each sub-interval of each transcript with rangeIntersection().
 *
 * Returns 1 as soon as one intersection is greater than zero, 0 otherwise.
 */
static int isContained (MrfRead *currRead)
{
  int blockIdx;

  for (blockIdx = 0; blockIdx < arrayMax (currRead->blocks); blockIdx++) {
    MrfBlock *block = arrp (currRead->blocks,blockIdx,MrfBlock);
    Array transcripts = intervalFind_getOverlappingIntervals (block->targetName,
                                                              block->targetStart,
                                                              block->targetEnd);
    int transcriptIdx;

    for (transcriptIdx = 0; transcriptIdx < arrayMax (transcripts); transcriptIdx++) {
      Interval *transcript = arru (transcripts,transcriptIdx,Interval*);
      int exonIdx;

      for (exonIdx = 0; exonIdx < arrayMax (transcript->subIntervals); exonIdx++) {
        SubInterval *exon = arrp (transcript->subIntervals,exonIdx,SubInterval);

        if (rangeIntersection (block->targetStart,block->targetEnd,
                               exon->start,exon->end) > 0) {
          return 1;
        }
      }
    }
  }
  return 0;
}
int main (int argc, char *argv[]) { if( argc != 3 ) { usage( "%s <errors|noerrors> <gaps|nogaps>", argv[0]); return -1; } MrfEntry *currEntry; MrfBlock *currBlock; int i, error, errorCode, numErrors; int gaps = 0; if( strEqual( "gaps", argv[2] ) ) gaps = 1; seq_init(); mrf_init ("-"); printf( "%s\n", mrf_writeHeader()); numErrors = 0; while (currEntry = mrf_nextEntry ()) { error = 0; for( i=0; i<arrayMax(currEntry->read1.blocks); i++) { currBlock=arrp( currEntry->read1.blocks, i, MrfBlock); if( i==0 ) { errorCode = processBlock ( currBlock, NULL, gaps ); } else { errorCode = processBlock ( currBlock, arrp( currEntry->read1.blocks, i-1, MrfBlock ), gaps ); } if( errorCode != 0 ) { warn( "Error code (read1): %d\t%s:%c:%d:%d:%d:%d", errorCode, currBlock->targetName, currBlock->strand, currBlock->targetStart, currBlock->targetEnd, currBlock->queryStart, currBlock->queryEnd); error++; } } if (currEntry->isPairedEnd & error == 0) { for( i=0; i<arrayMax(currEntry->read2.blocks); i++) { currBlock=arrp( currEntry->read2.blocks, i, MrfBlock); if( i==0 ) { errorCode = processBlock ( currBlock, NULL , gaps ); } else { errorCode = processBlock (currBlock, arrp( currEntry->read2.blocks, i-1, MrfBlock) , gaps); } if( errorCode !=0 ) { warn( "Error code (read2): %d\t%s:%c:%d:%d:%d:%d", errorCode, currBlock->targetName, currBlock->strand, currBlock->targetStart, currBlock->targetEnd, currBlock->queryStart, currBlock->queryEnd); error++; } } } if( error > 0 && strEqual(argv[1], "errors")) { printf( "%s\n", mrf_writeEntry( currEntry )); error=0; numErrors++; } if( error == 0 && strEqual(argv[1], "noerrors")) printf( "%s\n", mrf_writeEntry( currEntry )); } mrf_deInit (); warn("%s: done", argv[0]); return 0; }
/*
 * Adds a contig to a scaffold array.
 * Gap size, type and evidence refer to the gap between this and the
 * "previous" contig - ie the last in the scaffold. More complex
 * scaffold manipulations will be handled elsewhere.
 *
 * Set these fields to 0 if you do not know them.
 *
 * If the contig currently belongs to a scaffold it is removed from it
 * first.  If it is already listed in the requested scaffold nothing is
 * changed.  The gap size of the first member of a scaffold is always
 * stored as 0.
 *
 * Returns 0 on success
 *        -1 on failure (contig or scaffold cannot be loaded, made
 *           writable, or the member array cannot be extended)
 */
int scaffold_add(GapIO *io, tg_rec scaffold, tg_rec contig,
                 int gap_size, int gap_type, int evidence) {
    scaffold_t *f;
    contig_t *c;
    scaffold_member_t *m;
    int i;

    /* Check if this contig is in a scaffold, if so remove now */
    if (!(c = cache_search(io, GT_Contig, contig)))
        return -1;
    if (c->scaffold)
        scaffold_remove(io, c->scaffold, contig);

    if (!(f = cache_search(io, GT_Scaffold, scaffold)))
        return -1;

    /* Check if it already exists */
    for (i = 0; i < ArrayMax(f->contig); i++) {
        m = arrp(scaffold_member_t, f->contig, i);
        if (m->rec == contig)
            return 0;
    }

    /* Append */
    if (!(f = cache_rw(io, f)))
        return -1;
    if (!(m = ArrayRef(f->contig, ArrayMax(f->contig)))) /* extend */
        return -1;
    m->rec = contig;
    /* ArrayMax already counts the new member here */
    m->gap_size = ArrayMax(f->contig) > 1 ? gap_size : 0;
    m->gap_type = gap_type;
    m->evidence = evidence;

    /* Update the contig record too */
    if (!(c = cache_search(io, GT_Contig, contig)))
        return -1;
    if (!(c = cache_rw(io, c)))
        return -1;
    c->scaffold = scaffold;

#if 0
    /* Add a scaffold link to the contig graph too */
    if (ArrayMax(f->contig) >= 2) {
        m = arrp(scaffold_member_t, f->contig, ArrayMax(f->contig)-2);
        contig_link_t lnk;

        lnk.rec1 = contig;
        lnk.rec2 = m->rec;
        /* Best guess */
        lnk.pos1 = 0;
        lnk.end1 = 1;
        lnk.pos2 = 0;
        lnk.end2 = 0;
        lnk.orientation = 0;
        lnk.size = 100;
        lnk.type = CLINK_TYPE_SCAFFOLD;
        lnk.score = 0;

        contig_add_link(io, &lnk);
    }
#endif

    return 0;
}
int main (int argc, char *argv[]) { GfrEntry *currGE; int count; int countRemoved; int i; if (argc != 3) { usage ("%s <offsetCutoff> <minNumUniqueReads>",argv[0]); } count = 0; countRemoved = 0; int offsetCutOff = atoi (argv[1]); int minNumUniqueReads = atoi (argv[2]); gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { Array starts = arrayCreate( 100, int); for (i = 0; i < arrayMax( currGE->interReads ); i++) { int currStart = arrp(currGE->interReads, i, GfrInterRead)->readStart1 + arrp(currGE->interReads, i, GfrInterRead)->readStart2; array(starts, arrayMax(starts), int) = currStart; } arraySort( starts, (ARRAYORDERF) arrayIntcmp ); arrayUniq( starts, NULL, (ARRAYORDERF) arrayIntcmp ) ; int numUniqeOffsets = arrayMax( starts ); arrayDestroy( starts ); if (arrayMax( currGE->readsTranscript1 ) != arrayMax( currGE->readsTranscript2 ) ) die( "The two ends have a different number of reads"); Texta reads = textCreate(arrayMax(currGE->readsTranscript1)); for (i = 0; i < arrayMax(currGE->readsTranscript1); i++) { Stringa strA = stringCreate( strlen(textItem( currGE->readsTranscript1, i) ) * 2 + 1); stringAppendf( strA, textItem( currGE->readsTranscript1,i)); stringAppendf( strA, textItem( currGE->readsTranscript2,i)); textAdd( reads, string(strA)); stringDestroy( strA ); } textUniqKeepOrder( reads ); int numRemaining = arrayMax( reads ); textDestroy ( reads ); if (numRemaining <= minNumUniqueReads || numUniqeOffsets <= offsetCutOff) { countRemoved++; continue; } puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); warn("%s_PCRFilter: offset=%d minNumUniqueReads=%d", argv[0],offsetCutOff, minNumUniqueReads); warn("%s_numRemoved: %d",argv[0],countRemoved); warn("%s_numGfrEntries: %d",argv[0],count); return 0; }
// Return value: 0 if all PBWTs finished, otherwise the current position is returned static int pbwt_reader_next(pbwt_reader_t *reader, int nshared) { int i, min_pos = INT_MAX; char *min_als = NULL; // advance all readers, first looking at coordinates only for (i=0; i<reader->n; i++) { PBWT *p = reader->pbwt[i]; int j = reader->cpos[i]; if ( j>=p->N ) continue; // no more sites in this pbwt Site *site = arrp(p->sites, j, Site); char *als = dictName(variationDict, site->varD); // assuming: // - one chromosome only (no checking sequence name) // - sorted alleles (strcmp() on als) while ( j < p->N && site->x <= reader->mpos && (!reader->mals || strcmp(als,reader->mals)<=0) ) { site = arrp(p->sites, j, Site); als = dictName(variationDict, site->varD); reader->cpos[i] = j++; } if ( reader->cpos[i]+1 >= p->N && site->x == reader->mpos && (!reader->mals || !strcmp(als,reader->mals)) ) { // this pbwt is positioned on the last site which has been read before reader->cpos[i] = p->N; continue; } if ( reader->cpos[i] < p->N && site->x < min_pos ) { min_pos = site->x; min_als = als; } if ( site->x==min_pos && (!min_als || strcmp(als,min_als)<0) ) min_als = als; } if ( min_pos==INT_MAX ) { reader->mpos = 0; reader->mals = NULL; } else { reader->mpos = min_pos; reader->mals = min_als; } return reader->mpos; }
int main (int argc, char *argv[]) { Array breakPoints; BreakPoint *currBP; BreakPointRead *currBPR; int i,j,k; int readLength; int tileSize; char *breakPointSequence; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; bp_init ("-"); breakPoints = bp_getBreakPoints (); for (i = 0; i < arrayMax (breakPoints); i++) { currBP = arrp (breakPoints,i,BreakPoint); tileSize = getTileSize (currBP->tileCoordinate1,currBP->tileCoordinate2); breakPointSequence = getBreakPointSequence (currBP->tileCoordinate1,currBP->tileCoordinate2); printf ("Tile 1: %s\n",currBP->tileCoordinate1); printf ("Tile 2: %s\n",currBP->tileCoordinate2); printf ("Number of reads spanning breakpoint: %d\n\n\n",arrayMax (currBP->breakPointReads)); for (j = 0; j < arrayMax (currBP->breakPointReads); j++) { currBPR = arrp (currBP->breakPointReads,j,BreakPointRead); readLength = strlen (currBPR->read); for (k = 0; k < currBPR->offset; k++) { printf (" "); } for (k = 0; k < readLength; k++) { if (((currBPR->offset + k) % tileSize) == 0 && (currBPR->offset + k) != 0) { printf ("%s",TILE_SEPARATOR); } printf ("%c",currBPR->read[k]); } printf ("\n"); } for (k = 0; k < (2 * tileSize); k++) { if ((k % tileSize) == 0 && k != 0) { printf ("%s",TILE_SEPARATOR); } printf ("%c",breakPointSequence[k]); } printf ("\n\n\n\n\n"); } bp_deInit (); confp_close(Conf); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { int i,j,groupNumber; MrfEntry *currEntry; GffEntry *currGffEntry,*nextGffEntry; Array gffEntries; FILE *fp; Stringa buffer; short int paired; if (argc != 2) { usage ("%s <prefix>",argv[0]); } buffer = stringCreate (1000); groupNumber = 0; mrf_init ("-"); gffEntries = arrayCreate (100000,GffEntry); while (currEntry = mrf_nextEntry ()) { processRead (gffEntries, currEntry, &groupNumber); } mrf_deInit (); arraySort (gffEntries,(ARRAYORDERF)sortGffEntriesByTargetNameAndGroupNumber); i = 0; while (i < arrayMax (gffEntries)) { currGffEntry = arrp (gffEntries,i,GffEntry); stringPrintf (buffer,"%s_%s.gff",argv[1],currGffEntry->targetName); fp = fopen (string (buffer),"w"); if (fp == NULL) { die ("Unable to open file: %s",string (buffer)); } fprintf (fp,"browser hide all\n"); fprintf (fp,"track name=\"%s_%s\" visibility=2\n",argv[1],currGffEntry->targetName); fprintf (fp,"%s\n",currGffEntry->line); j = i + 1; while (j < arrayMax (gffEntries)) { nextGffEntry = arrp (gffEntries,j,GffEntry); if (!strEqual (currGffEntry->targetName,nextGffEntry->targetName)) { break; } fprintf (fp,"%s\n",nextGffEntry->line); j++; } i = j; fclose (fp); } stringDestroy (buffer); return 0; }
/*
 * Given ranges contained within a bin this makes sure that all sequences
 * referred to in these ranges have their parent listed as the new bin.
 *
 * Ranges flagged GRANGE_FLAG_UNUSED are skipped.  Annotation ranges have
 * the bin of their anno_ele_t updated; every other range is treated as a
 * sequence, whose bin and bin_index are updated.  Objects already pointing
 * at this bin are left untouched (not made writable).
 *
 * Returns 0 on success
 *        -1 on failure (an object could not be loaded or made writable)
 */
static int break_contig_reparent_seqs(GapIO *io, bin_index_t *bin) {
    int i, nr = bin->rng ? ArrayMax(bin->rng) : 0;

    for (i = 0; i < nr; i++) {
        range_t *r = arrp(range_t, bin->rng, i);

        if (r->flags & GRANGE_FLAG_UNUSED)
            continue;

        if ((r->flags & GRANGE_FLAG_ISMASK) == GRANGE_FLAG_ISANNO) {
            anno_ele_t *a = (anno_ele_t *)cache_search(io, GT_AnnoEle, r->rec);

            if (!a)
                return -1;
            if (a->bin != bin->rec) {
                if (!(a = cache_rw(io, a)))
                    return -1;
                a->bin = bin->rec;
            }
        } else {
            seq_t *seq = (seq_t *)cache_search(io, GT_Seq, r->rec);

            if (!seq)
                return -1;
            if (seq->bin != bin->rec) {
                if (!(seq = cache_rw(io, seq)))
                    return -1;
                seq->bin = bin->rec;
                seq->bin_index = i;
            }
        }
    }

    return 0;
}
int main (int argc, char *argv[]) { if( argc < 2 ) { usage("%s <overlap> < file.psl", argv[0]); } blatParser_initFromFile( "-" ); BlatQuery* blQ = NULL; PslEntry* pslE = NULL; int discard = 0; int idxOrig ; while( blQ = blatParser_nextQuery() ) { idxOrig = 0; if( arrayMax( blQ->entries ) > 1 ) { idxOrig = -1; discard = processBlatQuery( blQ, &idxOrig, atof( argv[1] ) ); } if( discard == 1 ) { discard = 0; continue; } else { if( idxOrig == -1 ) die( "Error"); pslE = arrp( blQ->entries, idxOrig, PslEntry ); printf( "%s\t%d\t%d\t%s\n", pslE->tName, pslE->tStart, pslE->tEnd, blQ->qName); } } blatParser_deInit(); return 0; }
int main (int argc, char *argv[]) { Array breakPoints; BreakPoint *currBP; int i; char *breakPointSequence; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; bp_init ("-"); breakPoints = bp_getBreakPoints (); arraySort (breakPoints,(ARRAYORDERF)sortBreakPointsByTargetAndOffset); for (i = 0; i < arrayMax (breakPoints); i++) { currBP = arrp (breakPoints,i,BreakPoint); breakPointSequence = getBreakPointSequence (currBP->tileCoordinate1,currBP->tileCoordinate2); printf( ">%s|%s\n%s\n", currBP->tileCoordinate1, currBP->tileCoordinate2, breakPointSequence); warn(">%s|%s\n%s", currBP->tileCoordinate1, currBP->tileCoordinate2, subString(breakPointSequence, 10, strlen(breakPointSequence)-10)); } bp_deInit(); confp_close(Conf); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { int i,j; Array intervals; Interval *currInterval; SubInterval *currSubInterval; Stringa sizes =NULL; Stringa starts=NULL; if (argc < 2) { usage ("%s <trackName> [simple]",argv[0]); } if( (argc==3) && !strEqual( argv[2],"simple") ) { usage("%s <trackName> [simple]",argv[0]); } intervalFind_addIntervalsToSearchSpace ("-",0); intervals = intervalFind_getAllIntervals (); puts ("browser hide all"); printf ("track name=\"%s\" visibility=2\n",argv[1]); for (i = 0; i < arrayMax (intervals); i++) { currInterval = arrp (intervals,i,Interval); if( argc == 3 ) { for (j = 0; j < arrayMax (currInterval->subIntervals); j++) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); printf ("%s\t%d\t%d\t%s_%d\t900\t%c\t%d\t%d\t.\t1\t%d\t0\n", currInterval->chromosome,currSubInterval->start,currSubInterval->end,currInterval->name,j+1,currInterval->strand, currSubInterval->start, currSubInterval->end, currSubInterval->end - currSubInterval->start ); } } else { stringCreateClear( starts, 10); stringCreateClear( sizes, 10); for( j = 0; j < arrayMax (currInterval->subIntervals); j++) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); stringAppendf( sizes, "%d", currSubInterval->end - currSubInterval->start ); stringAppendf( starts, "%d", currSubInterval->start - currInterval->start ); if( j<arrayMax( currInterval->subIntervals) ) { stringAppendf( sizes, "," ); stringAppendf(starts, "," ); } } printf ("%s\t%d\t%d\t%s\t900\t%c\t%d\t%d\t.\t%d\t%s\t%s\n", currInterval->chromosome,currInterval->start,currInterval->end, currInterval->name,currInterval->strand, currInterval->start, currInterval->end, currInterval->subIntervalCount, string(sizes), string(starts) ); } } return 0; }
/*
 * Debugging aid: prints every contig_region_t in 'gaps' to stdout as
 * "Gap <index>\t<start> <end> <rnum> <deleted>", preceded by blank lines.
 */
void dump_gaps(Array gaps) {
    int idx = 0;

    puts("\n");
    while (idx < ArrayMax(gaps)) {
        contig_region_t *region = arrp(contig_region_t, gaps, idx);

        printf("Gap %d\t%d %d %d %d\n",
               idx, region->start, region->end, region->rnum, region->deleted);
        idx++;
    }
}
/*
 * Exports Scaffold information to an AGP file
 *
 * For every scaffold one "W" line is written per member contig, giving the
 * contig name and its consensus range.  From the second member onwards a
 * "N" gap line of the member's gap_size precedes it.  Coordinates within a
 * scaffold start at 1 and component numbers are consecutive.
 *
 * Returns 0 on success
 *        -1 on failure (file cannot be opened or closed, or a scaffold or
 *           contig record cannot be loaded)
 */
int scaffold_to_agp(GapIO *io, char *fn) {
    FILE *fp;
    int i, j;

    if (NULL == (fp = fopen(fn, "w+"))) {
        verror(ERR_WARN, "scaffold_to_agp", "%s: %s", fn, strerror(errno));
        return -1;
    }

    for (i = 0; io->scaffold && i < ArrayMax(io->scaffold); i++) {
        scaffold_t *f = cache_search(io, GT_Scaffold,
                                     arr(tg_rec, io->scaffold, i));
        int start = 1;  /* next free coordinate within this scaffold */
        int k = 1;      /* component number within this scaffold */

        if (!f) {
            verror(ERR_WARN, "scaffold_to_agp", "Failed to load scaffold\n");
            fclose(fp);
            return -1;
        }

        /* pin the scaffold while its members are being looked up */
        cache_incr(io, f);

        for (j = 0; f->contig && j < ArrayMax(f->contig); j++) {
            scaffold_member_t *m = arrp(scaffold_member_t, f->contig, j);
            contig_t *c = cache_search(io, GT_Contig, m->rec);
            int ustart, uend;
            int len;

            if (!c) {
                verror(ERR_WARN, "scaffold_to_agp", "Failed to load contig\n");
                cache_decr(io, f);
                fclose(fp);
                return -1;
            }

            /* Get the unpadded clipped contig length */
            consensus_valid_range(io, m->rec, &ustart, &uend);
            consensus_unpadded_pos(io, m->rec, uend, &uend);
            len = uend - ustart + 1;

            if (j) {
                int gap = m->gap_size;

                fprintf(fp, "%s\t%d\t%d\t%d\tN\t%d\tfragment\tyes\n",
                        f->name, start, start+gap-1, k++, gap);
                start += gap;
            }
            fprintf(fp, "%s\t%d\t%d\t%d\tW\t%s\t%d\t%d\t+\n",
                    f->name, start, start + len-1, k++, c->name, ustart, uend);
            start += len;
        }

        cache_decr(io, f);
    }

    if (0 != fclose(fp)) {
        verror(ERR_WARN, "scaffold_to_agp", "%s: %s", fn, strerror(errno));
        return -1;
    }

    return 0;
}
/**
 * Prints every Fastq record of seqs, in array order, by handing each
 * element to fastq_printOneSequence().
 */
void fastq_printSequences (Array seqs)
{
  int idx = 0;

  while (idx < arrayMax (seqs)) {
    fastq_printOneSequence (arrp (seqs,idx,Fastq));
    idx++;
  }
}
/*
 * Computes the extent covered by the inter-reads of a GFR entry on each
 * side: the smallest readStart and the largest readEnd, separately for
 * side 1 and side 2.
 *
 * The results are seeded from element 0 of gfrE->interReads, so the array
 * is read at index 0 without an emptiness check.
 */
static void findCoordinates (GfrEntry *gfrE, int *start1, int *end1, int *start2, int *end2)
{
  GfrInterRead *read = arrp (gfrE->interReads,0,GfrInterRead);
  int idx;

  *start1 = read->readStart1;
  *end1 = read->readEnd1;
  *start2 = read->readStart2;
  *end2 = read->readEnd2;

  idx = 1;
  while (idx < arrayMax (gfrE->interReads)) {
    read = arrp (gfrE->interReads,idx,GfrInterRead);
    if (read->readStart1 < *start1) {
      *start1 = read->readStart1;
    }
    if (read->readStart2 < *start2) {
      *start2 = read->readStart2;
    }
    if (read->readEnd1 > *end1) {
      *end1 = read->readEnd1;
    }
    if (read->readEnd2 > *end2) {
      *end2 = read->readEnd2;
    }
    idx++;
  }
}
/*
 * Removes from blQ->entries every PSL entry whose target range overlaps at
 * least one interval known to intervalFind.
 *
 * The index only advances when the current entry is kept.
 * NOTE(review): this relies on arrayRemoveD() closing the gap, so that the
 * following entry moves into the freed slot -- confirm against the array
 * library.
 */
void checkPseudogeneOverlap (BlatQuery *blQ)
{
  PslEntry *blE;
  Array intervals;
  int i;

  i = 0;
  while (i < arrayMax (blQ->entries)) {
    blE = arrp (blQ->entries,i,PslEntry);
    /* The returned array is not freed here, matching the other callers of
     * this function in the code base. */
    intervals = intervalFind_getOverlappingIntervals (blE->tName,blE->tStart,blE->tEnd);
    if (arrayMax (intervals) > 0) {
      arrayRemoveD (blQ->entries,i);   /* do not advance: slot i holds a new entry */
    }
    else {
      i++;
    }
  }
}
/*
 * Sets the annotation type, passed in as a string but held in a 4-byte int.
 * At most the first four characters of str are used.  If the annotation
 * belongs to a bin, the copy of the type cached in the matching bin range
 * (range_t.mqual) is updated as well and the bin is flagged
 * BIN_RANGE_UPDATED.
 *
 * On success *e is replaced by the writable annotation pointer.  On
 * failure *e is left unchanged, although tag_type may already have been
 * modified.
 *
 * Returns 0 on success
 *        -1 on failure
 */
int anno_ele_set_type(GapIO *io, anno_ele_t **e, char *str) {
    char type_str[5];
    anno_ele_t *anno;
    bin_index_t *bin;
    range_t *match = NULL;
    int type_code;
    int idx, count;

    anno = cache_rw(io, *e);
    if (anno == NULL)
        return -1;

    /* Pack up to four characters into the integer type code */
    memset(type_str, 0, 5);
    strncpy(type_str, str, 4);
    type_code = str2type(type_str);

    anno->tag_type = type_code;

    /* No bin means there is no cached copy to refresh */
    if (!anno->bin) {
        *e = anno;
        return 0;
    }

    bin = (bin_index_t *)cache_search(io, GT_Bin, anno->bin);
    if (bin == NULL)
        return -1;
    bin = cache_rw(io, bin);
    if (bin == NULL)
        return -1;

    /*
     * Brute-force search for the range that refers to this annotation.
     * FIXME: a bin_index element, as seen in seq_t, would avoid the loop.
     * It need not be stored permanently - a cached copy would suffice.
     */
    count = bin->rng ? ArrayMax(bin->rng) : 0;
    for (idx = 0; idx < count; idx++) {
        range_t *candidate = arrp(range_t, bin->rng, idx);

        if (candidate->flags & GRANGE_FLAG_UNUSED)
            continue;
        if (candidate->rec == anno->rec) {
            match = candidate;
            break;
        }
    }
    if (match == NULL)
        return -1;

    bin->flags |= BIN_RANGE_UPDATED;
    match->mqual = type_code;

    *e = anno;
    return 0;
}
/*
 * Segments a signal track into Tar records appended to 'tars'.
 *
 * A segment is seeded at the first Wig whose value is not below
 * 'threshold'.  It is extended over following elements until 'maxGap'
 * consecutive elements below the threshold have been seen; the index of
 * the last element at or above the threshold is remembered.  A Tar is
 * emitted when (lastIndex - 1 - seedPosition + 1) >= minRun, with
 * start = seed position and end = lastIndex + 1.  Scanning resumes at the
 * element where the extension stopped.
 *
 * NOTE(review): the end is derived from an array index while the start is
 * a Wig position; this only agrees if index and position coincide --
 * confirm with the producer of 'wigs'.
 */
void performSegmentation (Array tars, Array wigs, char* targetName, double threshold, int maxGap, int minRun)
{
  int seedIdx = 0;

  while (seedIdx < arrayMax (wigs)) {
    Wig *seed = arrp (wigs,seedIdx,Wig);
    int scanIdx;
    int lastAboveIdx;
    int belowRun;

    if (seed->value < threshold) {
      seedIdx++;
      continue;
    }

    lastAboveIdx = seedIdx + 1;
    belowRun = 0;
    for (scanIdx = seedIdx + 1; scanIdx < arrayMax (wigs); scanIdx++) {
      Wig *probe = arrp (wigs,scanIdx,Wig);

      if (probe->value < threshold) {
        belowRun++;
        if (belowRun >= maxGap) {
          break;
        }
      }
      else {
        belowRun = 0;
        lastAboveIdx = scanIdx;
      }
    }

    if ((lastAboveIdx - 1 - seed->position + 1) >= minRun) {
      Tar *segment = arrayp (tars,arrayMax (tars),Tar);

      segment->start = seed->position;
      segment->end = lastAboveIdx + 1;
      segment->targetName = hlr_strdup (targetName);
    }
    seedIdx = scanIdx;
  }
}
/*
 * Removes a contig from a scaffold.
 *
 * The contig's scaffold field is cleared and its entry is deleted from the
 * scaffold's member array, with later members shifted down one place.  It
 * is an error if the contig does not currently belong to this scaffold.
 *
 * Returns 0 on success
 *        -1 on failure (record cannot be loaded or made writable, or the
 *           contig is not a member of the scaffold)
 */
int scaffold_remove(GapIO *io, tg_rec scaffold, tg_rec contig) {
    scaffold_t *f;
    scaffold_member_t *m, *m2;
    contig_t *c;
    int i;

    c = cache_search(io, GT_Contig, contig);
    f = cache_search(io, GT_Scaffold, scaffold);

    if (!c || !f)
        return -1;

    if (c->scaffold != scaffold) {
        verror(ERR_WARN, "scaffold_remove", "Attempted to remove contig #%"
               PRIrec" from a scaffold #%"PRIrec" it is not a member of",
               contig, scaffold);
        return -1;
    }

    if (!(c = cache_rw(io, c)))
        return -1;
    c->scaffold = 0;

    if (!(f = cache_rw(io, f)))
        return -1;

    for (i = 0; i < ArrayMax(f->contig); i++) {
        m = arrp(scaffold_member_t, f->contig, i);
        if (m->rec != contig)
            continue;

        /* Shuffle array down */
        for (i++; i < ArrayMax(f->contig); i++) {
            m2 = arrp(scaffold_member_t, f->contig, i);
            *m = *m2;
            m = m2;
        }
        ArrayMax(f->contig)--;
        break;  /* only the first matching member is removed */
    }

    return 0;
}
/*
 * Looks up the TreeFam id recorded for a transcript.
 *
 * kgTreeFams is searched with arrayFind() using
 * sortKgTreeFamsByTranscriptName as the ordering, so it is presumably
 * expected to be sorted by that function -- confirm at the call site.
 *
 * Returns the treeFamId of the matching element (a pointer into the
 * array), or NULL when the transcript is not found.
 */
static char* lookUpTreeFam (Array kgTreeFams, char *transcript)
{
  KgTreeFam probe;
  int position;
  int found;

  /* temporary key that carries only the transcript name */
  probe.transcriptName = hlr_strdup (transcript);
  found = arrayFind (kgTreeFams,&probe,&position,(ARRAYORDERF)sortKgTreeFamsByTranscriptName);
  hlr_free (probe.transcriptName);

  if (!found) {
    return NULL;
  }
  return arrp (kgTreeFams,position,KgTreeFam)->treeFamId;
}
/*
 * Tests whether any block of a read lies on targetName and intersects the
 * range [targetStart, targetEnd] according to rangeIntersection().
 *
 * Returns 1 if such a block exists, 0 otherwise.
 */
static int isContained (MrfRead *currRead, char *targetName, int targetStart, int targetEnd)
{
  int idx = 0;

  while (idx < arrayMax (currRead->blocks)) {
    MrfBlock *block = arrp (currRead->blocks,idx,MrfBlock);

    if (strEqual (block->targetName,targetName) &&
        rangeIntersection (block->targetStart,block->targetEnd,targetStart,targetEnd) > 0) {
      return 1;
    }
    idx++;
  }
  return 0;
}
/*
 * Decides whether a BLAT query has a competing hit besides its original
 * location.
 *
 * The size of a hit is the sum of its block sizes minus its mismatches.
 * The original hit is the last entry for which checkOriginal() returns 1;
 * its index is stored in *idxOrig, and the program dies if there is none.
 * Every hit is logged through warn() together with its size relative to
 * the original hit.
 *
 * Returns 1 as soon as a hit other than the original has a relative size
 * greater than cutoff, 0 otherwise.
 */
int processBlatQuery( BlatQuery* blQ, int *idxOrig , float cutoff)
{
  int sizes[ arrayMax( blQ->entries ) ];
  PslEntry *hit;
  int hitIdx, blockIdx;
  int origSize;

  *idxOrig = -1;
  for (hitIdx = 0; hitIdx < arrayMax (blQ->entries); hitIdx++) {
    int matched = 0;

    hit = arrp (blQ->entries, hitIdx, PslEntry);
    for (blockIdx = 0; blockIdx < arrayMax (hit->blockSizes); blockIdx++) {
      matched += arru (hit->blockSizes, blockIdx, int);
    }
    sizes[hitIdx] = matched - hit->misMatches;
    if (checkOriginal (blQ, hit) == 1) {
      *idxOrig = hitIdx;
    }
  }
  if (*idxOrig < 0) {
    die ("Cannot find exact match: %s", blQ->qName);
  }

  origSize = sizes[*idxOrig];
  for (hitIdx = 0; hitIdx < arrayMax (blQ->entries); hitIdx++) {
    float ratio;

    hit = arrp (blQ->entries, hitIdx, PslEntry);
    ratio = (float)sizes[hitIdx] / (float)origSize;
    warn ("%s\t%s\t%d\t%d\t[ %d, %d - %f]\t%d\t--\t%d\t%d\t%d",
          blQ->qName, hit->tName, hit->tStart, hit->tEnd,
          sizes[hitIdx], origSize, ratio,
          hit->blockCount, hit->misMatches, hit->qNumInsert, hit->tNumInsert);
    if (ratio > cutoff && hitIdx != *idxOrig) {
      return 1;
    }
  }
  return 0;
}
/*
 * Releases a BlastQuery: its query name, the target name of every entry,
 * the entries array, and finally the query structure itself.
 * A NULL argument is accepted and ignored.
 */
static void blastParser_freeQuery (BlastQuery *currBlastQuery)
{
  int idx;

  if (currBlastQuery != NULL) {
    hlr_free (currBlastQuery->qName);
    idx = 0;
    while (idx < arrayMax (currBlastQuery->entries)) {
      BlastEntry *entry = arrp (currBlastQuery->entries,idx,BlastEntry);

      hlr_free (entry->tName);
      idx++;
    }
    arrayDestroy (currBlastQuery->entries);
    freeMem (currBlastQuery);
  }
}
static char* getBreakPointSequence (char *tileCoordinate1, char *tileCoordinate2) { Stringa buffer; Stringa targetsFile; FILE *fp; Array targetSeqs; int i; Seq *currSeq; static Stringa sequence = NULL; buffer = stringCreate (100); targetsFile = stringCreate (100); stringPrintf (targetsFile,"targets_%d.txt",getpid ()); if (!(fp = fopen (string (targetsFile),"w")) ){ die ("Unable to open target file: %s",string (targetsFile)); } fprintf (fp,"%s\n%s",tileCoordinate1,tileCoordinate2); fclose (fp); stringPrintf (buffer,"%s %s/%s stdout -noMask -seqList=%s", confp_get(Conf, "BLAT_TWO_BIT_TO_FA"), confp_get(Conf, "BLAT_DATA_DIR"), confp_get(Conf, "BLAT_TWO_BIT_DATA_FILENAME"), string (targetsFile)); fasta_initFromPipe (string (buffer)); targetSeqs = fasta_readAllSequences (0); fasta_deInit (); if (arrayMax (targetSeqs) != 2) { die ("Expected only two target sequences"); } stringCreateClear (sequence,100); for (i = 0; i < arrayMax (targetSeqs); i++) { currSeq = arrp (targetSeqs,i,Seq); stringAppendf (sequence,"%s",currSeq->sequence); hlr_free (currSeq->name); hlr_free (currSeq->sequence); } arrayDestroy (targetSeqs); stringPrintf (buffer,"rm -rf %s",string (targetsFile)); hlr_system (string (buffer),0); stringDestroy (targetsFile); stringDestroy (buffer); return string (sequence); }
/*
 * Appends one GffEntry to gffEntries for every block of currRead.
 *
 * Each entry receives a copy of the block's target name and a GFF "exon"
 * line holding the block's target start and end, with the group label
 * "TG<groupNumber>".  The strand column is written as ".".
 */
static void createGffEntry( Array gffEntries, MrfRead *currRead, int groupNumber )
{
  static Stringa buffer = NULL;
  int blockIdx = 0;

  stringCreateClear (buffer,100);
  while (blockIdx < arrayMax (currRead->blocks)) {
    MrfBlock *block = arrp (currRead->blocks,blockIdx,MrfBlock);
    GffEntry *entry = arrayp (gffEntries,arrayMax (gffEntries),GffEntry);

    stringPrintf (buffer,"%s\tMRF\texon\t%d\t%d\t.\t.\t.\tTG%d",
                  block->targetName,
                  block->targetStart,
                  block->targetEnd,
                  groupNumber);
    entry->targetName = hlr_strdup (block->targetName);
    entry->line = hlr_strdup (string (buffer));
    blockIdx++;
  }
}
int main (int argc, char *argv[]) { GfrEntry *currGE; GfrInterRead *currGIR; int i; Stringa buffer; FILE *fp1,*fp2; int count; count = 0; buffer = stringCreate (100); gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { stringPrintf (buffer,"%s_1.bed",currGE->id); fp1 = fopen (string (buffer),"w"); stringPrintf (buffer,"%s_2.bed",currGE->id); fp2 = fopen (string (buffer),"w"); if (fp1 == NULL || fp2 == NULL) { die ("Unable to open BED files"); } fprintf (fp1,"browser full knownGene\n"); fprintf (fp1,"track name=\"Inter paird-ends: %s_1\" visibility=2\n",currGE->id); fprintf (fp2,"browser full knownGene\n"); fprintf (fp2,"track name=\"Inter paird-ends: %s_2\" visibility=2\n",currGE->id); for (i = 0; i < arrayMax (currGE->interReads); i++) { currGIR = arrp (currGE->interReads,i,GfrInterRead); fprintf (fp1,"%s\t%d\t%d\n",currGE->chromosomeTranscript1,currGIR->readStart1,currGIR->readEnd1); fprintf (fp2,"%s\t%d\t%d\n",currGE->chromosomeTranscript2,currGIR->readStart2,currGIR->readEnd2); } fclose (fp1); fclose (fp2); puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); stringDestroy (buffer); warn ("%s_numGfrEntries: %d",argv[0],count); return 0; }
/*
 * Removes an anno_ele from the gap database.
 * FIXME: need to deallocate storage too. (See docs/TODO)
 *
 * The bin range that refers to the annotation is flagged
 * GRANGE_FLAG_UNUSED and pushed onto the bin's free list (bin->rng_free),
 * the bin's annotation count is decremented, and the bin's used range is
 * recomputed when the removed range touched either end of it.
 *
 * Returns 0 on success
 *        -1 on failure (bin missing, empty or not writable, or no live
 *           range refers to this annotation)
 */
int anno_ele_destroy(GapIO *io, anno_ele_t *e) {
    bin_index_t *bin;
    range_t *slot = NULL;
    int idx;

    /* Find the bin range pointing to this object */
    bin = (bin_index_t *)cache_search(io, GT_Bin, e->bin);
    if (bin == NULL || bin->rng == NULL || ArrayMax(bin->rng) == 0)
        return -1;
    bin = cache_rw(io, bin);
    if (bin == NULL)
        return -1;

    for (idx = 0; idx < ArrayMax(bin->rng); idx++) {
        range_t *candidate = arrp(range_t, bin->rng, idx);

        if (candidate->flags & GRANGE_FLAG_UNUSED)
            continue;
        if (candidate->rec == e->rec) {
            slot = candidate;
            break;
        }
    }
    if (slot == NULL)
        return -1;

    /* Mark this bin range as unused; rec now links to the previous head
     * of the free list */
    slot->rec = bin->rng_free;
    slot->flags |= GRANGE_FLAG_UNUSED;
    bin->rng_free = idx;

    bin->flags |= BIN_RANGE_UPDATED | BIN_BIN_UPDATED;
    bin_incr_nanno(io, bin, -1);

    if (bin->start_used == slot->start || bin->end_used == slot->end)
        bin_set_used_range(io, bin);

    return 0;
}