/**
 * Read integers from standard input until the value 42 is seen, the input
 * ends, or 1000 values have been stored; then sort the stored values with
 * arraySort() and print them one per line.
 *
 * The terminating 42 itself is not stored or printed.
 */
int main()
{
  int numbers[1000];
  int count = 0;
  int i;

  while (count < 1000) {
    int number;
    /* scanf returns the number of converted items; anything other than 1
       means EOF or malformed input, in which case `number` holds no valid
       value and reading must stop. */
    if (scanf("%d", &number) != 1) {
      break;
    }
    if (number == 42) {
      break;
    }
    numbers[count] = number;
    count++;
  }

  arraySort(numbers, count);
  for (i = 0; i < count; i++) {
    printf("%d\n", numbers[i]);
  }
  return 0;
}
int main (int argc, char *argv[]) { Array breakPoints; BreakPoint *currBP; int i; char *breakPointSequence; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; bp_init ("-"); breakPoints = bp_getBreakPoints (); arraySort (breakPoints,(ARRAYORDERF)sortBreakPointsByTargetAndOffset); for (i = 0; i < arrayMax (breakPoints); i++) { currBP = arrp (breakPoints,i,BreakPoint); breakPointSequence = getBreakPointSequence (currBP->tileCoordinate1,currBP->tileCoordinate2); printf( ">%s|%s\n%s\n", currBP->tileCoordinate1, currBP->tileCoordinate2, breakPointSequence); warn(">%s|%s\n%s", currBP->tileCoordinate1, currBP->tileCoordinate2, subString(breakPointSequence, 10, strlen(breakPointSequence)-10)); } bp_deInit(); confp_close(Conf); return EXIT_SUCCESS; }
/**
 * Build currGE->pairCounts from currGE->interReads.
 *
 * The inter reads are sorted in place with sortGfrInterReads; afterwards each
 * run of consecutive reads that agree in pairType, number1 and number2 is
 * collapsed into one GfrPairCount whose count is the length of the run.
 *
 * @param currGE entry whose interReads are sorted and whose pairCounts array
 *               is newly created and filled
 */
static void obtainPairCounts (GfrEntry *currGE)
{
  int runStart;
  int runEnd;

  currGE->pairCounts = arrayCreate (100,GfrPairCount);
  arraySort (currGE->interReads,(ARRAYORDERF)sortGfrInterReads);

  runStart = 0;
  while (runStart < arrayMax (currGE->interReads)) {
    GfrInterRead *head = arrp (currGE->interReads,runStart,GfrInterRead);
    /* Append a fresh counter for the run that begins at `head`. */
    GfrPairCount *tally = arrayp (currGE->pairCounts,arrayMax (currGE->pairCounts),GfrPairCount);

    tally->number1 = head->number1;
    tally->number2 = head->number2;
    tally->pairType = head->pairType;
    tally->count = 1;

    /* Extend the run while the following reads carry the same key. */
    for (runEnd = runStart + 1; runEnd < arrayMax (currGE->interReads); runEnd++) {
      GfrInterRead *candidate = arrp (currGE->interReads,runEnd,GfrInterRead);
      if (head->pairType != candidate->pairType ||
          head->number1 != candidate->number1 ||
          head->number2 != candidate->number2) {
        break;
      }
      tally->count++;
    }
    runStart = runEnd;
  }
}
int main (int argc, char *argv[]) { GfrEntry *currGE; int count; int countRemoved; int i; if (argc != 3) { usage ("%s <offsetCutoff> <minNumUniqueReads>",argv[0]); } count = 0; countRemoved = 0; int offsetCutOff = atoi (argv[1]); int minNumUniqueReads = atoi (argv[2]); gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { Array starts = arrayCreate( 100, int); for (i = 0; i < arrayMax( currGE->interReads ); i++) { int currStart = arrp(currGE->interReads, i, GfrInterRead)->readStart1 + arrp(currGE->interReads, i, GfrInterRead)->readStart2; array(starts, arrayMax(starts), int) = currStart; } arraySort( starts, (ARRAYORDERF) arrayIntcmp ); arrayUniq( starts, NULL, (ARRAYORDERF) arrayIntcmp ) ; int numUniqeOffsets = arrayMax( starts ); arrayDestroy( starts ); if (arrayMax( currGE->readsTranscript1 ) != arrayMax( currGE->readsTranscript2 ) ) die( "The two ends have a different number of reads"); Texta reads = textCreate(arrayMax(currGE->readsTranscript1)); for (i = 0; i < arrayMax(currGE->readsTranscript1); i++) { Stringa strA = stringCreate( strlen(textItem( currGE->readsTranscript1, i) ) * 2 + 1); stringAppendf( strA, textItem( currGE->readsTranscript1,i)); stringAppendf( strA, textItem( currGE->readsTranscript2,i)); textAdd( reads, string(strA)); stringDestroy( strA ); } textUniqKeepOrder( reads ); int numRemaining = arrayMax( reads ); textDestroy ( reads ); if (numRemaining <= minNumUniqueReads || numUniqeOffsets <= offsetCutOff) { countRemoved++; continue; } puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); warn("%s_PCRFilter: offset=%d minNumUniqueReads=%d", argv[0],offsetCutOff, minNumUniqueReads); warn("%s_numRemoved: %d",argv[0],countRemoved); warn("%s_numGfrEntries: %d",argv[0],count); return 0; }
int main (int argc, char *argv[]) { int i,j,groupNumber; MrfEntry *currEntry; GffEntry *currGffEntry,*nextGffEntry; Array gffEntries; FILE *fp; Stringa buffer; short int paired; if (argc != 2) { usage ("%s <prefix>",argv[0]); } buffer = stringCreate (1000); groupNumber = 0; mrf_init ("-"); gffEntries = arrayCreate (100000,GffEntry); while (currEntry = mrf_nextEntry ()) { processRead (gffEntries, currEntry, &groupNumber); } mrf_deInit (); arraySort (gffEntries,(ARRAYORDERF)sortGffEntriesByTargetNameAndGroupNumber); i = 0; while (i < arrayMax (gffEntries)) { currGffEntry = arrp (gffEntries,i,GffEntry); stringPrintf (buffer,"%s_%s.gff",argv[1],currGffEntry->targetName); fp = fopen (string (buffer),"w"); if (fp == NULL) { die ("Unable to open file: %s",string (buffer)); } fprintf (fp,"browser hide all\n"); fprintf (fp,"track name=\"%s_%s\" visibility=2\n",argv[1],currGffEntry->targetName); fprintf (fp,"%s\n",currGffEntry->line); j = i + 1; while (j < arrayMax (gffEntries)) { nextGffEntry = arrp (gffEntries,j,GffEntry); if (!strEqual (currGffEntry->targetName,nextGffEntry->targetName)) { break; } fprintf (fp,"%s\n",nextGffEntry->line); j++; } i = j; fclose (fp); } stringDestroy (buffer); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; Array kgTreeFams; Stringa buffer; int count; int countRemoved; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; buffer = stringCreate (100); stringPrintf (buffer,"%s/%s", confp_get(conf, "ANNOTATION_DIR"), confp_get(conf, "KNOWN_GENE_TREE_FAM_FILENAME")); kgTreeFams = util_readKnownGeneTreeFams (string (buffer)); arraySort (kgTreeFams,(ARRAYORDERF)sortKgTreeFamsByTranscriptName); stringDestroy (buffer); count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()){ if (isHomologous (kgTreeFams,currGE->nameTranscript1,currGE->nameTranscript2)) { countRemoved++; continue; } puts (gfr_writeGfrEntry (currGE)); count++; } gfr_deInit (); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
/** * \file bgrQuantifier <annotation.interval>. * \pre: it requires a BedGraph file from STDIN normalized by the number of mapped nucleotides */ int main( int argc, char* argv[] ) { Array bgrs; Array intervals; Array entries; int i, j, length; double value; if( argc < 2 ) { usage("%s <annotation.interval>\n%s requires a BedGraph from STDIN", argv[0], argv[0]); } bgrs = arrayCreate( 1000, BedGraph ); bgrParser_initFromFile ( "-" ); bgrs = bgrParser_getAllEntries (); bgrParser_deInit(); arraySort( bgrs, (ARRAYORDERF) bgrParser_sort ); intervalFind_addIntervalsToSearchSpace ( argv[1], 0 ); intervals = intervalFind_getAllIntervals (); for( i=0; i<arrayMax(intervals); i++ ) { Interval *currInterval = arrp( intervals, i, Interval ); length = currInterval->end - currInterval->start; entries = bgrParser_getValuesForRegion( bgrs, currInterval->chromosome, currInterval->start, currInterval->end); value = 0.0; for( j=0; j<arrayMax( entries ); j++) value += arru( entries, j, double ); printf("%s\t%s:%d-%d\t%f\n", currInterval->name, currInterval->chromosome, currInterval->start+1, currInterval->end, value /= length / 1000.0 ); arrayDestroy( entries ); } arrayDestroy( intervals ); return 0; }
int main (int argc, char *argv[]) { Array kgXrefs; Stringa buffer; LineStream ls; int count=0; char* geneSymbolTranscript; char* descriptionTranscript; char* line; char* exonID = NULL; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) return EXIT_FAILURE; buffer = stringCreate (100); stringPrintf (buffer,"%s/%s", confp_get(conf, "ANNOTATION_DIR"), confp_get(conf, "KNOWN_GENE_XREF_FILENAME")); kgXrefs = util_readKnownGeneXrefs (string (buffer)); arraySort (kgXrefs,(ARRAYORDERF)sortKgXrefsByTranscriptName); stringDestroy (buffer); // gfr_init ("-"); ls = ls_createFromFile("-"); while (line = ls_nextLine(ls)) { char *lineP = hlr_strdup(line); WordIter w = wordIterCreate( line, "\t", 0); char *nameTranscript = wordNext( w ); char *p = rindex(nameTranscript, '_'); if (p) { exonID = hlr_strdup( p+1 ); *p='\0'; } transcript2geneSymbolAndGeneDescription(kgXrefs, nameTranscript, &geneSymbolTranscript, &descriptionTranscript); if (exonID) { printf("%s_%s\t%s\t%s\t%s", nameTranscript, exonID, geneSymbolTranscript, exonID, descriptionTranscript); hlr_free(exonID); } else { printf("%s\t%s\t1\t%s", nameTranscript, geneSymbolTranscript, descriptionTranscript); } printf("%s\n", lineP+strlen(nameTranscript)); count++; hlr_free(lineP); wordIterDestroy(w); } ls_destroy (ls); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
/**
 * Annotate VCF entries read from "-" with the annotation intervals that
 * contain them and print the annotated entries to stdout.
 *
 * Usage: <program> <annotation.interval> <nameFeature>
 *
 * Outline of the code below:
 *  - argv[1] is loaded with intervalFind_addIntervalsToSearchSpace() and the
 *    gene/transcript entries are derived from all intervals.
 *  - A "##INFO=<ID=VA,...>" comment mentioning argv[1] and argv[2] is added,
 *    then the VCF meta data and column headers are printed.
 *  - Entries flagged by vcf_isInvalidEntry() are skipped.
 *  - For each alternate allele (index h): position is the entry position
 *    minus one and offset is MAX(refLength, altLength) - 1. Every interval
 *    overlapping [position, position + offset] that has a sub-interval with
 *    start <= position and position + offset < end yields one Alteration
 *    whose type is argv[2].
 *  - The alterations are sorted with util_sortAlterationsByGeneIdAndType and
 *    consecutive ones with equal geneId and type are merged into a single
 *    item "h+1:geneName:geneId:strand:type:n/total:transcripts"; items of one
 *    allele are joined with "|".
 *  - flag1 records that the fixed columns followed by ";VA=" were already
 *    printed for the entry; flag2 records that an allele annotation was
 *    already printed, so that later ones are preceded by ",".
 *  - Only entries that received at least one annotation (flag1 == 1) are
 *    completed with their genotype columns and a newline; entries without
 *    any annotation produce no output.
 *
 * NOTE(review): as stored here the definition sits on a few very long
 * physical lines, so each "//" comment inside it extends to the end of its
 * physical line - confirm against the original layout.
 */
int main (int argc, char *argv[]) { Array intervals; Interval *currInterval; SubInterval *currSubInterval; int refLength,altLength,offset; int h,i,j; Stringa buffer; Array geneTranscriptEntries; Texta geneTranscriptIds; Array alterations; Alteration *currAlteration,*nextAlteration; int numTranscripts; Stringa transcripts; VcfEntry *currVcfEntry; int position; Texta alternateAlleles; int flag1,flag2; VcfGenotype *currVcfGenotype; if (argc != 3) { usage ("%s <annotation.interval> <nameFeature>",argv[0]); } intervalFind_addIntervalsToSearchSpace (argv[1],0); geneTranscriptEntries = util_getGeneTranscriptEntries (intervalFind_getAllIntervals ()); buffer = stringCreate (100); transcripts = stringCreate (100); alterations = arrayCreate (100,Alteration); vcf_init ("-"); stringPrintf (buffer,"##INFO=<ID=VA,Number=.,Type=String,Description=\"Variant Annotation, %s, %s\">",argv[1],argv[2]); vcf_addComment (string (buffer)); puts (vcf_writeMetaData ()); puts (vcf_writeColumnHeaders ()); while (currVcfEntry = vcf_nextEntry ()) { if (vcf_isInvalidEntry (currVcfEntry)) { continue; } flag1 = 0; flag2 = 0; position = currVcfEntry->position - 1; // make zero-based alternateAlleles = vcf_getAlternateAlleles (currVcfEntry); for (h = 0; h < arrayMax (alternateAlleles); h++) { refLength = strlen (currVcfEntry->referenceAllele); altLength = strlen (textItem (alternateAlleles,h)); offset = MAX (refLength,altLength) - 1; util_clearAlterations (alterations); intervals = intervalFind_getOverlappingIntervals (currVcfEntry->chromosome,position,position + offset); for (i = 0; i < arrayMax (intervals); i++) { currInterval = arru (intervals,i,Interval*); j = 0; while (j < arrayMax (currInterval->subIntervals)) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); if (currSubInterval->start <= position && (position + offset) < currSubInterval->end) { break; } j++; } if (j == arrayMax (currInterval->subIntervals)) { continue; } util_addAlteration (arrayp (alterations,arrayMax 
(alterations),Alteration),currInterval->name,argv[2],currInterval,position,0); } if (arrayMax (alterations) == 0) { continue; } arraySort (alterations,(ARRAYORDERF)util_sortAlterationsByGeneIdAndType); stringClear (buffer); i = 0; while (i < arrayMax (alterations)) { currAlteration = arrp (alterations,i,Alteration); stringAppendf (buffer,"%s%d:%s:%s:%c:%s",stringLen (buffer) == 0 ? "" : "|",h + 1,currAlteration->geneName,currAlteration->geneId,currAlteration->strand,currAlteration->type); stringClear (transcripts); stringAppendf (transcripts,"%s:%s:%d_%d",currAlteration->transcriptName,currAlteration->transcriptId,currAlteration->transcriptLength,currAlteration->relativePosition); numTranscripts = 1; j = i + 1; while (j < arrayMax (alterations)) { nextAlteration = arrp (alterations,j,Alteration); if (strEqual (currAlteration->geneId,nextAlteration->geneId) && strEqual (currAlteration->type,nextAlteration->type)) { stringAppendf (transcripts,":%s:%s:%d_%d",nextAlteration->transcriptName,nextAlteration->transcriptId,nextAlteration->transcriptLength,nextAlteration->relativePosition); numTranscripts++; } else { break; } j++; } i = j; geneTranscriptIds = util_getTranscriptIdsForGeneId (geneTranscriptEntries,currAlteration->geneId); stringAppendf (buffer,":%d/%d:%s",numTranscripts,arrayMax (geneTranscriptIds),string (transcripts)); } if (flag1 == 0) { printf ("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s;VA=", currVcfEntry->chromosome,currVcfEntry->position,currVcfEntry->id, currVcfEntry->referenceAllele,currVcfEntry->alternateAllele, currVcfEntry->quality,currVcfEntry->filter,currVcfEntry->info); flag1 = 1; } printf ("%s%s",flag2 == 1 ? "," : "",string (buffer)); flag2 = 1; } if (flag1 == 1) { for (i = 0; i < arrayMax (currVcfEntry->genotypes); i++) { currVcfGenotype = arrp (currVcfEntry->genotypes,i,VcfGenotype); if (i == 0) { printf ("\t%s\t",currVcfEntry->genotypeFormat); } printf ("%s%s%s%s",currVcfGenotype->genotype, currVcfGenotype->details[0] != '\0' ? 
":" : "", currVcfGenotype->details[0] != '\0' ? currVcfGenotype->details : "", i < arrayMax (currVcfEntry->genotypes) - 1 ? "\t" : ""); } puts (""); } } vcf_deInit (); return 0; }
double WindscreenLocator::judgeVerticalBorder(int size, int* topList, int topNr, int intervalA, int intervalB, int& xa, int& xb) { // increase order arraySort(topList, topNr); int interval = intervalB - intervalA; int inner_reg = interval * 0.07; int outer_reg_1 = interval * 0.5 / 2; int outer_reg_2 = interval * 0.7 / 2; // outer_reg_1 int a_start = intervalA - outer_reg_1; int a_end = intervalA + inner_reg; a_start = std::max(0, a_start); int b_end = intervalB + outer_reg_1; int b_start = intervalB - inner_reg; b_end = std::min(b_end, size - 1); xa = -1; xb = -1; for(int i = 0; i < topNr; i++){ if(topList[i] >= a_start && topList[i] <= intervalA){ xa = topList[i]; } if(topList[topNr-i-1] >= intervalB && topList[topNr-i-1] <= b_end){ xb = topList[topNr-i-1]; } } if(xa == -1){ for(int i = topNr - 1; i >= 0; i--){ if(topList[i] >= intervalA && topList[i] <= a_end){ xa = topList[i]; } } } if(xb == -1){ for(int i = 0; i < topNr; i++){ if(topList[i] >= b_start && topList[i] <= intervalB){ xb = topList[i]; } } } a_end = a_start; a_start = intervalA - outer_reg_2; a_start = std::max(0, a_start); b_start = b_end; b_end = intervalB + outer_reg_2; b_end = std::min(b_end, size - 1); if(xa == -1){ for(int i = 0; i < topNr; i++){ if(topList[i] >= a_start && topList[i] <= a_end){ xa = topList[i]; } } } if(xb == -1){ for(int i = 0; i < topNr; i++){ if(topList[topNr-i-1] >= b_start && topList[topNr-i-1] <= b_end){ xb = topList[topNr-i-1]; } } } if(xa == -1) xa = intervalA; if(xb == -1) xb = intervalB; return 0.0; }
/**
 * Annotate VCF entries read from "-" with the effect of each alternate allele
 * on the transcripts of an annotation set, and print the annotated entries.
 *
 * Usage: <program> <annotation.interval> <annotation.fa>
 *
 * Outline of the code below:
 *  - argv[1] is loaded with intervalFind_addIntervalsToSearchSpace(); the
 *    sequences of argv[2] are read and sorted with util_sortSequencesByName
 *    so that they can be located with arrayFind().
 *  - Entries flagged by vcf_isInvalidEntry() are skipped. For each alternate
 *    allele (index h): sizeIndel = |refLength - altLength| and
 *    indelOffset = MAX(refLength, altLength) - 1; position is the entry
 *    position minus one.
 *  - Each interval overlapping [position, position + indelOffset] is
 *    classified by comparing that range with its sub-intervals
 *    (OVERLAP_FULLY_CONTAINED, OVERLAP_START for the first sub-interval,
 *    OVERLAP_END for the last one, OVERLAP_SPLICE otherwise); numOverlaps
 *    counts the sub-intervals that matched.
 *  - Recorded alteration types: "multiExonHit" (numOverlaps > 1),
 *    "spliceOverlap", "startOverlap", "endOverlap", and for a single fully
 *    contained hit "insertionNFS"/"insertionFS", "deletionNFS"/"deletionFS"
 *    (NFS when sizeIndel % 3 == 0) or "substitution" (equal allele lengths).
 *    Any other combination ends in die().
 *  - Only for fully contained variants that are not frame shifting: the
 *    transcript sequence is looked up under the name
 *    "name|chromosome|strand|start|end|...", rebuilt with the alternate
 *    allele applied at the variant position, both versions are passed
 *    through util_translate() and the pair is handed to addSubstitution().
 *  - The alterations are sorted with util_sortAlterationsByGeneIdAndType and
 *    consecutive ones with equal geneId and type are merged into one item;
 *    the per-transcript part carries the substitution text when present,
 *    omits the relative position for the four overlap types, and is
 *    "name:id:length_relativePosition" otherwise.
 *  - flag1 / flag2 control printing of the fixed columns plus ";VA=" and of
 *    the "," separator; only entries with at least one annotation are
 *    printed, followed by their genotype columns.
 *
 * NOTE(review): as stored here the definition sits on a few very long
 * physical lines, so each "//" comment inside it extends to the end of its
 * physical line - confirm against the original layout.
 */
int main (int argc, char *argv[]) { Array intervals; Interval *currInterval; SubInterval *currSubInterval; int h,i,j; Array seqs; Seq *currSeq,testSeq; int index; Stringa buffer; Array geneTranscriptEntries; Texta geneTranscriptIds; Array alterations; Alteration *currAlteration,*nextAlteration; char *proteinSequenceBeforeIndel; char *proteinSequenceAfterIndel; int numDisabledTranscripts; Stringa disabledTranscripts; int seqLength,refLength,altLength; char *sequenceBeforeIndel = NULL; int overlapMode; int numOverlaps; int sizeIndel,indelOffset; int overlap; Array coordinates; VcfEntry *currVcfEntry; VcfGenotype *currVcfGenotype; int position; Texta alternateAlleles; int flag1,flag2; if (argc != 3) { usage ("%s <annotation.interval> <annotation.fa>",argv[0]); } intervalFind_addIntervalsToSearchSpace (argv[1],0); geneTranscriptEntries = util_getGeneTranscriptEntries (intervalFind_getAllIntervals ()); seq_init (); fasta_initFromFile (argv[2]); seqs = fasta_readAllSequences (0); fasta_deInit (); arraySort (seqs,(ARRAYORDERF)util_sortSequencesByName); buffer = stringCreate (100); disabledTranscripts = stringCreate (100); alterations = arrayCreate (100,Alteration); vcf_init ("-"); stringPrintf (buffer,"##INFO=<ID=VA,Number=.,Type=String,Description=\"Variant Annotation, %s\">",argv[1]); vcf_addComment (string (buffer)); puts (vcf_writeMetaData ()); puts (vcf_writeColumnHeaders ()); while (currVcfEntry = vcf_nextEntry ()) { if (vcf_isInvalidEntry (currVcfEntry)) { continue; } flag1 = 0; flag2 = 0; position = currVcfEntry->position - 1; // make zero-based alternateAlleles = vcf_getAlternateAlleles (currVcfEntry); for (h = 0; h < arrayMax (alternateAlleles); h++) { refLength = strlen (currVcfEntry->referenceAllele); altLength = strlen (textItem (alternateAlleles,h)); sizeIndel = abs (refLength - altLength); indelOffset = MAX (refLength,altLength) - 1; util_clearAlterations (alterations); intervals = intervalFind_getOverlappingIntervals 
(currVcfEntry->chromosome,position,position + indelOffset); for (i = 0; i < arrayMax (intervals); i++) { currInterval = arru (intervals,i,Interval*); overlapMode = OVERLAP_NONE; numOverlaps = 0; for (j = 0; j < arrayMax (currInterval->subIntervals); j++) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); overlap = rangeIntersection (position,position + indelOffset,currSubInterval->start,currSubInterval->end); if (currSubInterval->start <= position && (position + indelOffset) < currSubInterval->end) { overlapMode = OVERLAP_FULLY_CONTAINED; numOverlaps++; } else if (j == 0 && overlap > 0 && position < currSubInterval->start) { overlapMode = OVERLAP_START; numOverlaps++; } else if (j == (arrayMax (currInterval->subIntervals) - 1) && overlap > 0 && (position + indelOffset) >= currSubInterval->end) { overlapMode = OVERLAP_END; numOverlaps++; } else if (overlap > 0 && overlap <= indelOffset) { overlapMode = OVERLAP_SPLICE; numOverlaps++; } } if (overlapMode == OVERLAP_NONE) { continue; } currAlteration = arrayp (alterations,arrayMax (alterations),Alteration); if (numOverlaps > 1) { util_addAlteration (currAlteration,currInterval->name,"multiExonHit",currInterval,position,0); continue; } else if (numOverlaps == 1 && overlapMode == OVERLAP_SPLICE) { util_addAlteration (currAlteration,currInterval->name,"spliceOverlap",currInterval,position,0); continue; } else if (numOverlaps == 1 && overlapMode == OVERLAP_START) { util_addAlteration (currAlteration,currInterval->name,"startOverlap",currInterval,position,0); continue; } else if (numOverlaps == 1 && overlapMode == OVERLAP_END) { util_addAlteration (currAlteration,currInterval->name,"endOverlap",currInterval,position,0); continue; } else if (numOverlaps == 1 && overlapMode == OVERLAP_FULLY_CONTAINED && altLength > refLength) { if ((sizeIndel % 3) == 0) { util_addAlteration (currAlteration,currInterval->name,"insertionNFS",currInterval,position,0); } else { util_addAlteration 
(currAlteration,currInterval->name,"insertionFS",currInterval,position,0); } } else if (numOverlaps == 1 && overlapMode == OVERLAP_FULLY_CONTAINED && altLength < refLength) { if ((sizeIndel % 3) == 0) { util_addAlteration (currAlteration,currInterval->name,"deletionNFS",currInterval,position,0); } else { util_addAlteration (currAlteration,currInterval->name,"deletionFS",currInterval,position,0); } } else if (numOverlaps == 1 && overlapMode == OVERLAP_FULLY_CONTAINED && altLength == refLength) { util_addAlteration (currAlteration,currInterval->name,"substitution",currInterval,position,0); } else { die ("Unexpected type: %d %s %s %s", currVcfEntry->position,currVcfEntry->chromosome, currVcfEntry->referenceAllele,currVcfEntry->alternateAllele); } if ((sizeIndel % 3) != 0 && altLength != refLength) { continue; } // Only run the remaining block of code if the indel is fully contained (insertion or deletion) AND does not cause a frameshift OR // if it is a substitution that is fully contained in the coding sequence stringPrintf (buffer,"%s|%s|%c|",currInterval->name,currInterval->chromosome,currInterval->strand); for (j = 0; j < arrayMax (currInterval->subIntervals); j++) { currSubInterval = arrp (currInterval->subIntervals,j,SubInterval); stringAppendf (buffer,"%d|%d%s",currSubInterval->start,currSubInterval->end,j < arrayMax (currInterval->subIntervals) - 1 ? 
"|" : ""); } testSeq.name = hlr_strdup (string (buffer)); if (!arrayFind (seqs,&testSeq,&index,(ARRAYORDERF)util_sortSequencesByName)) { die ("Expected to find %s in seqs",string (buffer)); } hlr_free (testSeq.name); currSeq = arrp (seqs,index,Seq); strReplace (&sequenceBeforeIndel,currSeq->sequence); seqLength = strlen (sequenceBeforeIndel); coordinates = util_getCoordinates (currInterval); // arraySort (coordinates,(ARRAYORDERF)util_sortCoordinatesByChromosomeAndTranscriptPosition); Array is already sorted by definition j = 0; stringClear (buffer); while (j < seqLength) { if (util_getGenomicCoordinate (coordinates,j,currVcfEntry->chromosome) == position) { if (altLength > refLength) { stringCat (buffer,textItem (alternateAlleles,h)); j++; continue; } else if (altLength < refLength) { stringCatChar (buffer,sequenceBeforeIndel[j]); j = j + refLength - altLength + 1; continue; } else { stringCat (buffer,textItem (alternateAlleles,h)); j = j + altLength; continue; } } stringCatChar (buffer,sequenceBeforeIndel[j]); j++; } util_destroyCoordinates (coordinates); proteinSequenceBeforeIndel = hlr_strdup (util_translate (currInterval,sequenceBeforeIndel)); proteinSequenceAfterIndel = hlr_strdup (util_translate (currInterval,string (buffer))); addSubstitution (currAlteration,proteinSequenceBeforeIndel,proteinSequenceAfterIndel,indelOffset); hlr_free (proteinSequenceBeforeIndel); hlr_free (proteinSequenceAfterIndel); } if (arrayMax (alterations) == 0) { continue; } arraySort (alterations,(ARRAYORDERF)util_sortAlterationsByGeneIdAndType); stringClear (buffer); i = 0; while (i < arrayMax (alterations)) { currAlteration = arrp (alterations,i,Alteration); stringAppendf (buffer,"%s%d:%s:%s:%c:%s",stringLen (buffer) == 0 ? 
"" : ",",h + 1,currAlteration->geneName,currAlteration->geneId,currAlteration->strand,currAlteration->type); stringClear (disabledTranscripts); if (currAlteration->substitution[0] != '\0') { stringAppendf (disabledTranscripts,"%s:%s:%d_%d_%s",currAlteration->transcriptName,currAlteration->transcriptId,currAlteration->transcriptLength,currAlteration->relativePosition,currAlteration->substitution); } else if (strEqual (currAlteration->type,"multiExonHit") || strEqual (currAlteration->type,"spliceOverlap") || strEqual (currAlteration->type,"startOverlap") || strEqual (currAlteration->type,"endOverlap")) { stringAppendf (disabledTranscripts,"%s:%s:%d",currAlteration->transcriptName,currAlteration->transcriptId,currAlteration->transcriptLength); } else { stringAppendf (disabledTranscripts,"%s:%s:%d_%d",currAlteration->transcriptName,currAlteration->transcriptId,currAlteration->transcriptLength,currAlteration->relativePosition); } numDisabledTranscripts = 1; j = i + 1; while (j < arrayMax (alterations)) { nextAlteration = arrp (alterations,j,Alteration); if (strEqual (currAlteration->geneId,nextAlteration->geneId) && strEqual (currAlteration->type,nextAlteration->type)) { if (nextAlteration->substitution[0] != '\0') { stringAppendf (disabledTranscripts,":%s:%s:%d_%d_%s",nextAlteration->transcriptName,nextAlteration->transcriptId,nextAlteration->transcriptLength,nextAlteration->relativePosition,nextAlteration->substitution); } else if (strEqual (nextAlteration->type,"multiExonHit") || strEqual (nextAlteration->type,"spliceOverlap") || strEqual (nextAlteration->type,"startOverlap") || strEqual (nextAlteration->type,"endOverlap")) { stringAppendf (disabledTranscripts,":%s:%s:%d",nextAlteration->transcriptName,nextAlteration->transcriptId,nextAlteration->transcriptLength); } else { stringAppendf (disabledTranscripts,":%s:%s:%d_%d",nextAlteration->transcriptName,nextAlteration->transcriptId,nextAlteration->transcriptLength,nextAlteration->relativePosition); } 
numDisabledTranscripts++; } else { break; } j++; } i = j; geneTranscriptIds = util_getTranscriptIdsForGeneId (geneTranscriptEntries,currAlteration->geneId); stringAppendf (buffer,":%d/%d:%s",numDisabledTranscripts,arrayMax (geneTranscriptIds),string (disabledTranscripts)); } if (flag1 == 0) { printf ("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s;VA=", currVcfEntry->chromosome,currVcfEntry->position,currVcfEntry->id, currVcfEntry->referenceAllele,currVcfEntry->alternateAllele, currVcfEntry->quality,currVcfEntry->filter,currVcfEntry->info); flag1 = 1; } printf ("%s%s",flag2 == 1 ? "," : "",string (buffer)); flag2 = 1; } if (flag1 == 1) { for (i = 0; i < arrayMax (currVcfEntry->genotypes); i++) { currVcfGenotype = arrp (currVcfEntry->genotypes,i,VcfGenotype); if (i == 0) { printf ("\t%s\t",currVcfEntry->genotypeFormat); } printf ("%s%s%s%s",currVcfGenotype->genotype, currVcfGenotype->details[0] != '\0' ? ":" : "", currVcfGenotype->details[0] != '\0' ? currVcfGenotype->details : "", i < arrayMax (currVcfEntry->genotypes) - 1 ? "\t" : ""); } puts (""); } } vcf_deInit (); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; BLEntry *currBLE; BLEntry currQuery; FILE *fp; char *line; int count; int countRemoved; int index; WordIter w; Array blackList = arrayCreate(20, BLEntry); if (argc != 2) { usage ("%s <blackList.txt>",argv[0]); } fp = fopen( argv[1], "r" ); if( !fp ) die("Unable to open file: %s", argv[1]); // reading blacklist file LineStream ls = ls_createFromFile( argv[1] ); while( line = ls_nextLine(ls) ) { w = wordIterCreate( line, "\t", 1); currBLE = arrayp( blackList, arrayMax(blackList), BLEntry); currBLE->gene1 = hlr_strdup ( wordNext(w) ); currBLE->gene2 = hlr_strdup ( wordNext(w) ); wordIterDestroy(w); } fclose(fp); arraySort( blackList, (ARRAYORDERF) sortBlackListByName1); // beginFiltering count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { // reading the gfr // creating a new query to the black list currQuery.gene1 = currGE->geneSymbolTranscript1; currQuery.gene2 = currGE->geneSymbolTranscript2; // searching against read_1/read_2 int res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1); if( !res ) { // not found, then searching against read_2/read_1 currQuery.gene1 = currGE->geneSymbolTranscript2; currQuery.gene2 = currGE->geneSymbolTranscript1; res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1 ); if( !res ) { // not found, write the instance to stdout, update the counts puts (gfr_writeGfrEntry (currGE)); count++; } else { // found: read2/read1 countRemoved++; } } else { //found: read1/read2 countRemoved++; } } gfr_deInit (); arrayDestroy( blackList ); warn ("%s_BlackListFilter: %s",argv[0], argv[1]); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); return 0; }
int main (int argc, char *argv[]) { GfrEntry *currGE; BLEntry *currBLE; BLEntry currQuery; FILE *fp; char *line; int count; int countRemoved; int index; WordIter w; Array blackList = arrayCreate(20, BLEntry); config *Conf; if ((Conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH")); return EXIT_FAILURE; } if( confp_get( Conf, "ANNOTATION_DIR")==NULL ) { die("%s:\tCannot find ANNOTATION_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( Conf, "BLACKLIST_FILENAME")==NULL ) { die("%s:\tCannot find BLACKLIST_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } Stringa buffer=stringCreate( 100 ); stringPrintf( buffer, "%s/%s", confp_get( Conf, "ANNOTATION_DIR"), confp_get( Conf, "BLACKLIST_FILENAME") ); /* fp = fopen( string( buffer ), "r" ); if( !fp ) die("Unable to open file: %s", string(buffer)); stringDestroy( buffer ); */ // reading blacklist file LineStream ls = ls_createFromFile( string(buffer) ); while( line = ls_nextLine(ls) ) { w = wordIterCreate( line, "\t", 1); currBLE = arrayp( blackList, arrayMax(blackList), BLEntry); currBLE->gene1 = hlr_strdup ( wordNext(w) ); currBLE->gene2 = hlr_strdup ( wordNext(w) ); wordIterDestroy(w); } //fclose(fp); ls_destroy( ls ); stringDestroy( buffer ); arraySort( blackList, (ARRAYORDERF) sortBlackListByName1); // beginFiltering count = 0; countRemoved = 0; gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { // reading the gfr if( currGE->geneSymbolTranscript1 == NULL ) { die("Gene symbols are not present in the GFR file. 
Please run gfrAddInfo before gfrBlackListFilter."); return EXIT_FAILURE; } // creating a new query to the black list currQuery.gene1 = currGE->geneSymbolTranscript1; currQuery.gene2 = currGE->geneSymbolTranscript2; if( strEqual( currQuery.gene1 , currQuery.gene2 ) ) { countRemoved++; continue; } // searching against read_1/read_2 int res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1); if( !res ) { // not found, then searching against read_2/read_1 currQuery.gene1 = currGE->geneSymbolTranscript2; currQuery.gene2 = currGE->geneSymbolTranscript1; res = arrayFind( blackList, &currQuery, &index, (ARRAYORDERF) sortBlackListByName1 ); if( !res ) { // not found, write the instance to stdout, update the counts puts (gfr_writeGfrEntry (currGE)); count++; } else { // found: read2/read1 countRemoved++; } } else { //found: read1/read2 countRemoved++; } } gfr_deInit (); arrayDestroy( blackList ); warn ("%s_BlackListFilter: %s",argv[0], confp_get( Conf, "BLACKLIST_FILENAME")); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close( Conf); return 0; }
int main(int argc, char *argv[]) { Array breakPoints; BreakPoint *currBP; BreakPointRead *currBPR; int minNumReads, minNumUniqueOffsets, minNumReadsForKS,numPossibleOffsets; double pValueCutoffForKS; Array offsets; Array randomNumbers; double *observedOffsets; double *randomOffsets; if (argc != 6) { usage((char*) "%s <minNumReads> <minNumUniqueOffsets> " "<minNumReadsForKS> <pValueCutoffForKS> <numPossibleOffsets>", argv[0]); } minNumReads = std::atoi(argv[1]); minNumUniqueOffsets = std::atoi(argv[2]); minNumReadsForKS = std::atoi(argv[3]); pValueCutoffForKS = std::atof(argv[4]); numPossibleOffsets = std::atoi(argv[5]); bp_init("-"); offsets = arrayCreate(100, int); randomNumbers = arrayCreate(100, int); breakPoints = bp_getBreakPoints(); for (int i = 0; i < arrayMax(breakPoints); i++) { currBP = arrp(breakPoints, i, BreakPoint); arrayClear(offsets); for (int j = 0; j < arrayMax(currBP->breakPointReads); j++) { currBPR = arrp(currBP->breakPointReads, j, BreakPointRead); array(offsets, arrayMax(offsets), int) = currBPR->offset; } arraySort(offsets, (ARRAYORDERF) arrayIntcmp); arrayUniq(offsets, NULL, (ARRAYORDERF) arrayIntcmp); if (arrayMax(currBP->breakPointReads) >= minNumReads && arrayMax(currBP->breakPointReads) < minNumReadsForKS) { if (arrayMax(offsets) >= minNumUniqueOffsets) std::puts(bp_writeBreakPoint(currBP)); } else if (arrayMax(currBP->breakPointReads) >= minNumReads && arrayMax(currBP->breakPointReads) >= minNumReadsForKS) { arrayClear(randomNumbers); for (int j = 0; j < arrayMax(offsets); j++) array(randomNumbers, arrayMax(randomNumbers), int) = std::rand() % numPossibleOffsets; arraySort(randomNumbers, (ARRAYORDERF) arrayIntcmp); observedOffsets = (double *) hlr_malloc(arrayMax(offsets) * sizeof(double)); randomOffsets = (double *) hlr_malloc(arrayMax(offsets) * sizeof(double)); for (int j = 0; j < arrayMax(offsets); j++) { observedOffsets[j] = arru(offsets, j, int); randomOffsets[j] = arru(randomNumbers, j, int); } if (pValueCutoffForKS < 
TMath::KolmogorovTest(arrayMax(offsets), observedOffsets, arrayMax(offsets), randomOffsets, "")) std::puts(bp_writeBreakPoint(currBP)); hlr_free(observedOffsets); hlr_free(randomOffsets); } }