int main (int argc, char *argv[]) { GfrEntry *currGE; int i,j,k,l, h,index; Stringa buffer,cmd,fnSequencesToAlign; FILE *fp; FILE *fp1; FILE *fp2; FILE *freads1; FILE *freads2; Array gfrEntries; BowtieQuery *currBQ,testBQ; BowtieEntry *currBE; Texta seqNames; int readSize1, readSize2, minReadSize; Array bowtieQueries; char transcriptNumber; int isHomologous,homologousCount; int count; int countRemoved; unsigned short int tooMany; BlatQuery *blQ; config *conf; if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc", argv[0]); return EXIT_FAILURE; } if ( (confp_get( conf, "BLAT_TWO_BIT_TO_FA")) == NULL) { die("%s:\tCannot find BLAT_TWO_BIT_TO_FA in the configuration file: %s", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if ( (confp_get( conf,"BLAT_DATA_DIR")) == NULL) { die("%s:\tCannot find BLAT_DATA_DIR in the configuration file: %sc", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "TMP_DIR")==NULL ) { die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) { die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "PSEUDOGENE_DIR")==NULL ) { die("%s:\tCannot find PSEUDOGENE_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "PSEUDOGENE_FILENAME")==NULL ) { die("%s:\tCannot find PSEUDOGENE_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } cmd = stringCreate (100); // initializing the gfServers stringPrintf( cmd, "%s status %s %s &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT") ); int ret = hlr_system( string(cmd), 1 ); if( ret != 0 ) { // not initialized stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_genome.log start %s %s %s/%s &", confp_get( conf, "BLAT_GFSERVER"), confp_get(conf, "TMP_DIR"),confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME")); hlr_system( string( cmd ), 0 ); long int startTime = time(0); stringPrintf( cmd , "%s status %s %s &2> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ; if( hlr_system( string(cmd), 1 ) != 0 ) { die("gfServer for %s/%s not initialized: %s %s %s", confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); return EXIT_FAILURE; } } // end initialization gfr_init ("-"); gfrEntries = gfr_parse (); if (arrayMax (gfrEntries) == 0){ puts (gfr_writeHeader ()); gfr_deInit (); return 0; } seqNames = textCreate (10000); buffer = stringCreate (100); fnSequencesToAlign = stringCreate (100); count = 0; countRemoved = 0; stringPrintf( buffer, "%s/%s", confp_get( conf, "PSEUDOGENE_DIR"), confp_get( conf, "PSEUDOGENE_FILENAME") ); intervalFind_addIntervalsToSearchSpace (string(buffer),0); puts (gfr_writeHeader ()); for (i = 0; i < arrayMax (gfrEntries); i++) { currGE = arrp (gfrEntries,i,GfrEntry); homologousCount = 0; minReadSize=10000; // creating two fasta files with the two genes stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript1.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA") , confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript1, currGE->startTranscript1, currGE->endTranscript1, confp_get(conf, "TMP_DIR"), currGE->id); hlr_system( string(cmd) , 0); stringPrintf( cmd, "%s %s/%s -seq=%s -start=%d -end=%d %s/%s_transcript2.fa", confp_get(conf, "BLAT_TWO_BIT_TO_FA"), confp_get(conf, "BLAT_DATA_DIR"), confp_get(conf, "BLAT_TWO_BIT_DATA_FILENAME"), currGE->chromosomeTranscript2, currGE->startTranscript2, currGE->endTranscript2, confp_get(conf, "TMP_DIR"), currGE->id); hlr_system( string(cmd) , 0); Stringa fa1 = stringCreate( 100 ); Stringa fa2 = stringCreate( 100 ); // creating the two fasta files with the reads stringPrintf( fa1, "%s/%s_reads1.fa", confp_get(conf, "TMP_DIR"), currGE->id); if (!(freads1 = fopen ( string(fa1) ,"w"))) { die ("Unable to open file: %s",string (fa1)); } // writing the reads of the first end into file for (l = 0; l < arrayMax (currGE->readsTranscript1); l++) { char* currRead1 = hlr_strdup( textItem (currGE->readsTranscript1,l)); // read1 readSize1 = strlen( currRead1 ); if( readSize1 == 0 ) die("Read size cannot be zero: read1[ %s ]", currRead1); if( readSize1 < minReadSize ) minReadSize = readSize1; fprintf( freads1, ">%d\n%s\n", l, currRead1 ); hlr_free( currRead1 ); } fclose( freads1 ); stringPrintf( fa2, "%s/%s_reads2.fa", confp_get(conf, "TMP_DIR"), currGE->id); if (!(freads2 = fopen ( string(fa2) ,"w"))) { die ("Unable to open file: %s",string (fa2)); } // writing the reads of the second end into file for (l = 0; l < arrayMax (currGE->readsTranscript2); l++) { char* currRead2 = hlr_strdup( textItem (currGE->readsTranscript2,l)); // read2 readSize2 = strlen( currRead2 ); if( readSize2 == 0 ) die("Read size cannot be zero: read2[ %s ]", currRead2); if( readSize2 < minReadSize ) minReadSize = readSize2; fprintf( freads2, ">%d\n%s\n", l, currRead2 ); hlr_free( currRead2 ); } fclose( freads2 ); // collapse the reads 2 ## requires the FASTX package stringPrintf( cmd, "%s -i %s/%s_reads2.fa -o %s/%s_reads2.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); hlr_system (string (cmd),0); //blat of reads2 against the first transcript stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript1.fa %s/%s_reads2.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); // reading the results of blast from Pipe blatParser_initFromPipe( string(cmd) ); while( blQ = blatParser_nextQuery() ) { int nucleotideOverlap = getNucleotideOverlap ( blQ ); if ( nucleotideOverlap > ( ((double)readSize2)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) { char* value = strchr(blQ->qName,'-'); homologousCount+=atoi(value+1); } } blatParser_deInit(); // collapse the reads 1 ## requires the FASTX package on the path stringPrintf( cmd, "%s -i %s/%s_reads1.fa -o %s/%s_reads1.collapsed.fa", confp_get(conf, "FASTX_COLLAPSER"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); hlr_system (string (cmd),0); //blat of reads1 against the second transcript stringPrintf( cmd, "%s -t=dna -out=psl -fine -tileSize=15 %s/%s_transcript2.fa %s/%s_reads1.collapsed.fa stdout",confp_get(conf, "BLAT_BLAT"), confp_get(conf, "TMP_DIR"), currGE->id, confp_get(conf, "TMP_DIR"), currGE->id ); blatParser_initFromPipe( string(cmd) ); while( blQ = blatParser_nextQuery() ) { int nucleotideOverlap = getNucleotideOverlap ( blQ ); if ( nucleotideOverlap > ( ((double)readSize1)* atof(confp_get(conf,"MAX_OVERLAP_ALLOWED"))) ) { char* value = strchr(blQ->qName,'-'); homologousCount+=atoi(value+1); } } blatParser_deInit(); stringPrintf (cmd,"cd %s;rm -rf %s_reads?.fa %s_reads?.collapsed.fa %s_transcript?.fa", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id); hlr_system( string(cmd) , 0); if (((double)homologousCount / (double)arrayMax(currGE->readsTranscript1)) <= atof(confp_get(conf, "MAX_FRACTION_HOMOLOGOUS")) ) { homologousCount = 0; // there is no homology between the two genes, but what about the rest of the genome writeFasta( currGE, &minReadSize, confp_get(conf, "TMP_DIR") ); stringPrintf(cmd, "cd %s; %s %s %s / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.smallhomology.psl &>/dev/null", confp_get(conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT"), minReadSize - (int)(0.1 * minReadSize) > 20 ? minReadSize - (int) (0.1 * minReadSize) : 20 , currGE->id, currGE->id); int attempts=0; ret = hlr_system( string(cmd), 1 ); while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++; if( attempts == 5000 ) { die("Cannot map the reads %s", string( cmd )); return EXIT_FAILURE; } // reading the results of blast from File stringPrintf(cmd, "%s/%s.smallhomology.psl", confp_get( conf, "TMP_DIR"), currGE->id); blatParser_initFromFile( string(cmd) ); tooMany = 1; while( blQ = blatParser_nextQuery() ) { tooMany = 0; checkPseudogeneOverlap( blQ ); if( arrayMax( blQ->entries ) > 1 ) { homologousCount+= arrayMax( blQ->entries ) - 1; char* value = strchr( blQ->qName,'/' ); if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName ); int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead ); currGIR->flag = 1; } } blatParser_deInit(); if ( tooMany == 1 || ( ( (double) homologousCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) ) > atof(confp_get(conf, "MAX_FRACTION_HOMOLOGOUS")) ) ) { countRemoved++; stringPrintf (cmd,"cd %s; rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id); hlr_system( string(cmd), 1 ); continue; } // writing the gfrEntry, if everthing else didn't stop if( homologousCount > 0 ) updateStats( currGE ); puts (gfr_writeGfrEntry (currGE)); count++; // removing temporary files stringPrintf (cmd,"cd %s;rm -rf %s_reads*.fa %s_reads?.collapsed.fa %s_transcript?.fa %s.smallhomology.psl", confp_get(conf, "TMP_DIR"), currGE->id,currGE->id,currGE->id,currGE->id); hlr_system( string(cmd) , 1); } else { countRemoved++; } } gfr_deInit (); stringDestroy (fnSequencesToAlign); stringDestroy (cmd); stringDestroy (buffer); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { GfrEntry *currGE; int count; int countRemoved; int mitochondrialCount; unsigned int minReadSize; int i; Stringa cmd; BlatQuery *blQ=NULL; config *conf = NULL; /**< Pointer to configuration file .fusionseqrc */ if ((conf = confp_open(getenv("FUSIONSEQ_CONFPATH"))) == NULL) { die("%s:\tCannot find .fusionseqrc: %s", argv[0], getenv("FUSIONSEQ_CONFPATH")); return EXIT_FAILURE; } if( confp_get( conf,"MAX_OVERLAP_ALLOWED")==NULL ) { die("%s:\tCannot find MAX_OVERLAP_ALLOWED in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf,"MAX_FRACTION_HOMOLOGOUS")==NULL ) { die("%s:\tCannot find MAX_FRACTION_HOMOLOGOUS in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "MITOCHONDRIAL_DIR")==NULL ) { die("%s:\tCannot find MITOCHONDRIAL_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "MITOCHONDRIAL_FILENAME")==NULL ) { die("%s:\tCannot find MITOCHONDRIAL_FILENAME in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "TMP_DIR")==NULL ) { die("%s:\tCannot find TMP_DIR in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFCLIENT")==NULL ) { die("%s:\tCannot find BLAT_GFCLIENT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } if( confp_get( conf, "BLAT_GFSERVER_HOST")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_HOST in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; }if( confp_get( conf, "BLAT_GFSERVER_PORT")==NULL ) { die("%s:\tCannot find BLAT_GFSERVER_PORT in the configuration file: %s)", argv[0], getenv("FUSIONSEQ_CONFPATH") ); return EXIT_FAILURE; } count = 0; countRemoved = 0; cmd = stringCreate (100); // initializing the gfServers stringPrintf( cmd, "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2); int ret = hlr_system( string(cmd), 1 ); if( ret != 0 ) { // not initialized stringPrintf( cmd , "%s -repMatch=100000 -tileSize=12 -canStop -log=%s/gfServer_mitochondrial.log start %s %d %s/%s &", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "TMP_DIR"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf,"MITOCHONDRIAL_FILENAME")); hlr_system( string( cmd ), 0 ); long int startTime = time(0); stringPrintf( cmd , "%s status %s %d &> /dev/null", confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2); while( hlr_system( string(cmd), 1) && (time(0)-startTime)<600 ) ; if( hlr_system( string(cmd), 1 ) != 0 ) { die("gfServer for %s/%s not initialized: %s %s %s", confp_get( conf, "MITOCHONDRIAL_DIR"), confp_get( conf, "MITOCHONDRIAL_FILENAME"), confp_get( conf, "BLAT_GFSERVER"), confp_get( conf, "BLAT_GFSERVER_HOST"), confp_get( conf, "BLAT_GFSERVER_PORT")); return EXIT_FAILURE; } } gfr_init ("-"); puts (gfr_writeHeader ()); while (currGE = gfr_nextEntry ()) { if (strEqual(currGE->chromosomeTranscript1, "chrM") || strEqual(currGE->chromosomeTranscript2, "chrM")) { countRemoved++; continue; } else { mitochondrialCount = 0; minReadSize=1000; writeFasta( currGE, &minReadSize, confp_get( conf, "TMP_DIR") ); // in util.c stringPrintf(cmd, "cd %s;%s %s %d / -t=dna -q=dna -minScore=%d -out=psl %s_reads.fa %s.mito.psl &>/dev/null", confp_get( conf, "TMP_DIR"), confp_get( conf, "BLAT_GFCLIENT"), confp_get( conf, "BLAT_GFSERVER_HOST"), atoi(confp_get( conf, "BLAT_GFSERVER_PORT")) + 2, minReadSize - 5 > 20 ? minReadSize - 5 : 20 , currGE->id, currGE->id); int attempts=0; ret = hlr_system( string(cmd), 1 ); while( hlr_system( string(cmd), 1 ) && attempts<5000 ) attempts++; if( attempts == 5000 ) { die("Cannot map the reads %s", string( cmd )); return EXIT_FAILURE; } // reading the results of blast from File stringPrintf(cmd, "%s/%s.mito.psl", confp_get( conf, "TMP_DIR"), currGE->id); blatParser_initFromFile( string(cmd) ); while( blQ = blatParser_nextQuery() ) { //warn("iter %d\tquery %s", iter, blQ->qName );iter++; int nucleotideOverlap = getNucleotideOverlap ( blQ ); if (nucleotideOverlap > (((double) minReadSize) * strtod(confp_get( conf, "MAX_OVERLAP_ALLOWED"), NULL))) { char* value = strchr( blQ->qName,'/' ); if( value ) *value = '\0'; else die("Not a valid index in the blat query name:\t%s", blQ->qName ); int indexOfInter = atoi( blQ->qName ); // the following three lines should removed the read if writing the GFR entry GfrInterRead *currGIR = arrp( currGE->interReads, indexOfInter, GfrInterRead ); currGIR->flag = 1; mitochondrialCount++; } } blatParser_deInit(); if ( ( (double) mitochondrialCount / (double) ( arrayMax(currGE->readsTranscript1) + arrayMax(currGE->readsTranscript2) ) ) <= strtod(confp_get( conf, "MAX_FRACTION_HOMOLOGOUS"), NULL)) { if( mitochondrialCount > 0 ) updateStats( currGE ); // writing the gfrEntry puts (gfr_writeGfrEntry (currGE)); count++; } else { countRemoved++; } // removing temporary files stringPrintf (cmd,"rm -rf %s/%s_reads.fa %s/%s.mito.psl", confp_get( conf, "TMP_DIR"), currGE->id, confp_get( conf, "TMP_DIR"), currGE->id ); hlr_system( string(cmd) , 1); } } gfr_deInit (); stringDestroy( cmd ); warn ("%s_numRemoved: %d",argv[0],countRemoved); warn ("%s_numGfrEntries: %d",argv[0],count); confp_close(conf); return 0; }
int main(int argc, char * const argv[]) { GArgs args(argc, argv, "hFCq:r:o:"); int e; if ((e=args.isError())>0) GError("%s\nInvalid argument: %s\n", USAGE, argv[e]); if (args.getOpt('h')!=NULL){ GMessage("%s\n", USAGE); exit(1); } args.startNonOpt(); GStr fadb(args.nextNonOpt()); if (fadb.is_empty()) GError("%s Error: multi-fasta file expected!\n",USAGE); GStr fname(fadb); fname.append(".fai"); bool createLocal=(args.getOpt('F')!=NULL); const char* idxname=(createLocal)? NULL : fname.chars(); GFastaIndex faidx(fadb.chars(), idxname); //also tried to load the index if exists in the current directory GStr fnamecwd(fname); //name in current directory (without path) int ip=-1; if ((ip=fnamecwd.rindex(CHPATHSEP))>=0) { fnamecwd.cut(0,ip+1); } if (!createLocal) { //look for existing indexes to load //try the same directory as the fasta file first if (!faidx.hasIndex() and fileExists(fnamecwd.chars())>1) { //try current working directory next faidx.loadIndex(fnamecwd.chars()); } if (!faidx.hasIndex()) {//could not load any index data //try to create it in the same directory as the fasta file GMessage("No fasta index found. Rebuilding..\n"); faidx.buildIndex(); if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n"); GMessage("Fasta index rebuilt.\n"); //check if we can create a file there FILE* fcreate=fopen(fname.chars(), "w"); if (fcreate==NULL) GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fname.chars()); else { fclose(fcreate); if (faidx.storeIndex(fname.chars())<faidx.getCount()) GMessage("Warning: error writing the index file %s!\n",fname.chars()); } //creating index file in the same directory as fasta file }//trying to create the index file } if (createLocal || !faidx.hasIndex()) { //simply rebuild the index in the current directory and use it: //remove directories in path, if any if (faidx.getCount()==0) { faidx.buildIndex(); if (faidx.getCount()==0) GError("Error: no fasta records to be indexed!\n"); } if (faidx.storeIndex(fnamecwd.chars())<faidx.getCount()) GMessage("Warning: error writing the index file %s!\n",fnamecwd.chars()); } GStr qry(args.getOpt('q')); if (qry.is_empty()) exit(0); GFastaRec* farec=faidx.getRecord(qry.chars()); if (farec==NULL) { GMessage("Error: couldn't find fasta record for '%s'!\n",qry.chars()); exit(1); } GFaSeqGet faseq(fadb.chars(),farec->seqlen, farec->fpos, farec->line_len, farec->line_blen); //TODO: read these from -r option uint qstart=0; uint qend=0; //farec->seqlen bool revCompl=(args.getOpt('C')!=NULL); char* s=args.getOpt('r'); if (s!=NULL) { char *p=s; while (isdigit(*p)) p++; if (*p=='-') { sscanf(s,"%u-%u",&qstart, &qend); if (qstart==0 || qend==0) GError("Error parsing sequence range: %s\n",s); } else if (*p==':') { int qlen=0; sscanf(s,"%u:%d", &qstart, &qlen); if (qstart==0 || qlen==0) GError("Error parsing sequence range: %s\n",s); qend=qstart+qlen-1; } else if (*p=='.') { sscanf(s,"%u..%u",&qstart, &qend); if (qstart==0 || qend==0) GError("Error parsing sequence range: %s\n",s); } } if (qstart==0) qstart=1; if (qend==0) qend=farec->seqlen; // call faseq.loadall() here if multiple ranges are to be extracted all // over this genomic sequence char* subseq=faseq.copyRange(qstart, qend, revCompl, true); FILE* f_out=NULL; openfwrite(f_out, args, 'o'); if (f_out==NULL) f_out=stdout; writeFasta(f_out, qry.chars(), NULL, subseq, 70, qend-qstart+1); GFREE(subseq); }