static void dbiblast_newname(AjPStr* nname, const AjPStr oname, const char *suff) { ajStrAssignS(nname, oname); if(ajStrGetCharFirst(oname)=='@') ajStrCutStart(nname, 1); ajStrAppendK(nname, '.'); ajStrAppendC(nname, suff); return; }
static void dbiblast_dbname(AjPStr* dbname, const AjPStr oname, const char *suff) { AjPStr suffix = NULL; ajFmtPrintS(&suffix, ".%s", suff); ajStrAssignS(dbname, oname); if(ajStrGetCharFirst(oname)=='@') ajStrCutStart(dbname, 1); if(!ajStrSuffixS(*dbname, suffix)) { ajStrDel(&suffix); return; } ajStrCutEnd(dbname, ajStrGetLen(suffix)); ajStrDel(&suffix); return; }
int main(int argc, char **argv) { AjPSeqall seqall = NULL; AjPFile dend_outfile = NULL; AjPStr tmp_dendfilename = NULL; AjPFile tmp_dendfile = NULL; AjPStr tmp_aln_outfile = NULL; AjPSeqset seqset = NULL; AjPSeqout seqout = NULL; AjPSeqin seqin = NULL; AjBool only_dend; AjBool are_prot = ajFalse; AjBool do_slow; AjBool use_dend; AjPFile dend_file = NULL; AjPStr dend_filename = NULL; ajint ktup; ajint gapw; ajint topdiags; ajint window; AjBool nopercent; AjPStr pw_matrix = NULL; AjPStr pw_dna_matrix = NULL; AjPFile pairwise_matrix = NULL; float pw_gapc; float pw_gapv; AjPStr pwmstr = NULL; char pwmc = '\0'; AjPStr pwdstr = NULL; char pwdc = '\0'; AjPStr m1str = NULL; AjPStr m2str = NULL; char m1c = '\0'; char m2c = '\0'; AjPStr matrix = NULL; AjPStr dna_matrix = NULL; AjPFile ma_matrix = NULL; float gapc; float gapv; AjBool endgaps; AjBool norgap; AjBool nohgap; ajint gap_dist; ajint maxdiv; AjPStr hgapres = NULL; AjPSeqout fil_file = NULL; AjPSeq seq = NULL; AjPStr cmd = NULL; AjPStr tmp = NULL; AjPStr tmpFilename; AjPStr line = NULL; ajint nb = 0; /* get all the parameters */ embInit("emma", argc, argv); pwmstr = ajStrNew(); pwdstr = ajStrNew(); m1str = ajStrNew(); m2str = ajStrNew(); seqall = ajAcdGetSeqall("sequence"); seqout = ajAcdGetSeqoutset("outseq"); dend_outfile = ajAcdGetOutfile("dendoutfile"); only_dend = ajAcdGetToggle("onlydend"); use_dend = ajAcdGetToggle("dendreuse"); dend_file = ajAcdGetInfile("dendfile"); if (dend_file) ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file)); ajFileClose(&dend_file); do_slow = ajAcdGetToggle("slowalign"); ktup = ajAcdGetInt("ktup"); gapw = ajAcdGetInt("gapw"); topdiags = ajAcdGetInt("topdiags"); window = ajAcdGetInt("window"); nopercent = ajAcdGetBoolean("nopercent"); pw_matrix = ajAcdGetListSingle("pwmatrix"); pwmc = ajStrGetCharFirst(pw_matrix); if(pwmc=='b') ajStrAssignC(&pwmstr,"blosum"); else if(pwmc=='p') ajStrAssignC(&pwmstr,"pam"); else if(pwmc=='g') ajStrAssignC(&pwmstr,"gonnet"); else if(pwmc=='i') ajStrAssignC(&pwmstr,"id"); else if(pwmc=='o') ajStrAssignC(&pwmstr,"own"); pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix"); pwdc = ajStrGetCharFirst(pw_dna_matrix); if(pwdc=='i') ajStrAssignC(&pwdstr,"iub"); else if(pwdc=='c') ajStrAssignC(&pwdstr,"clustalw"); else if(pwdc=='o') ajStrAssignC(&pwdstr,"own"); pairwise_matrix = ajAcdGetInfile("pairwisedatafile"); pw_gapc = ajAcdGetFloat( "pwgapopen"); pw_gapv = ajAcdGetFloat( "pwgapextend"); matrix = ajAcdGetListSingle( "matrix"); m1c = ajStrGetCharFirst(matrix); if(m1c=='b') ajStrAssignC(&m1str,"blosum"); else if(m1c=='p') ajStrAssignC(&m1str,"pam"); else if(m1c=='g') ajStrAssignC(&m1str,"gonnet"); else if(m1c=='i') ajStrAssignC(&m1str,"id"); else if(m1c=='o') ajStrAssignC(&m1str,"own"); dna_matrix = ajAcdGetListSingle( "dnamatrix"); m2c = ajStrGetCharFirst(dna_matrix); if(m2c=='i') ajStrAssignC(&m2str,"iub"); else if(m2c=='c') ajStrAssignC(&m2str,"clustalw"); else if(m2c=='o') ajStrAssignC(&m2str,"own"); ma_matrix = ajAcdGetInfile("mamatrixfile"); gapc = ajAcdGetFloat("gapopen"); gapv = ajAcdGetFloat("gapextend"); endgaps = ajAcdGetBoolean("endgaps"); norgap = ajAcdGetBoolean("norgap"); nohgap = ajAcdGetBoolean("nohgap"); gap_dist = ajAcdGetInt("gapdist"); hgapres = ajAcdGetString("hgapres"); maxdiv = ajAcdGetInt("maxdiv"); tmp = ajStrNewC("fasta"); /* ** Start by writing sequences into a unique temporary file ** get file pointer to unique file */ fil_file = ajSeqoutNew(); tmpFilename = emma_getUniqueFileName(); if(!ajSeqoutOpenFilename( fil_file, tmpFilename)) embExitBad(); /* Set output format to fasta */ ajSeqoutSetFormatS( fil_file, tmp); while(ajSeqallNext(seqall, &seq)) { /* ** Check sequences are all of the same type ** Still to be done ** Write out sequences */ if (!nb) are_prot = ajSeqIsProt(seq); ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); if(nb < 2) ajFatal("Multiple alignments need at least two sequences"); /* Generate clustalw command line */ cmd = ajStrNewS(ajAcdGetpathC("clustalw")); /* add tmp file containing sequences */ ajStrAppendC(&cmd, " -infile="); ajStrAppendS(&cmd, tmpFilename); /* add out file name */ tmp_aln_outfile = emma_getUniqueFileName(); ajStrAppendC(&cmd, " -outfile="); ajStrAppendS(&cmd, tmp_aln_outfile); /* calculating just the nj tree or doing full alignment */ if(only_dend) ajStrAppendC(&cmd, " -tree"); else if(!use_dend) ajStrAppendC(&cmd, " -align"); /* Set sequence type from information from acd file */ if(are_prot) ajStrAppendC(&cmd, " -type=protein"); else ajStrAppendC(&cmd, " -type=dna"); /* ** set output to MSF format - will read in this file later and output ** user requested format */ ajStrAppendC(&cmd, " -output="); ajStrAppendC(&cmd, "gcg"); /* If going to do pairwise alignment */ if(!use_dend) { /* add fast pairwise alignments*/ if(!do_slow) { ajStrAppendC(&cmd, " -quicktree"); ajStrAppendC(&cmd, " -ktuple="); ajStrFromInt(&tmp, ktup); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -window="); ajStrFromInt(&tmp, window); ajStrAppendS(&cmd, tmp); if(nopercent) ajStrAppendC(&cmd, " -score=percent"); else ajStrAppendC(&cmd, " -score=absolute"); ajStrAppendC(&cmd, " -topdiags="); ajStrFromInt(&tmp, topdiags); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -pairgap="); ajStrFromInt(&tmp, gapw); ajStrAppendS(&cmd, tmp); } else { if(pairwise_matrix) { if(are_prot) ajStrAppendC(&cmd, " -pwmatrix="); else ajStrAppendC(&cmd, " -pwdnamatrix="); ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix)); } else { if(are_prot) { ajStrAppendC(&cmd, " -pwmatrix="); ajStrAppendS(&cmd, pwmstr); } else { ajStrAppendC(&cmd, " -pwdnamatrix="); ajStrAppendS(&cmd, pwdstr); } } ajStrAppendC(&cmd, " -pwgapopen="); ajStrFromFloat(&tmp, pw_gapc, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -pwgapext="); ajStrFromFloat(&tmp, pw_gapv, 3); ajStrAppendS(&cmd, tmp); } } /* Multiple alignments */ /* using existing tree or generating new tree? */ if(use_dend) { ajStrAppendC(&cmd, " -usetree="); ajStrAppendS(&cmd, dend_filename); } else { /* use tmp file to hold dend file, will read back in later */ tmp_dendfilename = emma_getUniqueFileName(); ajStrAppendC(&cmd, " -newtree="); ajStrAppendS(&cmd, tmp_dendfilename); } if(ma_matrix) { if(are_prot) ajStrAppendC(&cmd, " -matrix="); else ajStrAppendC(&cmd, " -pwmatrix="); ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix)); } else { if(are_prot) { ajStrAppendC(&cmd, " -matrix="); ajStrAppendS(&cmd, m1str); } else { ajStrAppendC(&cmd, " -dnamatrix="); ajStrAppendS(&cmd, m2str); } } ajStrAppendC(&cmd, " -gapopen="); ajStrFromFloat(&tmp, gapc, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -gapext="); ajStrFromFloat(&tmp, gapv, 3); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -gapdist="); ajStrFromInt(&tmp, gap_dist); ajStrAppendS(&cmd, tmp); ajStrAppendC(&cmd, " -hgapresidues="); ajStrAppendS(&cmd, hgapres); if(!endgaps) ajStrAppendC(&cmd, " -endgaps"); if(norgap) ajStrAppendC(&cmd, " -nopgap"); if(nohgap) ajStrAppendC(&cmd, " -nohgap"); ajStrAppendC(&cmd, " -maxdiv="); ajStrFromInt(&tmp, maxdiv); ajStrAppendS(&cmd, tmp); /* run clustalw */ /* ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */ ajDebug("Executing '%S'\n", cmd); ajSysExecS(cmd); /* produce alignment file only if one was produced */ if(!only_dend) { /* read in tmp alignment output file to output through EMBOSS output */ seqin = ajSeqinNew(); /* ** add the Usa format to the start of the filename to tell EMBOSS ** format of file */ ajStrInsertC(&tmp_aln_outfile, 0, "msf::"); ajSeqinUsa(&seqin, tmp_aln_outfile); seqset = ajSeqsetNew(); if(ajSeqsetRead(seqset, seqin)) { ajSeqoutWriteSet(seqout, seqset); ajSeqoutClose(seqout); ajSeqinDel(&seqin); /* remove the Usa from the start of the string */ ajStrCutStart(&tmp_aln_outfile, 5); } else ajFmtError("Problem writing out EMBOSS alignment file\n"); } /* read in new tmp dend file (if produced) to output through EMBOSS */ if(tmp_dendfilename!=NULL) { tmp_dendfile = ajFileNewInNameS( tmp_dendfilename); if(tmp_dendfile!=NULL){ while(ajReadlineTrim(tmp_dendfile, &line)) ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line)); ajFileClose(&tmp_dendfile); ajSysFileUnlinkS(tmp_dendfilename); } } ajSysFileUnlinkS(tmpFilename); if(!only_dend) ajSysFileUnlinkS(tmp_aln_outfile); ajStrDel(&pw_matrix); ajStrDel(&matrix); ajStrDel(&pw_dna_matrix); ajStrDel(&dna_matrix); ajStrDel(&tmp_dendfilename); ajStrDel(&dend_filename); ajStrDel(&tmp_aln_outfile); ajStrDel(&pwmstr); ajStrDel(&pwdstr); ajStrDel(&m1str); ajStrDel(&m2str); ajStrDel(&hgapres); ajStrDel(&cmd); ajStrDel(&tmp); ajStrDel(&tmpFilename); ajStrDel(&line); ajFileClose(&dend_outfile); ajFileClose(&tmp_dendfile); ajFileClose(&dend_file); ajFileClose(&pairwise_matrix); ajFileClose(&ma_matrix); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&seq); ajSeqoutDel(&seqout); ajSeqoutDel(&fil_file); ajSeqinDel(&seqin); embExit(); return 0; }
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, ajuint type, AjBool upper, AjBool lower) { AjPPatlistRegex patlist = NULL; AjPStr line = NULL; AjPStr pat = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPStr patstr = NULL; ajuint ifmt; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "regex"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistRegexNewType(type); ifmt = patternRegexFormat(fmt); if(ajStrGetCharFirst(patspec) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open regular expression file '%S'", patstr); return NULL; } line = ajStrNew(); pat = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); } break; default: while (ajBuffreadLineTrim(infile,&line)) { if (ajStrFindC(line,">")>-1) { npat++; if (ajStrGetLen(name)) { if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&name); ajStrSetClear(&pat); } ajStrCutStart(&line,1); ajStrAssignS (&name,line); if(!ajStrGetLen(name)) ajFmtPrintS(&name, "%S%u", namestr, npat); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternRegexNewList(patlist,name,pat); ajStrSetClear(&pat); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&pat, patspec); if(lower) ajStrFmtLower(&pat); if(upper) ajStrFmtUpper(&pat); ajStrAssignS(&name, namestr); ajPatternRegexNewList(patlist,name,pat); } ajStrDel(&name); ajStrDel(&namestr); ajStrDel(&patstr); ajStrDel(&line); ajStrDel(&pat); return patlist; }
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, AjBool protein, ajuint mismatches) { AjPPatlistSeq patlist = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPRegexp mismreg = NULL; AjPStr patstr = NULL; AjPStr pat = NULL; ajuint mismatch = 0; ajint ifmt = 0; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "pattern"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistSeqNewType(protein); ifmt = patternSeqFormat(fmt); ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' " "protein: %B mismatches: %d\n", patspec, patname, protein, mismatches); if(ajStrGetCharFirst(patstr) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open pattern file '%S'", patstr); return NULL; } line = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternSeqNewList(patlist,name,pat,mismatches); ajStrSetClear(&pat); } break; default: mismreg = ajRegCompC("<mismatch=(\\d+)>"); while (ajBuffreadLineTrim(infile,&line)) { if (ajStrGetCharFirst(line) == '>') { if (ajStrGetLen(name)) { ajPatternSeqNewList(patlist,name,pat, mismatch); ajStrSetClear(&name); ajStrSetClear(&pat); mismatch=mismatches; } ajStrCutStart(&line,1); if (ajRegExec(mismreg,line)) { ajRegSubI(mismreg,1,&name); ajStrToUint(name,&mismatch); ajStrTruncateLen(&line,ajRegOffset(mismreg)); ajStrTrimWhiteEnd(&line); } ajStrAssignS (&name,line); ajStrAssignEmptyS(&name, patname); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternSeqNewList(patlist,name,pat,mismatch); ajRegFree(&mismreg); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&name, namestr); ajPatternSeqNewList(patlist,name,patstr,mismatches); } ajStrDel(&name); ajStrDel(&line); ajStrDel(&pat); ajStrDel(&namestr); ajStrDel(&patstr); return patlist; }
int main(int argc, char **argv) { AjPSeqall nucseq; /* input nucleic sequences */ AjPSeqset protseq; /* input aligned protein sequences */ AjPSeqout seqout; AjPSeq nseq; /* next nucleic sequence to align */ const AjPSeq pseq; /* next protein sequence use in alignment */ AjPTrn trnTable; AjPSeq pep; /* translation of nseq */ AjPStr tablelist; ajint table; AjPSeqset outseqset; /* set of aligned nucleic sequences */ ajint proteinseqcount = 0; AjPStr degapstr = NULL; /* used to check if it matches with START removed */ AjPStr degapstr2 = NULL; AjPStr codon = NULL; /* holds temporary codon to check if is START */ char aa; /* translated putative START codon */ ajint type; /* returned type of the putative START codon */ /* start position of guide protein in translation */ ajlong pos = 0; AjPSeq newseq = NULL; /* output aligned nucleic sequence */ ajint frame; embInit("tranalign", argc, argv); nucseq = ajAcdGetSeqall("asequence"); protseq = ajAcdGetSeqset("bsequence"); tablelist = ajAcdGetListSingle("table"); seqout = ajAcdGetSeqoutset("outseq"); outseqset = ajSeqsetNew(); degapstr = ajStrNew(); /* initialise the translation table */ ajStrToInt(tablelist, &table); trnTable = ajTrnNewI(table); ajSeqsetFill(protseq); while(ajSeqallNext(nucseq, &nseq)) { if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL) ajErr("No guide protein sequence available for " "nucleic sequence %S", ajSeqGetNameS(nseq)); ajDebug("Aligning %S and %S\n", ajSeqGetNameS(nseq), ajSeqGetNameS(pseq)); /* get copy of pseq string with no gaps */ ajStrAssignS(°apstr, ajSeqGetSeqS(pseq)); ajStrRemoveGap(°apstr); /* ** for each translation frame look for subset of pep that ** matches pseq */ for(frame = 1; frame <4; frame++) { ajDebug("trying frame %d\n", frame); pep = ajTrnSeqOrig(trnTable, nseq, frame); degapstr2 = ajStrNew(); ajStrAssignRef(°apstr2, degapstr); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr); /* ** we might have a START codon that should be translated as 'M' ** we need to check if there is a match after a possible START ** codon */ if(pos == -1 && ajStrGetLen(degapstr) > 1 && (ajStrGetPtr(degapstr)[0] == 'M' || ajStrGetPtr(degapstr)[0] == 'm')) { /* see if pep minus the first character is a match */ ajStrCutStart(°apstr2, 1); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); /* ** pos is >= 1 if we have a match that is after the first ** residue */ if(pos >= 1) { /* point back at the putative START Methionine */ pos--; /* test if first codon is a START */ codon = ajStrNew(); ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), (pos*3)+frame-1, (pos*3)+frame+2); type = ajTrnCodonstrTypeS(trnTable, codon, &aa); if(type != 1) { /* first codon is not a valid START, force a mismatch */ pos = -1; } ajStrDel(&codon); } else { /* force 'pos == 0' to be treated as a mismatch */ pos = -1; } } ajStrDel(°apstr2); ajSeqDel(&pep); if(pos != -1) break; } if(pos == -1) ajErr("Guide protein sequence %S not found in nucleic sequence %S", ajSeqGetNameS(pseq), ajSeqGetNameS(nseq)); else { ajDebug("got a match with frame=%d\n", frame); /* extract the coding region of nseq with gaps */ newseq = ajSeqNew(); ajSeqSetNuc(newseq); ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq)); ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq)); tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1); /* output the gapped nucleic sequence */ ajSeqsetApp(outseqset, newseq); ajSeqDel(&newseq); } ajStrRemoveWhiteExcess(°apstr); } ajSeqoutWriteSet(seqout, outseqset); ajSeqoutClose(seqout); ajTrnDel(&trnTable); ajSeqsetDel(&outseqset); ajStrDel(°apstr); ajStrDel(°apstr2); ajSeqallDel(&nucseq); ajSeqDel(&nseq); ajSeqoutDel(&seqout); ajSeqsetDel(&protseq); ajStrDel(&tablelist); embExit(); return 0; }
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq, const AjPPatternRegex pat, AjBool reverse) { ajint pos=0; ajint off; ajint len; AjPFeature sf = NULL; AjPStr substr = NULL; AjPStr seqstr = NULL; AjPStr tmpstr = NULL; AjPStr tmp = ajStrNew(); AjPRegexp patexp = ajPatternRegexGetCompiled(pat); ajint adj; AjBool isreversed; AjPSeq revseq; ajint seqlen; seqlen = ajSeqGetLen(seq); if(!seqlen) return; isreversed = ajSeqIsReversedTrue(seq); if(isreversed) seqlen += ajSeqGetOffset(seq); pos = ajSeqGetBeginTrue(seq); adj = ajSeqGetEndTrue(seq); if(!ajStrGetLen(featMotifProt)) ajStrAssignC(&featMotifProt, "SO:0001067"); if(!ajStrGetLen(featMotifNuc)) ajStrAssignC(&featMotifNuc, "SO:0000714"); /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n", pos, adj, reverse, isreversed);*/ /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n", seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq), ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/ if (reverse) { revseq = ajSeqNewSeq(seq); ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1); ajSeqstrReverse(&seqstr); } ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1); ajStrFmtUpper(&seqstr); while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr)) { off = ajRegOffset(patexp); len = ajRegLenI(patexp, 0); if(off || len) { ajRegSubI(patexp, 0, &substr); ajRegPost(patexp, &tmp); ajStrAssignS(&seqstr, substr); ajStrAppendS(&seqstr, tmp); pos += off; /*ajDebug("match pos: %d adj: %d len: %d off:%d\n", pos, adj, len, off);*/ if (reverse) sf = ajFeatNew(ftable, NULL, featMotifNuc, adj - pos - len + 2, adj - pos + 1, 0.0, '-', 0); else { if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable)) sf = ajFeatNewProt(ftable, NULL, featMotifProt, pos, pos + len - 1, 0.0); else sf = ajFeatNew(ftable, NULL, featMotifNuc, pos, pos + len - 1, 0.0, '.', 0); } if(isreversed) ajFeatReverse(sf, seqlen); ajFmtPrintS (&tmpstr,"*pat %S: %S", ajPatternRegexGetName(pat), ajPatternRegexGetPattern(pat)); ajFeatTagAdd (sf,NULL,tmpstr); pos += 1; ajStrCutStart(&seqstr, 1); } else { pos++; ajStrCutStart(&seqstr, 1); } } ajStrDel(&tmpstr); ajStrDel(&tmp); ajStrDel(&substr); ajStrDel(&seqstr); if(reverse) ajSeqDel(&revseq); return; }
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf) { AjPStr line = NULL; ajlong pos = 0L; ajuint seqlen = 0; ajuint qlen = 0; AjPStr tmpfd = NULL; AjPStr str = NULL; AjPStr de = NULL; AjBool ok; if(!dbxflat_wrdexp) dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)"); line = ajStrNewC(""); pos = ajFileResetPos(inf); if(!ajReadlineTrim(inf,&line)) { ajStrDel(&line); return ajFalse; } /* first line of entry */ if(!ajStrPrefixC(line,"@")) return ajFalse; entry->fpos = pos; ajStrCutStart(&line, 1); ajStrExtractFirst(line, &de, &entry->id); if(desfield && ajStrGetLen(de)) { while(ajRegExec(dbxflat_wrdexp,de)) { ajRegSubI(dbxflat_wrdexp, 1, &tmpfd); str = ajStrNew(); ajStrAssignS(&str,tmpfd); ajListPush(desfield->data,(void *)str); ajRegPost(dbxflat_wrdexp, &de); } } /* now read sequence */ ok = ajReadlineTrim(inf,&line); while(ok && !ajStrPrefixC(line, "+")) { ajStrRemoveWhite(&line); seqlen += MAJSTRGETLEN(line); ok = ajReadlineTrim(inf,&line); } if(!ok) return ajFalse; ok = ajReadlineTrim(inf,&line); while(ok) { qlen += MAJSTRGETLEN(line); if(qlen < seqlen) ok = ajReadlineTrim(inf,&line); else ok = ajFalse; } ajStrDel(&de); ajStrDel(&tmpfd); ajStrDel(&line); return ajTrue; }