/*
** Build a new integer comparison matrix object for a set of codes.
**
** The object stores a copy of each of the n code strings, a square
** table of (n+1) x (n+1) ajint cells allocated with AJCNEW0, and a
** sequence conversion table created from the codes.
**
** @param [r] codes [const AjPPStr] Array of n code strings
** @param [r] n [ajint] Number of codes
** @param [r] filename [const AjPStr] Name stored in the matrix object
** @return [AjPMatrix] New matrix, or NULL if n is zero or either
**                     pointer argument is NULL
*/
AjPMatrix ajMatrixNew(const AjPPStr codes, ajint n, const AjPStr filename)
{
    AjPMatrix matrix = NULL;
    ajint dim;
    ajint idx;

    if(!n || !codes || !filename)
        return NULL;

    /* one extra row and column beyond the supplied codes */
    dim = n + 1;

    AJNEW0(matrix);
    ajStrAssignS(&matrix->Name, filename);

    /* private copies of the code labels */
    AJCNEW0(matrix->Codes, n);

    for(idx = 0; idx < n; idx++)
    {
        matrix->Codes[idx] = ajStrNew();
        ajStrAssignS(&matrix->Codes[idx], codes[idx]);
    }

    /* square score table */
    matrix->Size = dim;
    AJCNEW0(matrix->Matrix, dim);

    for(idx = 0; idx < dim; idx++)
        AJCNEW0(matrix->Matrix[idx], dim);

    matrix->Cvt = ajSeqcvtNewStr(codes, n);

    return matrix;
}
/*
** Build a new floating point comparison matrix whose row labels may
** differ from its column labels.
**
** The object stores copies of the n column codes and the rn row codes,
** a table of (rn+1) rows each holding (n+1) floats allocated with
** AJCNEW0, and an asymmetric sequence conversion table created from
** both code sets.
**
** @param [r] codes [const AjPPStr] Array of n column code strings
** @param [r] n [ajint] Number of column codes
** @param [r] rcodes [const AjPPStr] Array of rn row code strings
** @param [r] rn [ajint] Number of row codes
** @param [r] filename [const AjPStr] Name stored in the matrix object
** @return [AjPMatrixf] New matrix, or NULL if n is zero, a count is
**                      negative, or a required pointer is NULL
*/
AjPMatrixf ajMatrixfNewAsym(const AjPPStr codes, ajint n,
                            const AjPPStr rcodes, ajint rn,
                            const AjPStr filename)
{
    ajint i = 0;
    AjPMatrixf ret = 0;
    ajint nsize;
    ajint rnsize;

    if((!n) || (!codes) || (!filename))
        return NULL;

    /* negative counts would be passed on to the allocators */
    if((n < 0) || (rn < 0))
        return NULL;

    /* rcodes[] is read rn times below, so it must exist when rn > 0 */
    if(rn && !rcodes)
        return NULL;

    nsize  = n + 1;
    rnsize = rn + 1;

    AJNEW0(ret);

    ajStrAssignS(&ret->Name, filename);

    /* column labels */
    AJCNEW0(ret->Codes, n);

    for(i=0; i<n; i++)
    {
        ret->Codes[i] = ajStrNew();
        ajStrAssignS(&ret->Codes[i], codes[i]);
    }

    ret->Size = nsize;

    /* row labels */
    AJCNEW0(ret->CodesRow, rn);

    for(i=0; i<rn; i++)
    {
        ret->CodesRow[i] = ajStrNew();
        ajStrAssignS(&ret->CodesRow[i], rcodes[i]);
    }

    ret->SizeRow = rnsize;

    /* rnsize rows of nsize floats */
    AJCNEW0(ret->Matrixf, rnsize);

    for(i=0; i<rnsize; i++)
        AJCNEW0(ret->Matrixf[i], nsize);

    ret->Cvt = ajSeqcvtNewStrAsym(codes, n, rcodes, rn);

    return ret;
}
AjPRegexp ajRegCompC(const char* rexp) { AjPRegexp ret; int options = 0; int errpos = 0; const char *errptr = NULL; const unsigned char *tableptr = NULL; AJNEW0(ret); AJCNEW0(ret->ovector, AJREG_OVECSIZE); ret->ovecsize = AJREG_OVECSIZE/3; ret->pcre = pcre_compile(rexp, options, &errptr, &errpos, tableptr); if(!ret->pcre) { ajErr("Failed to compile regular expression '%s' at position %d: %s", rexp, errpos, errptr); AJFREE(ret); return NULL; } regAlloc += sizeof(ret); regCount ++; regTotal ++; /*ajDebug("ajRegCompC %x size %d regexp '%s'\n", ret, (int) sizeof(ret), rexp);*/ return ret; }
/*
** Read a matrix file into an array of float rows.
**
** Exactly four row slots are allocated in *matrix. Each line read from
** the file is converted to one row of floats with ajArrFloatLine; the
** column count is taken from the first line and reused for the rest.
** Reading stops after the fourth row so that a file with extra lines
** cannot write beyond the allocated row array.
**
** @param [r] mfname [const AjPStr] Matrix file name
** @param [w] matrix [float***] Receives the newly allocated row array
** @return [ajuint] Number of columns counted on the first line
**                  (0 if the file has no lines)
*/
static ajuint jaspscan_readmatrix(const AjPStr mfname, float ***matrix)
{
    enum { JASPSCAN_MATRIXROWS = 4 };   /* row slots allocated below */

    AjPFile inf = NULL;
    AjPStr line = NULL;
    ajuint i = 0;
    ajuint cols = 0;

    AJCNEW0(*matrix, JASPSCAN_MATRIXROWS);

    line = ajStrNew();

    inf = ajFileNewInNameS(mfname);

    if(!inf)
        ajFatal("Cannot open matrix file %S",mfname);

    /* the row bound is tested first so no surplus line is consumed */
    while(i < JASPSCAN_MATRIXROWS && ajReadlineTrim(inf,&line))
    {
        if(!i)
            cols = ajStrParseCountC(line," \n");

        (*matrix)[i++] = ajArrFloatLine(line," \n",1,cols);
    }

    ajStrDel(&line);
    ajFileClose(&inf);

    return cols;
}
/*
** Create a histogram object with room for a number of data sets.
**
** The object starts with no data sets in use (numofsets = 0), a zero
** x range, no bins, side-by-side display and empty title/axis strings.
** One per-set record is allocated for each of the numofsets slots,
** with no data attached and a colour of (slot index + 2).
**
** The result is held in an ordinary automatic variable: every call
** allocates a fresh object, so no storage shared between calls is
** needed.
**
** @param [r] numofsets [ajuint] Maximum number of data sets
** @param [r] numofpoints [ajuint] Number of data points per set
** @return [AjPHist] Newly allocated histogram object
*/
AjPHist ajHistNew(ajuint numofsets, ajuint numofpoints)
{
    AjPHist hist = NULL;
    ajuint i;

    AJNEW0(hist);

    hist->numofsets = 0;
    hist->numofsetsmax = numofsets;
    hist->numofdatapoints = numofpoints;
    hist->xmin = 0;
    hist->xmax = 0;
    hist->displaytype = HIST_SIDEBYSIDE; /* default: draw multiple
                                            histograms side by side */
    hist->bins = 0;
    hist->BaW = AJFALSE;

    ajStrAssignEmptyC(&hist->title,"");
    ajStrAssignEmptyC(&hist->xaxis,"");
    ajStrAssignEmptyC(&hist->yaxisleft,"");
    ajStrAssignEmptyC(&hist->yaxisright,"");

    AJCNEW0(hist->hists,numofsets);

    for(i=0;i<numofsets; i++)
    {
        AJNEW0((hist->hists[i]));
        (hist->hists)[i]->data = NULL;
        (hist->hists)[i]->deletedata = AJFALSE;
        (hist->hists)[i]->colour = i+2;
        (hist->hists)[i]->pattern = 0;
        (hist->hists)[i]->title = NULL;
        (hist->hists)[i]->xaxis = NULL;
        (hist->hists)[i]->yaxis = NULL;
    }

    return hist;
}
/*
** Read amino acid property data from an open file.
**
** Lines that are empty after whitespace clean-up, or that start with
** '#' or '!', are skipped. The first remaining line must begin with
** "aa" and is treated as a header. Every later line must begin with a
** single character, which ajBasecodeToInt maps to the index of the
** record to fill, followed by ten values that are scanned into that
** record. Any format problem is reported with ajFatal.
**
** @param [u] mfptr [AjPFile] Open amino acid data file
** @return [EmbPPropAmino*] Array of EMBPROPSIZE newly allocated records
*/
EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    EmbPPropAmino *props;
    AjPStr rdline = ajStrNew();
    AjPStr label  = ajStrNew();
    AjBool needheader = ajTrue;
    const char *cp;
    ajuint idx;
    ajint nread;

    AJCNEW0(props, EMBPROPSIZE);

    for(idx = 0; idx < EMBPROPSIZE; ++idx)
        AJNEW0(props[idx]);

    while(ajReadline(mfptr, &rdline))
    {
        ajStrRemoveWhiteExcess(&rdline);
        cp = ajStrGetPtr(rdline);

        /* blank lines and comment lines carry no data */
        if(!*cp || *cp == '#' || *cp == '!')
            continue;

        /* the first data-bearing line is the column header */
        if(needheader)
        {
            if(!ajStrPrefixC(rdline,"aa"))
                ajFatal("Incorrect (old?) format amino data file");

            needheader = ajFalse;
            continue;
        }

        /* leading token is the residue code */
        ajFmtScanS(rdline,"%S",&label);
        ajStrFmtUpper(&label);

        if(ajStrGetLen(label) != 1)
            ajFatal("Amino file line doesn't begin with a single character");

        idx = ajBasecodeToInt((ajint) *ajStrGetPtr(label));

        /* 27 is treated as the "not a letter" result */
        if(idx == 27)
            ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
                    rdline);

        nread = ajFmtScanS(rdline,"%*s%d%d%d%d%d%d%f%d%d%d",
                           &props[idx]->tiny,
                           &props[idx]->sm_all,
                           &props[idx]->aliphatic,
                           &props[idx]->aromatic,
                           &props[idx]->nonpolar,
                           &props[idx]->polar,
                           &props[idx]->charge,
                           &props[idx]->pve,
                           &props[idx]->nve,
                           &props[idx]->extcoeff);

        /* the message counts the label column as well */
        if(nread != 10)
            ajFatal("Only %d columns in amino file - expected %d",nread+1,11);
    }

    ajStrDel(&rdline);
    ajStrDel(&label);

    return props;
}
/*
** Write the next part of an assembly to an output file in BAM format.
**
** When assem->Hasdata is not set, a header is written first:
**   - the contigs are collected into an array, from ContigsOrder when
**     that list is non-empty, otherwise from the Contigs table;
**   - an "@HD" line, one "@SQ" line per contig (with optional UR/M5/SP
**     fields) and one "@CO" line per contig tag are built as text;
**   - read-group header lines are appended when available;
**   - a BGZF writer is opened on the output file, stored in
**     outfile->OutData, and the header is written through it.
** After the header the function returns immediately if
** assem->BamHeader is not set.
**
** Otherwise every read in assem->Reads is written as an alignment
** record through the BGZF writer stored in outfile->OutData.
**
** The contig loop requires that a contig array was actually produced,
** so an assembly for which the array conversion yields no array does
** not dereference a NULL pointer.
**
** NOTE(review): contigs named "*" are skipped without compacting the
** index, which leaves those header->target_name / target_len slots
** unset - confirm that the header writer tolerates this.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem [const AjPAssem] Assembly to write
** @return [AjBool] ajFalse if the output file or assembly is missing,
**                  ajTrue otherwise
*/
static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPSeqBamHeader header = NULL;
    AjPAssemContig c = NULL;
    AjPSeqBam bam;
    AjPAssemRead r = NULL;
    AjPAssemContig* contigs = NULL;
    AjPAssemTag t = NULL;
    AjIList j = NULL;
    AjPSeqBamBgzf gzfile = NULL;
    AjPStr headertext = NULL;
    const AjPStr rgheadertext = NULL;
    AjBool ret = ajTrue;
    ajint i = 0;
    ajulong ncontigs = 0UL;

    if(!outf)
        return ajFalse;

    if(!assem)
        return ajFalse;

    if(!assem->Hasdata)
    {
        if(ajListGetLength(assem->ContigsOrder))
            ncontigs = ajListToarray(assem->ContigsOrder,
                                     (void***)&contigs);
        else
            ncontigs = ajTableToarrayValues(assem->Contigs,
                                            (void***)&contigs);

        ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));
        header = ajSeqBamHeaderNewN((ajuint) ncontigs);

        gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
        outfile->OutData = gzfile;

        /* the array is walked until its terminating NULL entry */
        while(contigs && contigs[i])   /* contigs */
        {
            c = contigs[i];

            if(ajStrMatchC(c->Name, "*"))
            {
                i++;
                continue;
            }

            header->target_name[i] = strdup(ajStrGetPtr(c->Name));
            header->target_len[i++] = c->Length;

            ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
                           c->Name, c->Length);

            if(c->URI)
                ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);

            if(c->MD5)
                ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);

            if(c->Species)
                ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);

            ajFmtPrintAppS(&headertext, "\n");

            /* contig tags become comment lines */
            j = ajListIterNewread(c->Tags);

            while(!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintAppS(&headertext,
                               "@CO\t%S %u %u %S\n", t->Name,
                               t->x1, t->y1, t->Comment);
            }

            ajListIterDel(&j);
        }

        rgheadertext = assemSAMGetReadgroupHeaderlines(assem);

        if(rgheadertext)
            ajStrAppendS(&headertext, rgheadertext);

        ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
        ajSeqBamHeaderWrite(gzfile, header);

        ajSeqBamHeaderDel(&header);
        ajStrDel(&headertext);
        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */
    gzfile = outfile->OutData;

    /* scratch record reused for every read, starting at 10 data bytes */
    AJNEW0(bam);
    bam->m_data = 10;
    AJCNEW0(bam->data, bam->m_data);

    j = ajListIterNewread(assem->Reads);

    while(!ajListIterDone(j))   /* reads */
    {
        r = ajListIterGet(j);
        assemoutWriteBamAlignment(gzfile, r, bam);
    }

    ajListIterDel(&j);

    AJFREE(bam->data);
    AJFREE(bam);

    /* the BGZF writer stays open in outfile->OutData; not closed here */

    return ret;
}
AjPMatrixf ajMatrixfNewFile(const AjPStr filename) { AjPMatrixf ret = NULL; AjPStr *orderstring = NULL; AjPStr buffer = NULL; AjPStr firststring = NULL; AjPStr reststring = NULL; const AjPStr tok = NULL; ajint len = 0; ajint i = 0; ajint l = 0; ajint k = 0; ajint cols = 0; ajint rows = 0; const char *ptr = NULL; AjPFile file = NULL; AjBool first = ajTrue; float **matrix = NULL; float *templine = NULL; float minval = -1.0; AjPList rlabel_list = NULL; AjPStr *rlabel_arr = NULL; #ifndef WIN32 static const char *delimstr = " :\t\n"; #else static const char *delimstr = " :\t\n\r"; #endif rlabel_list = ajListNew(); firststring = ajStrNew(); reststring = ajStrNew(); file = ajDatafileNewInNameS(filename); if(!file) { ajStrDel(&firststring); ajStrDel(&reststring); return NULL; } /* Read row labels */ while(ajReadline(file,&buffer)) { ptr = ajStrGetPtr(buffer); #ifndef WIN32 if(*ptr != '#' && *ptr != '\n') #else if(*ptr != '#' && *ptr != '\n' && *ptr != '\r') #endif { if(first) first = ajFalse; else { ajFmtScanC(ptr, "%S", &firststring); ajListPushAppend(rlabel_list, firststring); firststring = ajStrNew(); } } } first = ajTrue; ajStrDel(&firststring); rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr); ajFileSeek(file, 0, 0); while(ajReadline(file,&buffer)) { ajStrRemoveWhiteExcess(&buffer); ptr = ajStrGetPtr(buffer); if(*ptr && *ptr != '#') { if(first) { cols = ajStrParseCountC(buffer,delimstr); AJCNEW0(orderstring, cols); for(i=0; i<cols; i++) orderstring[i] = ajStrNew(); tok = ajStrParseC(buffer, " :\t\n"); ajStrAssignS(&orderstring[l++], tok); while((tok = ajStrParseC(NULL, " :\t\n"))) ajStrAssignS(&orderstring[l++], tok); first = ajFalse; ret = ajMatrixfNewAsym(orderstring, cols, rlabel_arr, rows, filename); matrix = ret->Matrixf; } else { ajFmtScanC(ptr, "%S", &firststring); /* JISON 19/7/4 k = ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(firststring)); */ k = ajSeqcvtGetCodeS(ret->Cvt, firststring); len = MAJSTRGETLEN(firststring); ajStrAssignSubC(&reststring, 
ptr, len, -1); /* ** Must discard the first string (label) and use ** reststring otherwise ajArrFloatLine would fail (it ** cannot convert a string to a float) ** ** Use cols,1,cols in below because although 2nd and ** subsequent lines have one more string in them (the ** residue label in the 1st column) we've discarded that ** from the string that's passed */ templine = ajArrFloatLine(reststring,delimstr,1,cols); for(i=0; i<cols; i++) { if(templine[i] < minval) minval = templine[i]; /* JISON 19/7/4 matrix[k][ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(orderstring[i]))] = templine[i]; */ matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt, orderstring[i])] = templine[i]; } AJFREE(templine); } } } ajDebug("fill rest with minimum value %d\n", minval); ajFileClose(&file); ajStrDel(&buffer); for(i=0; i<cols; i++) ajStrDel(&orderstring[i]); AJFREE(orderstring); ajDebug("read matrix file %S\n", filename); ajStrDel(&firststring); ajStrDel(&reststring); for(i=0; i<rows; i++) ajStrDel(&rlabel_arr[i]); AJFREE(rlabel_arr); ajListFree(&rlabel_list); return ret; }
/*
** dbiblast main program.
**
** Reads its options through the ACD interface, opens each matching
** input file as a BLAST database, collects an entry ID (plus optional
** field values) for every entry, and writes the division lookup file,
** the entry name index and one index per requested field.
**
** Two processing modes are selected by the "systemsort" option:
**   - systemsort set: entry and field data go to sort files
**     (elistfile / alistfile) that are processed later by the
**     embDbiSort* functions;
**   - systemsort unset: entry and field data are kept in memory lists
**     (idlist / fieldList) and written by the embDbiMem* functions.
**
** The final clean-up also releases several variables that are not
** declared in this function (t, id, acc, hline, tmp*, wrdexp,
** dbiblastEntry, fdl); they are presumably file-level statics used by
** the entry parser.
*/
int main(int argc, char **argv)
{
    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajint blastv = 0;
    char dbtype = '\0';

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPStr version = NULL;
    AjPStr seqtype = NULL;

    AjPFile elistfile = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname = NULL;
    AjPStr release = NULL;
    AjPStr datestr = NULL;
    AjPStr sortopt = NULL;
    void **entryIds = NULL;

    AjBool usesrc = AJTRUE;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPStr idformat = NULL;

    EmbPEntry entry;

    PBlastDb db = NULL;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
        0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields = NULL;

    AjPFile entFile = NULL;

    AjPStr* divfiles = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    /* ID format name passed to the entry parser for every entry */
    idformat = ajStrNewC("NCBI");

    /* command line / ACD options */
    fields = ajAcdGetList("fields");
    directory = ajAcdGetDirectoryName("directory");
    indexdir = ajAcdGetOutdirName("indexoutdir");
    filename = ajAcdGetString("filenames");
    exclude = ajAcdGetString("exclude");
    dbname = ajAcdGetString("dbname");
    release = ajAcdGetString("release");
    datestr = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup = ajAcdGetBoolean("cleanup");
    sortopt = ajAcdGetString("sortoptions");
    maxindex = ajAcdGetInt("maxindex");
    version = ajAcdGetListSingle("blastversion");
    seqtype = ajAcdGetListSingle("seqtype");
    usesrc = ajAcdGetBoolean("sourcefile");
    logfile = ajAcdGetOutfile("outfile");

    while(fields[nfields]) /* array ends with a NULL */
        nfields++;

    /* per-field bookkeeping, only needed when fields were requested */
    if(nfields)
    {
        AJCNEW(maxFieldLen, nfields);
        AJCNEW0(countField, nfields);
        AJCNEW0(fieldTot, nfields);

        /* starts out as the negated maxindex; see the maxlen logic below */
        for(ifield=0; ifield < nfields; ifield++)
            maxFieldLen[ifield] = (ajint) maxindex * -1;

        if(systemsort)
            AJCNEW(alistfile, nfields);
        else
        {
            AJCNEW(fieldList, nfields);

            for(ifield=0; ifield < nfields; ifield++)
                fieldList[ifield] = ajListNew();
        }
    }

    /* a date of "00/00/00" is replaced by today's date */
    if(ajStrMatchC(datestr, "00/00/00"))
        ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname); /* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();

    /* readReverse is set on hosts that are not big-endian */
    if(ajUtilGetBigendian())
        readReverse = ajFalse;
    else
        readReverse = ajTrue;

    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);

    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);

    /* sorted list of candidate input files */
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);

    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embDbiLogHeader(logfile, dbname, release, datestr,
                    indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
                    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);

    /*
    ** process each input file, one at a time
    **
    ** ifile counts candidate files; jfile counts only the files that
    ** opened successfully and is the division number for their entries
    */

    jfile = 0;

    for(ifile=0; ifile < nfiles; ifile++)
    {
        curfilename = (AjPStr) testFiles[ifile];

        if(!dbiblast_blastopenlib(curfilename,
                                  usesrc, blastv, dbtype, &db))
            continue; /* could be the wrong file type with "*.*" */

        ajDebug("processing filename '%S' ...\n", curfilename);
        ajDebug("processing file '%S' ...\n", db->TFile->Name);

        /* division file name, stored without its path */
        ajStrAssignS(&divfiles[jfile], db->TFile->Name);
        ajFilenameTrimPath(&divfiles[jfile]);

        /* track the longest division file name (plus one) */
        if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
            maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

        if(systemsort) /* elistfile for entries, alist for fields */
            elistfile = embDbiSortOpen(alistfile, jfile,
                                       dbname, fields, nfields);

        /* per-file counters restart for every input file */
        idCountFile = 0;

        for(i=0;i<nfields;i++)
            countField[i] = 0;

        while((entry=dbiblast_nextblastentry(db, jfile,
                                             idformat, systemsort,
                                             fields,
                                             maxFieldLen,
                                             &maxidlen, countField,
                                             elistfile, alistfile)))
        {
            idCountFile++;

            if(!systemsort) /* save the entry data in lists */
            {
                embDbiMemEntry(idlist, fieldList, nfields,
                               entry, jfile);
            }
        }

        idCount += idCountFile;

        if(systemsort)
        {
            embDbiSortClose(&elistfile, alistfile, nfields);
            /* lost the entry, so can't free it :-) */
        }

        embDbiLogFile(logfile, curfilename, idCountFile, fields,
                      countField, nfields);
        dbiblast_dbfree(&db);
        jfile++;
    }

    /* from here on nfiles is the number of files actually indexed */
    nfiles = jfile;

    /*
    ** write the division.lkp file
    */

    embDbiWriteDivision(indexdir, dbname, release, date,
                        maxfilelen, nfiles, divfiles, NULL);

    /*
    ** Write the entryname.idx index
    */

    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);

    /* record size is the longest ID plus 10; file size adds 300 */
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);

    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
                                      dbname, nfiles, cleanup, sortopt);
    else /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
                                     idlist, &entryIds);

        if(idDone != idCount)
            ajFatal("Duplicates not allowed for in-memory processing");
    }

    /* rewrite the header sizes using the number of IDs written */
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);

    /*
    ** Write the fields index files
    */

    for(ifield=0; ifield < nfields; ifield++)
    {
        /*
        ** a non-zero maxindex is used as the field length; otherwise
        ** the absolute value of the recorded field length is used
        */
        if(maxindex)
            maxlen = maxindex;
        else
        {
            if(maxFieldLen[ifield] >= 0)
                maxlen = maxFieldLen[ifield];
            else
                maxlen = - maxFieldLen[ifield];
        }

        if(systemsort)
            fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
                                                     date, indexdir,
                                                     fields[ifield], maxlen,
                                                     nfiles, idCount,
                                                     cleanup, sortopt);
        else
            fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
                                                    date, indexdir,
                                                    fields[ifield], maxlen,
                                                    fieldList[ifield],
                                                    entryIds);
    }

    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
                   nfields, nfiles, idDone, idCount);

    if(systemsort)
        embDbiRmEntryFile(dbname, cleanup);

    /* release everything allocated above */
    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
        if(systemsort)
        {
            ajFileClose(&alistfile[i]);
        }
        else
        {
            ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
            ajListFree(&fieldList[i]);
        }
    }

    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }

    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);
    AJFREE(maxFieldLen);

    ajFileClose(&logfile);

    ajListstrFreeData(&listTestFiles);

    /* names below are not declared in main (see header comment) */
    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);

    ajRegFree(&wrdexp);

    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);

        AJFREE(fdl);
    }

    /* NOTE(review): this repeats the divfiles release loop above */
    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }

    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
AjPMatrix ajMatrixNewFile(const AjPStr filename) { AjPMatrix ret = NULL; AjPStr buffer = NULL; const AjPStr tok = NULL; AjPStr firststring = NULL; AjPStr *orderstring = NULL; AjPFile file = NULL; AjBool first = ajTrue; const char *ptr = NULL; ajint **matrix = NULL; ajint minval = -1; ajint i = 0; ajint l = 0; ajint k = 0; ajint cols = 0; ajint rows = 0; ajint *templine = NULL; AjPList rlabel_list = NULL; AjPStr *rlabel_arr = NULL; #ifndef WIN32 static const char *delimstr = " :\t\n"; #else static const char *delimstr = " :\t\n\r"; #endif rlabel_list = ajListNew(); firststring = ajStrNew(); file = ajDatafileNewInNameS(filename); if(!file) { ajStrDel(&firststring); ajListFree(&rlabel_list); return NULL; } /* Read row labels */ while(ajReadline(file,&buffer)) { ptr = ajStrGetPtr(buffer); #ifndef WIN32 if(*ptr != '#' && *ptr != '\n') #else if(*ptr != '#' && *ptr != '\n' && *ptr != '\r') #endif { if(first) first = ajFalse; else { ajFmtScanC(ptr, "%S", &firststring); ajListPushAppend(rlabel_list, firststring); firststring = ajStrNew(); } } } first = ajTrue; ajStrDel(&firststring); rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr); ajFileSeek(file, 0, 0); while(ajReadline(file,&buffer)) { ajStrRemoveWhiteExcess(&buffer); ptr = ajStrGetPtr(buffer); if(*ptr && *ptr != '#') { if(first) { cols = ajStrParseCountC(buffer,delimstr); AJCNEW0(orderstring, cols); for(i=0; i<cols; i++) orderstring[i] = ajStrNew(); tok = ajStrParseC(buffer, " :\t\n"); ajStrAssignS(&orderstring[l++], tok); while((tok = ajStrParseC(NULL, " :\t\n"))) ajStrAssignS(&orderstring[l++], tok); first = ajFalse; ret = ajMatrixNewAsym(orderstring, cols, rlabel_arr, rows, filename); matrix = ret->Matrix; } else { ajFmtScanC(ptr, "%S", &firststring); /* JISON 19/7/4 k = ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(firststring)); */ k = ajSeqcvtGetCodeS(ret->Cvt, firststring); /* ** cols+1 is used below because 2nd and subsequent lines have ** one more string in them (the residue label) */ templine = 
ajArrIntLine(buffer,delimstr,2,cols+1); for(i=0; i<cols; i++) { if(templine[i] < minval) minval = templine[i]; /* JISON 19/7/4 matrix[k][ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(orderstring[i]))] = templine[i]; */ matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt, orderstring[i])] = templine[i]; } AJFREE(templine); } } } ajDebug("fill rest with minimum value %d\n", minval); ajFileClose(&file); ajStrDel(&buffer); for(i=0; i<cols; i++) ajStrDel(&orderstring[i]); AJFREE(orderstring); ajDebug("read matrix file %S\n", filename); ajStrDel(&firststring); for(i=0; i<rows; i++) ajStrDel(&rlabel_arr[i]); AJFREE(rlabel_arr); ajListFree(&rlabel_list); return ret; }
/*
** Copy an EMBOSS sequence set into an array of C strings and an AINFO
** alignment information structure.
**
** The set is filled to a common length and converted to upper case
** first. Each sequence is copied into a newly allocated string of
** maxlen+1 characters. For every sequence the sqinfo record receives
** the weight, the ID/name and accession (each truncated to fit a
** SQINFO_NAMELEN buffer), the sequence type of the set, and the count
** of characters that are not one of '.', ' ', '_', '-' or '~'.
**
** The alignment-level cs, rf, name, acc and desc strings are copied
** from the first sequence of the set; info->au is left NULL.
**
** ajStrAssignSubS takes an inclusive end position, so the end passed
** for a name of (clamped) length len is len-1. That keeps the copy at
** no more than SQINFO_NAMELEN-1 characters plus the terminator.
**
** @param [u] seqset [AjPSeqset] Sequence set (filled and upper-cased)
** @param [w] retseqs [char***] Receives the array of sequence strings
** @param [w] info [AINFO*] Alignment information to fill in
** @return [void]
*/
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i = 0;
    const char *p = NULL;
    char c = '\0';
    ajint cnt = 0;
    AjPStr tmpstr = NULL;

    info->name = NULL;
    info->rf = NULL;
    info->cs = NULL;
    info->desc = NULL;
    info->acc = NULL;
    info->au = NULL;
    info->flags = 0;

    ajSeqsetFill(seqset);

    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);
    maxlen = ajSeqsetGetLen(seqset);

    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);

    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    /* per-sequence information records, all fields cleared */
    info->sqinfo = (SQINFO *) calloc(n, sizeof(SQINFO));

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa = NULL;
    }

    /* sequence weights */
    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }

    info->nseq = n;
    info->alen = maxlen;

    /* names and accessions, truncated to the fixed-size buffers */
    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);

        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;

            /* end position is inclusive: characters 0 .. len-1 */
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len - 1);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }

        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len >= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;

            /* end position is inclusive: characters 0 .. len-1 */
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len - 1);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }

    /* alignment-level fields are taken from the first sequence */
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /* sequence type and ungapped length */
    for(i=0; i<n; ++i)
    {
        /* later tests override earlier ones, as in a cascade */
        info->sqinfo[i].type = kOtherSeq;

        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;

        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;

        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;

        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);
        p = ajSeqGetSeqC(seq);
        cnt = 0;

        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;

            ++p;
        }

        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }

    *retseqs = seqs;

    ajStrDel(&tmpstr);

    return;
}
/*
** Read molecular weight data from an open file.
**
** Lines that are empty after whitespace clean-up, or that start with
** '#' or '!', are skipped. The first remaining line must begin with
** "Mol" and is treated as a header. Later lines start with a label
** followed by an average and a monoisotopic weight:
**   - a single character label is mapped by ajBasecodeToInt to the
**     index of the record to fill;
**   - labels beginning HYDROGEN, OXYGEN or WATER fill the records at
**     EMBPROPHINDEX, EMBPROPOINDEX and EMBPROPWINDEX.
** Any format problem is reported with ajFatal. The column-count
** message names the molwt file and counts the label column, so a
** complete line has 3 columns.
**
** @param [u] mfptr [AjPFile] Open molecular weight data file
** @return [EmbPPropMolwt*] Array of EMBPROPSIZE+2 newly allocated
**                          records
*/
EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr line = NULL;
    AjPStr token = NULL;
    AjBool firstline;
    const char *p;
    ajuint i;
    ajint n;
    EmbPPropMolwt *ret;

    line = ajStrNew();
    token = ajStrNew();
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
        AJNEW0(ret[i]);

    while(ajReadline(mfptr, &line))
    {
        ajStrRemoveWhiteExcess(&line);
        p = ajStrGetPtr(line);

        /* comment and blank lines */
        if(*p=='#' || *p=='!' || !*p)
            continue;

        /* header line */
        if(firstline)
        {
            if(!ajStrPrefixC(line,"Mol"))
                ajFatal("Incorrect format molwt file: '%S'", line);

            firstline = ajFalse;
            continue;
        }

        ajFmtScanS(line,"%S",&token);
        ajStrFmtUpper(&token);

        /* multi-character labels are the named special entries */
        if(ajStrGetLen(token) != 1)
        {
            if(ajStrPrefixC(token,"HYDROGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPHINDEX]->average,
                              &ret[EMBPROPHINDEX]->mono) != 2)
                    ajFatal("Bad format hydrogen data line");
            }
            else if(ajStrPrefixC(token,"OXYGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPOINDEX]->average,
                              &ret[EMBPROPOINDEX]->mono) != 2)
                    ajFatal("Bad format oxygen data line");
            }
            else if(ajStrPrefixC(token,"WATER"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPWINDEX]->average,
                              &ret[EMBPROPWINDEX]->mono) != 2)
                    ajFatal("Bad format water data line");
            }
            else
                ajFatal("Unknown molwt token %S",token);

            continue;
        }

        i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

        /* 27 is treated as the "not a letter" result */
        if(i == 27)
            ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
                    line);

        n = ajFmtScanS(line,"%*s%lf%lf",
                       &ret[i]->average,
                       &ret[i]->mono);

        /* n excludes the label column, so report n+1 of 3 */
        if(n != 2)
            ajFatal("Only %d columns in molwt file - expected %d",n+1,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
/*
** supermatcher main program.
**
** Reads its options through the ACD interface, builds a word (kmer)
** table from all sequences of the target set, then for each query
** sequence:
**   - searches for word matches against the target set
**     (embWordRabinKarpSearch),
**   - for each target with at least one match, takes the first
**     maximum-scoring match as the seed, derives alignment end points
**     from it, and runs the banded "fast" Smith-Waterman path
**     calculation over that region,
**   - reports the alignment when its score exceeds "minscore".
** Targets with no word match are noted in the error file.
*/
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char *queryseqc;
    const char *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend = 0;
    ajint targetend = 0;
    ajint width = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    /* command line / ACD options */
    matrix = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen = ajAcdGetInt("wordlen");
    align = ajAcdGetAlign("outfile");
    errorf = ajAcdGetOutfile("errorfile");
    width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */
    minscore = ajAcdGetFloat("minscore");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    /* score table and code conversion table of the chosen matrix */
    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    /* one match list slot per target sequence */
    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    for(k=0;k<ntargetseqs;k++)
    {
        targetseq = ajSeqsetGetseqSeq(targetseqs, k);
        embWordGetTable(&kmers, targetseq);
        ajDebug("Number of distinct kmers found so far: %d\n",
                ajTableGetLength(kmers));
    }

    AJCNEW0(lastlocation, ntargetseqs);

    /* reported with ajErr only; processing continues afterwards */
    if(ajTableGetLength(kmers)<1)
        ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
        ajSeqTrim(queryseq);

        queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

        ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

        /* fresh cursors and match lists for every query sequence */
        for(k=0;k<ntargetseqs;k++)
        {
            lastlocation[k]=0;
            matchlist[k] = ajListstrNew();
        }

        embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
                               (const EmbPWordRK*)wordsw, wordlen, nkmers,
                               matchlist, lastlocation, ajFalse);

        for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
        {
            targetseq = ajSeqsetGetseqSeq(targetseqs, k);

            ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

            /* no seed for this target: log it and move on */
            if(ajListGetLength(matchlist[k])==0)
            {
                ajFmtPrintF(errorf,
                            "No wordmatch start points for "
                            "%s vs %s. No alignment\n",
                            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
                embWordMatchListDelete(&matchlist[k]);
                continue;
            }

            /* only the maximum match is used as seed
             * (if there is more than one location with the maximum match
             * only the first one is used)
             * TODO: we should add a new option to make above limit optional
             */
            maxmatch = embWordMatchFirstMax(matchlist[k]);

            supermatcher_findendpoints(maxmatch,targetseq, queryseq,
                                       &targetstart, &querystart,
                                       &targetend, &queryend);

            targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
            queryseqc = ajSeqGetSeqC(queryseq);
            targetseqc = ajSeqGetSeqC(targetseq);

            ajStrAssignC(&queryaln,"");
            ajStrAssignC(&targetaln,"");

            ajDebug("++ %S v %S start:%d %d end:%d %d\n",
                    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
                    targetstart, querystart, targetend, queryend);

            /*
            ** path/compass are reused between alignments: they grow
            ** when this region needs more cells than any earlier one,
            ** otherwise the part that will be used is cleared
            */
            newmax = (targetend-targetstart+2)*width;

            if(newmax > oldmax)
            {
                AJCRESIZE0(path,oldmax,newmax);
                AJCRESIZE0(compass,oldmax,newmax);
                oldmax=newmax;
                ajDebug("++ memory re/allocation for path/compass arrays"
                        " to size: %d\n", newmax);
            }
            else
            {
                AJCSET0(path,newmax);
                AJCSET0(compass,newmax);
            }

            ajDebug("Calling embAlignPathCalcSWFast "
                    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
                    querystart, queryend, (queryend - querystart + 1),
                    ajSeqGetLen(queryseq),
                    targetstart, targetend, (targetend - targetstart + 1),
                    ajSeqGetLen(targetseq),
                    width);

            score = embAlignPathCalcSWFast(&targetseqc[targetstart],
                                           &queryseqc[querystart],
                                           targetend-targetstart+1,
                                           queryend-querystart+1,
                                           0,width,
                                           gapopen,gapextend,
                                           path,sub,cvt,
                                           compass,show);

            /* only alignments scoring above minscore are reported */
            if(score>minscore)
            {
                embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
                                         targetseq,queryseq,
                                         &targetaln,&queryaln,
                                         targetend-targetstart+1,
                                         queryend-querystart+1,
                                         0,width,
                                         &targetstart,&querystart);

                if(!ajAlignFormatShowsSequences(align))
                {
                    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
                                    ajStrGetPtr(queryaln),
                                    ajSeqGetNameC(targetseq),
                                    ajSeqGetNameC(queryseq));
                    ajAlignSetScoreR(align, score);
                }
                else
                {
                    ajDebug(" queryaln:%S \ntargetaln:%S\n",
                            queryaln,targetaln);
                    embAlignReportLocal(align,
                                        queryseq, targetseq,
                                        queryaln, targetaln,
                                        querystart, targetstart,
                                        gapopen, gapextend,
                                        score, matrix,
                                        1 + ajSeqGetOffset(queryseq),
                                        1 + ajSeqGetOffset(targetseq)
                                        );
                }

                ajAlignWrite(align);
                ajAlignReset(align);
            }

            ajStrDel(&targetaln);

            embWordMatchListDelete(&matchlist[k]);
        }

        ajStrDel(&queryaln);
    }

    /* release the per-word search records */
    for(k=0;k<nkmers;k++)
    {
        AJFREE(wordsw[k]->seqindxs);
        AJFREE(wordsw[k]->nSeqMatches);

        for(j=0;j<wordsw[k]->nseqs;j++)
            AJFREE(wordsw[k]->locs[j]);

        AJFREE(wordsw[k]->nnseqlocs);
        AJFREE(wordsw[k]->locs);
        AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    /*
    ** NOTE(review): the matrix is deleted here only when the output
    ** format does not show sequences; presumably the align object
    ** owns it otherwise - confirm.
    */
    if(!ajAlignFormatShowsSequences(align))
        ajMatrixfDel(&matrix);

    AJFREE(path);
    AJFREE(compass);
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
int main(int argc, char **argv) { AjPSeq seq; AjPGraph graph = 0; AjPFile outf = NULL; AjPFile file = NULL; AjPStr buffer = NULL; float twist[4][4][4]; float roll[4][4][4]; float tilt[4][4][4]; float rbend; float rcurve; float bendscale; float curvescale; float twistsum = (float) 0.0; float pi = (float) 3.14159; float pifac = (pi/(float) 180.0); float pi2 = pi/(float) 2.0; ajint *iseq = NULL; float *x; float *y; float *xave; float *yave; float *curve; float *bend; const char *ptr; ajint i; ajint ii; ajint k; ajint j; char residue[2]; float maxbend; float minbend; float bendfactor; float maxcurve; float mincurve; float curvefactor; float fxp; float fyp; float yincr; float yy1; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; float ystart; float defheight; float currentheight; ajint count; ajint portrait = 0; ajint title = 0; ajint numres; ajint ibeg; ajint iend; ajint ilen; AjPStr sstr = NULL; ajint ipos; float dx; float dy; float rxsum; float rysum; float yp1; float yp2; double td; embInit("banana", argc, argv); seq = ajAcdGetSeq("sequence"); file = ajAcdGetDatafile("anglesfile"); outf = ajAcdGetOutfile("outfile"); graph = ajAcdGetGraph("graph"); numres = ajAcdGetInt("residuesperline"); ibeg = ajSeqGetBegin(seq); iend = ajSeqGetEnd(seq); ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1); ilen = ajStrGetLen(sstr); AJCNEW0(iseq,ilen+1); AJCNEW0(x,ilen+1); AJCNEW0(y,ilen+1); AJCNEW0(xave,ilen+1); AJCNEW0(yave,ilen+1); AJCNEW0(curve,ilen+1); AJCNEW0(bend,ilen+1); ptr= ajStrGetPtr(sstr); for(ii=0;ii<ilen;ii++) { if(*ptr=='A' || *ptr=='a') iseq[ii+1] = 0; else if(*ptr=='T' || *ptr=='t') iseq[ii+1] = 1; else if(*ptr=='G' || *ptr=='g') iseq[ii+1] = 2; else if(*ptr=='C' || *ptr=='c') iseq[ii+1] = 3; else ajErr("%c is not an ATCG hence not valid",*ptr); ptr++; } if(!file) ajErr("Banana failed to open angle file"); ajReadline(file,&buffer); /* 3 junk lines */ ajReadline(file,&buffer); ajReadline(file,&buffer); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) { 
if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f", &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k], &twist[ii][3][k]); } else ajErr("Error reading angle file"); for(j=0;j<4;j++) twist[ii][j][k] *= pifac; } for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k], &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]); } else ajErr("Error reading angle file"); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k], &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]); else ajErr("Error reading angle file"); if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve, &bendscale,&curvescale); else ajErr("Error reading angle file"); ajFileClose(&file); ajStrDel(&buffer); for(ii=1;ii<ilen-1;ii++) { twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]]; dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) + (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2)); dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) + tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2); x[ii+1] = x[ii]+dx; y[ii+1] = y[ii]+dy; } for(ii=6;ii<ilen-6;ii++) { rxsum = 0.0; rysum = 0.0; for(k=-4;k<=4;k++) { rxsum+=x[ii+k]; rysum+=y[ii+k]; } rxsum+=(x[ii+5]*(float)0.5); rysum+=(y[ii+5]*(float)0.5); rxsum+=(x[ii-5]*(float)0.5); rysum+=(y[ii-5]*(float)0.5); xave[ii] = rxsum*(float)0.1; yave[ii] = rysum*(float)0.1; } for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++) { td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])* (x[i+(ajint)rbend]-x[i-(ajint)rbend])) + ((y[i+(ajint)rbend]-y[i-(ajint)rbend])* (y[i+(ajint)rbend]-y[i-(ajint)rbend]))); bend[i] = (float) td; bend[i]*=bendscale; } for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++) { td = sqrt(((xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve]))+ ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])* 
(yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve]))); curve[i] = (float) td; } if(outf) { ajFmtPrintF(outf,"Base Bend Curve\n"); ptr = ajStrGetPtr(sstr); for(ii=1;ii<=ilen;ii++) { ajFmtPrintF(outf,"%c %6.1f %6.1f\n", *ptr, bend[ii], curve[ii]); ptr++; } ajFileClose(&outf); } if(graph) { maxbend = minbend = 0.0; maxcurve = mincurve = 0.0; for(ii=1;ii<=ilen;ii++) { if(bend[ii] > maxbend) maxbend = bend[ii]; if(bend[ii] < minbend) minbend = bend[ii]; if(curve[ii] > maxcurve) maxcurve = curve[ii]; if(curve[ii] < mincurve) mincurve = curve[ii]; } ystart = 75.0; ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq)); ajGraphicsSetPagesize(960, 768); ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0, (float)0.0, ystart+(float)5.0); ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { ixlen = 768; iylen = 960; } else { ixlen = 960; iylen = 768; } ajGraphicsGetCharsize(&defheight,¤theight); if(!currentheight) { defheight = currentheight = (float) 4.440072; currentheight = defheight * ((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0))) /currentheight; } ajGraphicsSetCharscale(((float)ixlen/((float)(numres)* (currentheight+(float)1.0)))/ currentheight); ajGraphicsGetCharsize(&defheight,¤theight); yincr = (currentheight + (float)3.0)*(float)0.3; if(!title) yy1 = ystart; else yy1 = ystart-(float)5.0; count = 1; residue[1]='\0'; bendfactor = (3*yincr)/maxbend; curvefactor = (3*yincr)/maxcurve; ptr = ajStrGetPtr(sstr); yy1 = yy1-(yincr*((float)5.0)); for(ii=1;ii<=ilen;ii++) { if(count > numres) { yy1 = yy1-(yincr*((float)5.0)); if(yy1<1.0) { if(!title) yy1=ystart; else yy1 = ystart-(float)5.0; yy1 = yy1-(yincr*((float)5.0)); ajGraphNewpage(graph,AJFALSE); } count = 1; } residue[0] = *ptr; ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue); if(ii>1 && ii < ilen) { yp1 = yy1+yincr + (bend[ii]*bendfactor); yp2 = yy1+yincr + (bend[ii+1]*bendfactor); ajGraphicsDrawposLine((float)count+(float)1.5,yp1, (float)(count)+(float)2.5,yp2); } ipos 
= ilen-(ajint)rcurve-7; if(ipos < 0) ipos = 0; if(ii>rcurve+5 && ii<ipos) { yp1 = yy1+yincr + (curve[ii]*curvefactor); yp2 = yy1+yincr + (curve[ii+1]*curvefactor); ajGraphicsDrawposLine((float)count+(float)1.7,yp1, (float)(count)+(float)2.3,yp2); } ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr, (float)(count)+(float)2.5,yy1+yincr); count++; ptr++; } ajGraphicsClose(); } AJFREE(iseq); AJFREE(x); AJFREE(y); AJFREE(xave); AJFREE(yave); AJFREE(curve); AJFREE(bend); ajStrDel(&sstr); ajSeqDel(&seq); ajFileClose(&file); ajFileClose(&outf); ajGraphxyDel(&graph); embExit(); return 0; }
int main(int argc, char **argv) { /* Global details */ AjBool explain_flag; AjBool file_flag; AjPStr* task; AjBool do_primer; AjBool do_hybrid; ajint num_return; ajint first_base_index; /* "Sequence" Input Tags */ AjPSeqall sequence; AjPRange included_region; AjPRange target; AjPRange excluded_region; AjPStr left_input; AjPStr right_input; /* Primer details */ AjBool pick_anyway; AjPFile mispriming_library; float max_mispriming; float pair_max_mispriming; ajint gc_clamp; ajint opt_size; ajint min_size; ajint max_size; float opt_tm; float min_tm; float max_tm; float max_diff_tm; float opt_gc_percent; float min_gc; float max_gc; float salt_conc; float dna_conc; ajint num_ns_accepted; float self_any; float self_end; ajint max_poly_x; /* Sequence Quality. These are not (yet) implemented */ /* AjPFile sequence_quality; ajint min_quality; ajint min_end_quality; ajint quality_range_min; ajint quality_range_max; */ /* Product details */ ajint product_opt_size; AjPRange product_size_range; float product_opt_tm; float product_min_tm; float product_max_tm; /* Objective Function Penalty Weights for Primers */ float max_end_stability; /* these are not (yet) implemented */ /* float inside_penalty; float outside_penalty; */ /* Primer penalties */ /* these are not (yet) implemented */ /* Internal Oligo "Sequence" Input Tags */ AjPRange internal_oligo_excluded_region; /* Internal Oligo "Global" Input Tags */ AjPStr internal_oligo_input; ajint internal_oligo_opt_size; ajint internal_oligo_min_size; ajint internal_oligo_max_size; float internal_oligo_opt_tm; float internal_oligo_min_tm; float internal_oligo_max_tm; float internal_oligo_opt_gc_percent; float internal_oligo_min_gc; float internal_oligo_max_gc; float internal_oligo_salt_conc; float internal_oligo_dna_conc; float internal_oligo_self_any; float internal_oligo_self_end; ajint internal_oligo_max_poly_x; AjPFile internal_oligo_mishyb_library; float internal_oligo_max_mishyb; /* ajint internal_oligo_min_quality; */ /* 
Internal Oligo penalties */ /* these are not (yet) implemented */ /* EMBOSS-wrapper-specific stuff */ AjPFile outfile; /* other variables */ AjPStr result = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPSeq seq = NULL; ajint begin = 0; ajint end; FILE* stream; AjPStr taskstr = NULL; const AjPStr program = NULL; /* pipe variables */ int *pipeto; /* pipe to feed the exec'ed program input */ int *pipefrom; /* pipe to get the exec'ed program output */ embInit("eprimer3", argc, argv); /* Global details */ explain_flag = ajAcdGetBoolean("explainflag"); file_flag = ajAcdGetBoolean("fileflag"); task = ajAcdGetList("task"); do_primer = ajAcdGetToggle("primer"); do_hybrid = ajAcdGetToggle("hybridprobe"); num_return = ajAcdGetInt("numreturn"); first_base_index = ajAcdGetInt("firstbaseindex"); /* "Sequence" Input Tags */ sequence = ajAcdGetSeqall("sequence"); included_region = ajAcdGetRange("includedregion"); target = ajAcdGetRange("targetregion"); excluded_region = ajAcdGetRange("excludedregion"); left_input = ajAcdGetString("forwardinput"); right_input = ajAcdGetString("reverseinput"); /* Primer details */ pick_anyway = ajAcdGetBoolean("pickanyway"); mispriming_library = ajAcdGetInfile("mispriminglibraryfile"); max_mispriming = ajAcdGetFloat("maxmispriming"); pair_max_mispriming = ajAcdGetFloat("pairmaxmispriming"); gc_clamp = ajAcdGetInt("gcclamp"); opt_size = ajAcdGetInt("osize"); min_size = ajAcdGetInt("minsize"); max_size = ajAcdGetInt("maxsize"); opt_tm = ajAcdGetFloat("otm"); min_tm = ajAcdGetFloat("mintm"); max_tm = ajAcdGetFloat("maxtm"); max_diff_tm = ajAcdGetFloat("maxdifftm"); opt_gc_percent = ajAcdGetFloat("ogcpercent"); min_gc = ajAcdGetFloat("mingc"); max_gc = ajAcdGetFloat("maxgc"); salt_conc = ajAcdGetFloat("saltconc"); dna_conc = ajAcdGetFloat("dnaconc"); num_ns_accepted = ajAcdGetInt("numnsaccepted"); self_any = ajAcdGetFloat("selfany"); self_end = ajAcdGetFloat("selfend"); max_poly_x = ajAcdGetInt("maxpolyx"); AJCNEW0(pipeto,2); AJCNEW0(pipefrom,2); 
/* Sequence Quality */ /* these are not (yet) implemented */ /* sequence_quality = ajAcdGetInfile("sequencequality"); min_quality = ajAcdGetInt("minquality"); min_end_quality = ajAcdGetInt("minendquality"); quality_range_min = ajAcdGetInt("qualityrangemin"); quality_range_max = ajAcdGetInt("qualityrangemax"); */ /* Product details */ product_opt_size = ajAcdGetInt("psizeopt"); product_size_range = ajAcdGetRange("prange"); product_opt_tm = ajAcdGetFloat("ptmopt"); product_min_tm = ajAcdGetFloat("ptmmin"); product_max_tm = ajAcdGetFloat("ptmmax"); /* Objective Function Penalty Weights for Primers */ max_end_stability = ajAcdGetFloat("maxendstability"); /* these are not (yet) implemented */ /* inside_penalty = ajAcdGetFloat("insidepenalty"); outside_penalty = ajAcdGetFloat("outsidepenalty"); */ /* Primer penalties */ /* these are not (yet) implemented */ /* Internal Oligo "Sequence" Input Tags */ internal_oligo_excluded_region = ajAcdGetRange("oexcludedregion"); internal_oligo_input = ajAcdGetString("oligoinput"); /* Internal Oligo "Global" Input Tags */ internal_oligo_opt_size = ajAcdGetInt("osizeopt"); internal_oligo_min_size = ajAcdGetInt("ominsize"); internal_oligo_max_size = ajAcdGetInt("omaxsize"); internal_oligo_opt_tm = ajAcdGetFloat("otmopt"); internal_oligo_min_tm = ajAcdGetFloat("otmmin"); internal_oligo_max_tm = ajAcdGetFloat("otmmax"); internal_oligo_opt_gc_percent = ajAcdGetFloat("ogcopt"); internal_oligo_min_gc = ajAcdGetFloat("ogcmin"); internal_oligo_max_gc = ajAcdGetFloat("ogcmax"); internal_oligo_salt_conc = ajAcdGetFloat("osaltconc"); internal_oligo_dna_conc = ajAcdGetFloat("odnaconc"); internal_oligo_self_any = ajAcdGetFloat("oanyself"); internal_oligo_self_end = ajAcdGetFloat("oendself"); internal_oligo_max_poly_x = ajAcdGetInt("opolyxmax"); internal_oligo_mishyb_library = ajAcdGetInfile("mishyblibraryfile"); internal_oligo_max_mishyb = ajAcdGetFloat("omishybmax"); /* internal_oligo_min_quality = ajAcdGetInt("oligominquality"); */ /* Internal 
Oligo penalties */ /* these are not (yet) implemented */ /* EMBOSS-wrapper-specific stuff */ outfile = ajAcdGetOutfile("outfile"); ajStrRemoveWhite(&left_input); ajStrRemoveWhite(&right_input); /* ** OK - we will now try to do a separate fork-exec for each sequence. */ result = ajStrNew(); while(ajSeqallNext(sequence, &seq)) { program = ajAcdGetpathC("primer3_core"); if(!ajSysExecRedirectC(ajStrGetPtr(program),&pipeto,&pipefrom)) ajFatal("eprimer3: Could not exec primer3_core"); stream = eprimer3_start_write(pipeto[1]); /* send primer3 Primer "Global" parameters */ eprimer3_send_bool(stream, "PRIMER_EXPLAIN_FLAG", explain_flag); eprimer3_send_bool(stream, "PRIMER_FILE_FLAG", file_flag); if(do_hybrid) { if(!ajStrCmpC(task[0], "1")) ajStrAssignC(&taskstr, "pick_pcr_primers_and_hyb_probe"); else if(!ajStrCmpC(task[0], "2")) ajStrAssignC(&taskstr, "pick_left_only"); else if(!ajStrCmpC(task[0], "3")) ajStrAssignC(&taskstr, "pick_right_only"); else if(!ajStrCmpC(task[0], "4")) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); if (!do_primer) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); } else { if(!ajStrCmpC(task[0], "1")) ajStrAssignC(&taskstr, "pick_pcr_primers"); else if(!ajStrCmpC(task[0], "2")) ajStrAssignC(&taskstr, "pick_left_only"); else if(!ajStrCmpC(task[0], "3")) ajStrAssignC(&taskstr, "pick_right_only"); else if(!ajStrCmpC(task[0], "4")) ajStrAssignC(&taskstr, "pick_hyb_probe_only"); } eprimer3_send_string(stream, "PRIMER_TASK", taskstr); eprimer3_send_int(stream, "PRIMER_NUM_RETURN", num_return); eprimer3_send_int(stream, "PRIMER_FIRST_BASE_INDEX", first_base_index); eprimer3_send_bool(stream, "PRIMER_PICK_ANYWAY", pick_anyway); /* mispriming library may not have been specified */ if(mispriming_library) eprimer3_send_stringC(stream, "PRIMER_MISPRIMING_LIBRARY", ajFileGetPrintnameC(mispriming_library)); eprimer3_send_float(stream, "PRIMER_MAX_MISPRIMING", max_mispriming); eprimer3_send_float(stream, "PRIMER_PAIR_MAX_MISPRIMING", pair_max_mispriming); 
eprimer3_send_int(stream, "PRIMER_GC_CLAMP", gc_clamp); eprimer3_send_int(stream, "PRIMER_OPT_SIZE", opt_size); eprimer3_send_int(stream, "PRIMER_MIN_SIZE", min_size); eprimer3_send_int(stream, "PRIMER_MAX_SIZE", max_size); eprimer3_send_float(stream, "PRIMER_OPT_TM", opt_tm); eprimer3_send_float(stream, "PRIMER_MIN_TM", min_tm); eprimer3_send_float(stream, "PRIMER_MAX_TM", max_tm); eprimer3_send_float(stream, "PRIMER_MAX_DIFF_TM", max_diff_tm); eprimer3_send_float(stream, "PRIMER_OPT_GC_PERCENT", opt_gc_percent); eprimer3_send_float(stream, "PRIMER_MIN_GC", min_gc); eprimer3_send_float(stream, "PRIMER_MAX_GC", max_gc); eprimer3_send_float(stream, "PRIMER_SALT_CONC", salt_conc); eprimer3_send_float(stream, "PRIMER_DNA_CONC", dna_conc); eprimer3_send_int(stream, "PRIMER_NUM_NS_ACCEPTED", num_ns_accepted); eprimer3_send_float(stream, "PRIMER_SELF_ANY", self_any); eprimer3_send_float(stream, "PRIMER_SELF_END", self_end); eprimer3_send_int(stream, "PRIMER_MAX_POLY_X", max_poly_x); eprimer3_send_int(stream, "PRIMER_PRODUCT_OPT_SIZE", product_opt_size); eprimer3_send_range2(stream, "PRIMER_PRODUCT_SIZE_RANGE", product_size_range); eprimer3_send_float(stream, "PRIMER_PRODUCT_OPT_TM", product_opt_tm); eprimer3_send_float(stream, "PRIMER_PRODUCT_MIN_TM", product_min_tm); eprimer3_send_float(stream, "PRIMER_PRODUCT_MAX_TM", product_max_tm); eprimer3_send_float(stream, "PRIMER_MAX_END_STABILITY", max_end_stability); /* send primer3 Internal Oligo "Global" parameters */ eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_OPT_SIZE", internal_oligo_opt_size); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MIN_SIZE", internal_oligo_min_size); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_SIZE", internal_oligo_max_size); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_TM", internal_oligo_opt_tm); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_TM", internal_oligo_min_tm); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_TM", internal_oligo_max_tm); 
eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_GC_PERCENT", internal_oligo_opt_gc_percent); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_GC", internal_oligo_min_gc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_GC", internal_oligo_max_gc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SALT_CONC", internal_oligo_salt_conc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_DNA_CONC", internal_oligo_dna_conc); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_ANY", internal_oligo_self_any); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_END", internal_oligo_self_end); eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_POLY_X", internal_oligo_max_poly_x); /* ** internal oligo mishybridising library may not have been ** specified */ if(internal_oligo_mishyb_library) eprimer3_send_stringC(stream, "PRIMER_INTERNAL_OLIGO_MISHYB_LIBRARY", ajFileGetPrintnameC(internal_oligo_mishyb_library)); eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_MISHYB", internal_oligo_max_mishyb); /* ** Start sequence-specific stuff */ begin = ajSeqallGetseqBegin(sequence) - 1; end = ajSeqallGetseqEnd(sequence) - 1; strand = ajSeqGetSeqCopyS(seq); ajStrFmtUpper(&strand); ajStrAssignSubC(&substr,ajStrGetPtr(strand), begin, end); /* send flags to turn on using optimal product size */ eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_GT", (float)0.05); eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_LT", (float)0.05); /* send primer3 Primer "Sequence" parameters */ eprimer3_send_string(stream, "SEQUENCE", substr); /* if no ID name, use the USA */ if(ajStrMatchC(ajSeqGetNameS(seq),"")) eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID", ajSeqGetUsaS(seq)); else eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID", ajSeqGetNameS(seq)); eprimer3_send_range(stream, "INCLUDED_REGION", included_region, begin); eprimer3_send_range(stream, "TARGET", target, begin); eprimer3_send_range(stream, "EXCLUDED_REGION", excluded_region, begin); 
eprimer3_send_string(stream, "PRIMER_LEFT_INPUT", left_input); eprimer3_send_string(stream, "PRIMER_RIGHT_INPUT", right_input); /* send primer3 Internal Oligo "Sequence" parameters */ eprimer3_send_range(stream, "PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION", internal_oligo_excluded_region, begin); eprimer3_send_string(stream, "PRIMER_INTERNAL_OLIGO_INPUT", internal_oligo_input); /* end the primer3 input sequence record with a '=' */ eprimer3_send_end(stream); /* and close the ouput pipe stream */ eprimer3_end_write(stream); /* read the primer3 output */ eprimer3_read(pipefrom[0], &result); eprimer3_report(outfile, result, num_return, begin); ajStrSetClear(&result); #ifndef WIN32 close(pipeto[1]); close(pipefrom[0]); #endif } /* end of sequence loop */ ajStrDel(&result); ajSeqDel(&seq); ajStrDel(&strand); ajStrDel(&substr); ajFileClose(&outfile); ajStrDel(&taskstr); ajStrDelarray(&task); ajSeqallDel(&sequence); ajSeqDel(&seq); ajRangeDel(&included_region); ajRangeDel(&target); ajRangeDel(&excluded_region); ajRangeDel(&product_size_range); ajRangeDel(&internal_oligo_excluded_region); ajStrDel(&left_input); ajStrDel(&right_input); ajStrDel(&internal_oligo_input); AJFREE(pipeto); AJFREE(pipefrom); ajFileClose(&mispriming_library); embExit(); return 0; }
/* @func embMatProtReadInt ****************************************************
**
** Reads one fingerprint record from an open file into a newly allocated
** EmbPMatPrints object.
**
** Leading lines that are empty or start with '#', '!' or a newline are
** skipped. The record is then read line by line in this order: code,
** accession, number of motifs, title, and for each motif its length,
** threshold and maximum followed by 26 rows of space-separated unsigned
** integers. One further line is read after the last motif.
**
** A matrix row holding fewer values than the motif length leaves the
** remaining entries at zero instead of dereferencing a NULL token.
**
** @param [u] fp [AjPFile] Input file, positioned at or before a record
** @return [EmbPMatPrints] New object, or NULL when no record line could
**                         be read
******************************************************************************/
EmbPMatPrints embMatProtReadInt(AjPFile fp)
{
    EmbPMatPrints ret;
    AjPStr line;
    ajint i;
    ajuint j;
    ajuint m;
    const char *p;

    /* Seed with a comment so the skip loop reads the first real line */
    line = ajStrNewC("#");

    p = ajStrGetPtr(line);

    while(!*p || *p=='#' || *p=='!' || *p=='\n')
    {
        if(!ajReadlineTrim(fp,&line))
        {
            ajStrDel(&line);

            return NULL;
        }

        p = ajStrGetPtr(line);
    }

    ajDebug("embMatProtReadint starting\n");
    ajDebug ("Line: %S\n", line);

    AJNEW0 (ret);

    ret->cod = ajStrNew();
    ajStrAssignS(&ret->cod,line);

    ajReadlineTrim(fp,&line);
    ret->acc = ajStrNew();
    ajStrAssignS(&ret->acc,line);

    ajReadlineTrim(fp,&line);
    ajStrToUint(line,&ret->n);

    ajReadlineTrim(fp,&line);
    ret->tit = ajStrNew();
    ajStrAssignS(&ret->tit,line);

    ajDebug ("Lineb: %S\n", line);

    /*
    ** Zero-filled because the conversions below are unchecked; an entry
    ** that is never stored then reads as 0 rather than as garbage
    */
    AJCNEW0(ret->len, ret->n);
    AJCNEW0(ret->max, ret->n);
    AJCNEW0(ret->thresh, ret->n);
    AJCNEW(ret->matrix, ret->n);

    for(m=0;m<ret->n;++m)
    {
        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->len[m]);

        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->thresh[m]);

        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->max[m]);

        ajDebug ("m: %d/%d len:%d thresh:%d max:%d\n",
                 m, ret->n, ret->len[m], ret->thresh[m], ret->max[m]);

        for(i=0;i<26;++i)
        {
            AJCNEW0(ret->matrix[m][i], ret->len[m]);
            ajReadlineTrim(fp,&line);
            ajDebug ("Linec [%d][%d]: %S\n", m, i, line);
            p = ajStrGetPtr(line);

            for(j=0;j<ret->len[m];++j)
            {
                /* first call tokenises the line, later calls continue it */
                if(!j)
                    p = ajSysFuncStrtok(p," ");
                else
                    p = ajSysFuncStrtok(NULL," ");

                if(!p)
                {
                    /* short row: the rest keeps its zero fill */
                    ajDebug ("Short row [%d][%d]: %d of %d values\n",
                             m, i, j, ret->len[m]);
                    break;
                }

                sscanf(p,"%u",&ret->matrix[m][i][j]);
            }
        }
    }

    /* one more line is consumed after the last motif */
    ajReadlineTrim(fp,&line);

    ajDebug ("Linec: %S\n", line);
    ajStrDel(&line);

    return ret;
}
int main(int argc, char **argv) { AjPFile outf = NULL; AjPSeq sequence = NULL; AjPStr substr = NULL; AjPStr seqstr = NULL; AjPStr revstr = NULL; AjPStr p1; AjPStr p2; PPrimer eric = NULL; PPrimer fred = NULL; PPrimer f; PPrimer r; PPair pair; AjPList forlist = NULL; AjPList revlist = NULL; AjPList pairlist = NULL; AjBool targetrange; AjBool isDNA = ajTrue; AjBool dolist = ajFalse; ajint primerlen = 0; ajint minprimerlen = 0; ajint maxprimerlen = 0; ajint minprodlen = 0; ajint maxprodlen = 0; ajint prodlen = 0; ajint seqlen = 0; ajint stepping_value = 1; ajint targetstart = 0; ajint targetend = 0; ajint limit = 0; ajint limit2 = 0; ajint lastpos = 0; ajint startpos = 0; ajint endpos = 0; ajint begin; ajint end; ajint v1; ajint v2; ajint overlap; float minpmGCcont = 0.; float maxpmGCcont = 0.; float minprodGCcont = 0.; float maxprodGCcont = 0.; float prodTm; float prodGC; ajint i; ajint j; ajint neric=0; ajint nfred=0; ajint npair=0; float minprimerTm = 0.0; float maxprimerTm = 0.0; float saltconc = 0.0; float dnaconc = 0.0; embInit ("prima", argc, argv); substr = ajStrNew(); forlist = ajListNew(); revlist = ajListNew(); pairlist = ajListNew(); p1 = ajStrNew(); p2 = ajStrNew(); sequence = ajAcdGetSeq("sequence"); outf = ajAcdGetOutfile("outfile"); minprimerlen = ajAcdGetInt("minprimerlen"); maxprimerlen = ajAcdGetInt("maxprimerlen"); minpmGCcont = ajAcdGetFloat("minpmGCcont"); maxpmGCcont = ajAcdGetFloat("maxpmGCcont"); minprimerTm = ajAcdGetFloat("mintmprimer"); maxprimerTm = ajAcdGetFloat("maxtmprimer"); minprodlen = ajAcdGetInt("minplen"); maxprodlen = ajAcdGetInt("maxplen"); minprodGCcont = ajAcdGetFloat("minpgccont"); maxprodGCcont = ajAcdGetFloat("maxpgccont"); saltconc = ajAcdGetFloat("saltconc"); dnaconc = ajAcdGetFloat("dnaconc"); targetrange = ajAcdGetToggle("targetrange"); targetstart = ajAcdGetInt("targetstart"); targetend = ajAcdGetInt("targetend"); overlap = ajAcdGetInt("overlap"); dolist = ajAcdGetBoolean("list"); seqstr = ajSeqGetSeqCopyS(sequence); 
ajStrFmtUpper(&seqstr); begin = ajSeqGetBegin(sequence); end = ajSeqGetEnd(sequence); seqlen = end-begin+1; ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1); revstr = ajStrNewC(ajStrGetPtr(substr)); ajSeqstrReverse(&revstr); AJCNEW0(entropy, seqlen); AJCNEW0(enthalpy, seqlen); AJCNEW0(energy, seqlen); /* Initialise Tm calculation arrays */ ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1, &entropy, &enthalpy, &energy); ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n"); ajFmtPrintF(outf, "*************\n\n"); if(targetrange) ajFmtPrintF (outf, "Prima of %s from positions %d to %d bps\n", ajSeqGetNameC(sequence),targetstart, targetend); else ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence)); ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n"); ajFmtPrintF (outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR "); ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n"); ajFmtPrintF(outf, "Primer size range is %d-%d\n",minprimerlen,maxprimerlen); ajFmtPrintF(outf, "Primer GC content range is %.2f-%.2f\n",minpmGCcont, maxpmGCcont); ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n", minprimerTm, maxprimerTm); ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n"); ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n", minprodGCcont, maxprodGCcont); ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc); ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc); if(targetrange) ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n", targetstart,targetend); else { ajFmtPrintF(outf,"Considering all suitable Primer pairs with "); ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen, maxprodlen); } ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n"); ajFmtPrintF(outf, "*****************************************\n\n"); if(seqlen-minprimerlen < 0) ajFatal("Sequence too short"); if(targetrange) { ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin); prodGC = 
ajMeltGC(substr,seqlen); prodTm = ajMeltTempProd(prodGC,saltconc,seqlen); if(prodGC<minprodGCcont || prodGC>maxprodGCcont) { ajFmtPrintF(outf, "Product GC content [%.2f] outside acceptable range\n", prodGC); embExitBad(); return 0; } prima_testtarget(substr, revstr, targetstart-begin, targetend-begin, minprimerlen, maxprimerlen, seqlen, minprimerTm, maxprimerTm, minpmGCcont, maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc, dnaconc, pairlist, &npair); } if(!targetrange) { limit = seqlen-minprimerlen-minprodlen+1; lastpos = seqlen-minprodlen; limit2 = maxprodlen-minprodlen; /* Outer loop selects all possible product start points */ for(i=minprimerlen; i<limit; ++i) { startpos = i; ajDebug("Position in sequence %d\n",startpos); endpos = i+minprodlen-1; /* Inner loop selects all possible product lengths */ for(j=0; j<limit2; ++j, ++endpos) { if(endpos>lastpos) break; v1 = endpos-startpos+1; ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos); prodGC = ajMeltGC(p1,v1); prodTm = ajMeltTempProd(prodGC,saltconc,v1); if(prodGC<minprodGCcont || prodGC>maxprodGCcont) continue; /* Only accept primers with acceptable Tm and GC */ neric = 0; nfred = 0; prima_testproduct(substr, startpos, endpos, primerlen, minprimerlen, maxprimerlen,minpmGCcont, maxpmGCcont, minprimerTm, maxprimerTm, minprodlen, maxprodlen, prodTm, prodGC, seqlen, &eric,&fred,forlist,revlist,&neric,&nfred, stepping_value, saltconc,dnaconc, isDNA, begin); if(!neric) continue; /* Now reject those primers with self-complementarity */ prima_reject_self(forlist,revlist,&neric,&nfred); if(!neric) continue; /* Reject any primers that could bind elsewhere in the sequence */ prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr, seqlen); /* Now select the least complementary pair (if any) */ prima_best_primer(forlist, revlist, &neric, &nfred); if(!neric) continue; AJNEW(pair); ajListPop(forlist,(void **)&f); ajListPop(revlist,(void **)&r); pair->f = f; pair->r = r; ++npair; ajListPush(pairlist,(void 
*)pair); } } } if(!targetrange) { /* Get rid of primer pairs nearby the top scoring ones */ prima_TwoSortscorepos(&pairlist); prima_prune_nearby(pairlist, &npair, maxprimerlen-1); ajListSort(pairlist,prima_PosCompare); prima_check_overlap(pairlist,&npair,overlap); } if(npair) { if(!targetrange) ajFmtPrintF(outf,"%d pairs found\n\n",npair); else ajFmtPrintF(outf, "Closest primer pair to specified product is:\n\n"); if((maxprimerlen<26 && seqlen<999999 && !dolist)) ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n"); } for(i=0;i<npair;++i) { if(!targetrange) ajFmtPrintF(outf,"[%d]\n",i+1); ajListPop(pairlist,(void **)&pair); prodlen = pair->r->start - (pair->f->start + pair->f->primerlen); if((maxprimerlen<26 && seqlen<999999 && !dolist)) { v1 = pair->f->start; v2 = v1 + pair->f->primerlen -1; ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1), v2+begin); v1 = pair->r->start; v2 = v1 + pair->r->primerlen -1; ajStrAssignSubS(&p2,substr,v1,v2); ajSeqstrReverse(&p2); ajFmtPrintF(outf, "%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin); ajFmtPrintF(outf," Tm %.2f C (GC %.2f%%)\t\t ", pair->f->primerTm,pair->f->primGCcont*100.); ajFmtPrintF(outf,"Tm %.2f C (GC %.2f%%)\n", pair->r->primerTm,pair->r->primGCcont*100.); ajFmtPrintF(outf," Length: %-32dLength: %d\n", pair->f->primerlen,pair->r->primerlen); ajFmtPrintF(outf," Tma: %.2f C\t\t\t", ajAnneal(pair->f->primerTm,pair->f->prodTm)); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm)); ajFmtPrintF(outf," Product GC: %.2f%%\n", pair->f->prodGC * 100.0); ajFmtPrintF(outf," Product Tm: %.2f C\n", pair->f->prodTm); ajFmtPrintF(outf," Length: %d\n\n\n",prodlen); } else { ajFmtPrintF(outf," Product from %d to %d\n",pair->f->start+ pair->f->primerlen+begin,pair->r->start-1+begin); ajFmtPrintF(outf," Tm: %.2f C GC: %.2f%%\n", pair->f->prodTm,pair->f->prodGC*(float)100.); ajFmtPrintF(outf," Length: %d\n\n\n",prodlen); v1 = pair->f->start; v2 
= v1 + pair->f->primerlen -1; ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf," Forward: 5' %s 3'\n",ajStrGetPtr(p1)); ajFmtPrintF(outf," Start: %d\n",v1+begin); ajFmtPrintF(outf," End: %d\n",v2+begin); ajFmtPrintF(outf," Tm: %.2f C\n", pair->f->primerTm); ajFmtPrintF(outf," GC: %.2f%%\n", pair->f->primGCcont*(float)100.); ajFmtPrintF(outf," Len: %d\n", pair->f->primerlen); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->f->primerTm,pair->f->prodTm)); v1 = pair->r->start; v2 = v1 + pair->r->primerlen -1; ajStrAssignSubS(&p2,substr,v1,v2); ajSeqstrReverse(&p2); ajStrAssignSubS(&p1,substr,v1,v2); ajFmtPrintF(outf," Reverse: 5' %s 3'\n",ajStrGetPtr(p1)); ajFmtPrintF(outf," Start: %d\n",v1+begin); ajFmtPrintF(outf," End: %d\n",v2+begin); ajFmtPrintF(outf," Tm: %.2f C\n", pair->r->primerTm); ajFmtPrintF(outf," GC: %.2f%%\n", pair->r->primGCcont*(float)100.); ajFmtPrintF(outf," Len: %d\n", pair->r->primerlen); ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm)); } prima_PrimerDel(&pair->f); prima_PrimerDel(&pair->r); AJFREE(pair); } ajStrDel(&seqstr); ajStrDel(&revstr); ajStrDel(&substr); ajStrDel(&p1); ajStrDel(&p2); ajListFree(&forlist); ajListFree(&revlist); ajListFree(&pairlist); ajFileClose(&outf); ajSeqDel(&sequence); AJFREE(entropy); AJFREE(enthalpy); AJFREE(energy); embExit(); return 0; }
int main(int argc, char **argv) { AjPAlign align; AjPSeqall seqall; AjPSeq a; AjPSeq b; AjPStr alga; AjPStr algb; AjPStr ss; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; ajint *compass; float* ix; float* iy; float* m; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; float endgapopen; float endgapextend; size_t maxarr = 1000; /* arbitrary. realloc'd if needed */ size_t len; float score; AjBool dobrief = ajTrue; AjBool endweight = ajFalse; /* should end gap penalties be applied */ float id = 0.; float sim = 0.; float idx = 0.; float simx = 0.; AjPStr tmpstr = NULL; embInit("needle", argc, argv); matrix = ajAcdGetMatrixf("datafile"); a = ajAcdGetSeq("asequence"); ajSeqTrim(a); seqall = ajAcdGetSeqall("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); endgapopen = ajAcdGetFloat("endopen"); endgapextend = ajAcdGetFloat("endextend"); dobrief = ajAcdGetBoolean("brief"); endweight = ajAcdGetBoolean("endweight"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW0(compass, maxarr); AJCNEW0(m, maxarr); AJCNEW0(ix, maxarr); AJCNEW0(iy, maxarr); alga = ajStrNew(); algb = ajStrNew(); ss = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); lena = ajSeqGetLen(a); while(ajSeqallNext(seqall,&b)) { ajSeqTrim(b); lenb = ajSeqGetLen(b); if(lenb > (LONG_MAX/(size_t)(lena+1))) ajDie("Sequences too big. Try 'stretcher'"); len = (size_t)lena*(size_t)lenb; if(len>maxarr) { AJCRESIZETRY0(compass,(size_t)maxarr,len); if(!compass) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(m,(size_t)maxarr,len); if(!m) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(ix,(size_t)maxarr,len); if(!ix) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(iy,(size_t)maxarr,len); if(!iy) ajDie("Sequences too big. 
Try 'stretcher'"); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&alga,""); ajStrAssignC(&algb,""); score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb, gapopen, gapextend, endgapopen, endgapextend, &start1, &start2, sub, cvt, m, ix, iy, compass, ajFalse, endweight); embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb, lena, lenb, &start1, &start2, compass); embAlignReportGlobal(align, a, b, alga, algb, start1, start2, gapopen, gapextend, score, matrix, ajSeqGetOffset(a), ajSeqGetOffset(b)); if(!dobrief) { embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx, &simx); ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n", id); ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n", sim); ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n", idx); ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%", simx); ajAlignSetSubHeaderApp(align, tmpstr); } ajAlignWrite(align); ajAlignReset(align); } ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); ajSeqDel(&a); ajSeqDel(&b); AJFREE(compass); AJFREE(ix); AJFREE(iy); AJFREE(m); ajStrDel(&alga); ajStrDel(&algb); ajStrDel(&ss); ajStrDel(&tmpstr); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPFile libr=NULL; AjPStr idformat = NULL; EmbPEntry entry; ajuint idtype = 0; ajuint idCount = 0; ajuint idDone; AjPList listInputFiles = NULL; void ** inputFiles = NULL; ajuint nfiles; ajuint ifile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; AjPRegexp regIdExp = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i; embInit("dbifasta", argc, argv); idformat = ajAcdGetListSingle("idformat"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint)maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); 
} } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); regIdExp = dbifasta_getExpr(idformat, &idtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listInputFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listInputFiles, &ajStrVcmp); nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) inputFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ for(ifile=0; ifile < nfiles; ifile++) { ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]); embDbiFlatOpenlib(curfilename, &libr); ajFilenameTrimPath(&curfilename); if(ajStrGetLen(curfilename) >= maxfilelen) maxfilelen = ajStrGetLen(curfilename) + 1; ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%F' ...\n", libr); ajStrAssignS(&divfiles[ifile], curfilename); if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, ifile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbifasta_NextFlatEntry(libr, ifile, regIdExp, idtype, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ embDbiMemEntry(idlist, fieldList, nfields, entry, ifile); entry = NULL; } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); AJFREE(entry); } else { embDbiEntryDel(&dbifastaGEntry); } embDbiLogFile(logfile, curfilename, idCountFile, fields, 
countField, nfields); } /* write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajStrDel(&idformat); ajStrDelarray(&fields); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&tmpfname); ajFileClose(&libr); ajFileClose(&logfile); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], &embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); AJFREE(maxFieldLen); AJFREE(countField); AJFREE(fieldTot); 
for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(inputFiles); embDbiEntryDel(&dbifastaGEntry); ajStrDel(&dbifastaGRline); ajStrDel(&dbifastaGTmpId); if(dbifastaGFdl) { for(i=0; i < nfields; i++) ajListFree(&dbifastaGFdl[i]); AJFREE(dbifastaGFdl); } ajListMap(idlist, &embDbiEntryDelMap, NULL); ajListFree(&idlist); ajListstrFreeData(&listInputFiles); AJFREE(entryIds); ajRegFree(&dbifastaGIdexp); ajRegFree(&dbifastaGWrdexp); ajRegFree(®IdExp); ajStrDel(&dbifastaGTmpAc); ajStrDel(&dbifastaGTmpSv); ajStrDel(&dbifastaGTmpGi); ajStrDel(&dbifastaGTmpDb); ajStrDel(&dbifastaGTmpDes); ajStrDel(&dbifastaGTmpFd); ajStrDel(&curfilename); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: 
%u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all 
matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }