/*
** cusp main: accumulate codon triplet counts over every sequence in the
** "sequence" ACD input, compute the usage figures once at the end, and
** write the resulting codon usage table to the "outfile" ACD output.
*/
int main(int argc, char **argv)
{
    AjPSeqall inputs;
    AjPSeq    seq;
    AjPFile   outf;
    AjPCod    usage;
    AjPStr    region;
    ajint     first;
    ajint     last;
    ajint     ncodons;

    embInit("cusp", argc, argv);

    inputs = ajAcdGetSeqall("sequence");
    outf   = ajAcdGetOutfile("outfile");

    ncodons = 0;
    region  = ajStrNew();

    /* The codon table is named after the output file */
    usage = ajCodNewCodenum(0);
    ajCodSetNameS(usage, ajFileGetPrintnameS(outf));

    while(ajSeqallNext(inputs, &seq))
    {
        /* Begin/end are 1-based; the substring call takes 0-based bounds */
        first = ajSeqallGetseqBegin(inputs);
        last  = ajSeqallGetseqEnd(inputs);

        ajStrAssignSubS(&region, ajSeqGetSeqS(seq), first - 1, last - 1);
        ajCodSetTripletsS(usage, region, &ncodons);
    }

    ajCodCalcUsage(usage, ncodons);
    ajCodSetDescC(usage, "CUSP codon usage file");
    ajCodWrite(usage, outf);

    ajFileClose(&outf);

    ajStrDel(&region);
    ajCodDel(&usage);
    ajSeqallDel(&inputs);
    ajSeqDel(&seq);

    embExit();

    return 0;
}
/*
** pdbparse main: for every file in the "pdbpath" ACD directory list, parse
** the raw PDB file and write a clean coordinate file (CCF) into the
** "ccfoutdir" output directory.  One record per input file is appended to
** the "logfile" output, terminated by a "//" line.
**
** Per-file failures (cannot open, cannot parse, cannot create or write the
** output) are reported with ajWarn, logged with a status keyword
** (FILE_OPEN, NO_OUTPUT, FILE_WRITE) and processing continues with the next
** file.  A file name from which no 4-character PDB code can be taken is
** fatal.
*/
int main(ajint argc, char **argv)
{
    AjPList   pdb_path   = NULL;  /* Path of pdb files */
    AjPDirout ccf_path   = NULL;  /* Path of ccf files */
    AjPStr    ccf_name   = NULL;  /* Name of ccf file  */
    AjPStr    pdbid      = NULL;  /* PDB code          */
    AjPStr    pdbid_temp = NULL;  /* PDB code (scratch copy) */

    /* True  == use the pdbid code to name the output file,
       False == use the name of the original pdb file */
    AjBool ccfnaming = ajFalse;

    /* Mask non-amino acid groups in protein chains that do not contain a
       C-alpha atom. The group will not appear in either the CO or SQ
       records of the clean coordinate file */
    AjBool camask = ajFalse;

    /* Mask amino acids in protein chains that do not contain a C-alpha
       atom. The amino acid will not appear in the CO record but will still
       be present in the SQ record of the clean coordinate file */
    AjBool camask1 = ajFalse;

    /* Mask residues or groups in protein chains with a single atom only */
    AjBool atommask = ajFalse;

    AjPStr  temp     = NULL;  /* Current pdb file name, popped from pdb_path */
    AjPFile pdb_inf  = NULL;  /* pdb input file pointer  */
    AjPFile ccf_outf = NULL;  /* ccf output file pointer */
    AjPFile logf     = NULL;  /* log file pointer        */
    AjPPdb  pdb      = NULL;  /* Pdb structure (for parsed data) */

    ajint min_chain_size = 0; /* Minimum length of a SEQRES chain for it to
                                 be parsed */
    ajint max_mismatch   = 0; /* Maximum number of permissible mismatches
                                 between the ATOM and SEQRES sequences */
    ajint max_trim       = 0; /* Max. no. residues to trim when checking for
                                 missing C-terminal SEQRES residues */

    /* Initialise strings.  'temp' is deliberately NOT allocated here: it
       only ever holds strings popped off pdb_path, so a pre-allocated
       string would be overwritten (and leaked) by the first ajListPop. */
    ccf_name   = ajStrNew();
    pdbid      = ajStrNew();
    pdbid_temp = ajStrNew();

    /* Read data from acd */
    embInitPV("pdbparse",argc,argv,"STRUCTURE",VERSION);

    pdb_path       = ajAcdGetDirlist("pdbpath");
    ccf_path       = ajAcdGetOutdir("ccfoutdir");
    logf           = ajAcdGetOutfile("logfile");
    min_chain_size = ajAcdGetInt("chnsiz");
    max_mismatch   = ajAcdGetInt("maxmis");
    max_trim       = ajAcdGetInt("maxtrim");
    ccfnaming      = ajAcdGetBoolean("ccfnaming");
    camask         = ajAcdGetBoolean("camask");
    camask1        = ajAcdGetBoolean("camaska");
    atommask       = ajAcdGetBoolean("atommask");

    /* Start of main application loop */
    while(ajListPop(pdb_path,(void **)&temp))
    {
        ajFmtPrint("Processing %S\n", temp);
        ajFmtPrintF(logf, "%S\n", temp);

        /* Read pdb file.  Warnings use a literal format string so that a
           '%' in a file name is never interpreted as a format directive. */
        if((pdb_inf=ajFileNewInNameS(temp))==NULL)
        {
            ajWarn("Could not open for reading %S ", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", temp);
            ajStrDel(&temp);
            continue;
        }

        /* Assign pdb id code from file name */
        ajStrAssignS(&pdbid, temp);
        ajFilenameTrimPathExt(&pdbid);

        if(MAJSTRGETLEN(pdbid)>4)
        {
            /* The file name is longer than expected (and probably contains
               a prefix).  Take the last four characters to be the pdbid
               code.  The bounds -4 and -1 are the values the original code
               always passed; AJAX substring positions presumably count from
               the end of the string when negative -- TODO confirm. */
            ajStrAssignSubS(&pdbid_temp, pdbid, -4, -1);
            ajStrAssignS(&pdbid, pdbid_temp);
        }
        else if(MAJSTRGETLEN(pdbid)<4)
            ajFatal("Could not determine pdbid code from file name (%S)",
                    pdbid);

        /* Parse pdb file and write pdb structure */
        if(!(pdb=ajPdbReadRawNew(pdb_inf, pdbid, min_chain_size,
                                 max_mismatch, max_trim, camask, camask1,
                                 atommask, logf)))
        {
            ajWarn("Clean coordinate file not generated for %S", temp);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "NO_OUTPUT", temp);

            ajFileClose(&pdb_inf);
            ajStrDel(&temp);
            continue;
        }

        /* Open clean coordinate file for writing */
        if(ccfnaming)
            ajStrAssignS(&ccf_name, pdb->Pdb);
        else
            ajStrAssignS(&ccf_name, temp);

        ajStrFmtLower(&ccf_name);

        if(!(ccf_outf=ajFileNewOutNameDirS(ccf_name, ccf_path)))
        {
            ajWarn("Could not open %S for writing", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", ccf_name);

            ajFileClose(&pdb_inf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }

        /* Write pdb file */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("Could not write file %S", ccf_name);
            ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_WRITE", ccf_name);

            /* NOTE(review): this removes the partial output by building a
               shell command from a file name.  The name is not quoted and
               carries no directory component, so it may both misbehave on
               unusual names and miss the file actually created under
               ccf_path.  Left as-is; consider a non-shell removal of the
               real output path. */
            ajFmtPrintS(&temp, "rm %S", ccf_name);
            ajFmtPrint("%S", temp);
            ajSysSystem(temp);

            ajFileClose(&pdb_inf);
            ajFileClose(&ccf_outf);
            ajPdbDel(&pdb);
            ajStrDel(&temp);
            continue;
        }

        /* Tidy up */
        ajFileClose(&pdb_inf);
        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&temp);

        ajFmtPrintF(logf, "//\n");
    }
    /* End of main application loop */

    /* Tidy up */
    ajListFree(&pdb_path);
    ajDiroutDel(&ccf_path);
    ajStrDel(&ccf_name);
    ajStrDel(&pdbid);
    ajStrDel(&pdbid_temp);
    ajFileClose(&logf);

    /* Return */
    ajExit();

    return 0;
}
/* Compute usage from the accumulated triplets and return the Nc statistic
   narrowed to float. */
static float chips_calc_nc(AjPCod table, ajint ncodons)
{
    double nc;

    ajCodCalcUsage(table, ncodons);
    nc = ajCodCalcNc(table);

    return (float) nc;
}


/*
** chips main: report the Nc codon usage statistic for the "seqall" ACD
** input, written to the "outfile" ACD output.
**
** With "sum" false, one line per sequence is printed and the codon table is
** cleared after each sequence.  With "sum" true, triplets from all
** sequences are pooled and a single value is printed at the end.
**
** NOTE(review): in per-sequence mode the running codon count is not reset
** when the table is cleared -- confirm that this is intended.
*/
int main(int argc, char **argv)
{
    AjPSeqall inputs;
    AjPSeq    seq;
    AjPFile   outf;
    AjPCod    table;
    AjPStr    region;
    AjBool    pooled;
    ajint     ncodons;
    ajint     first;
    ajint     last;
    float     Nc;

    embInit("chips", argc, argv);

    inputs = ajAcdGetSeqall("seqall");
    outf   = ajAcdGetOutfile("outfile");
    pooled = ajAcdGetBoolean("sum");

    table   = ajCodNewCodenum(0);
    ncodons = 0;
    region  = ajStrNew();

    while(ajSeqallNext(inputs, &seq))
    {
        /* Begin/end are 1-based; the substring call takes 0-based bounds */
        first = ajSeqallGetseqBegin(inputs);
        last  = ajSeqallGetseqEnd(inputs);

        ajStrAssignSubS(&region, ajSeqGetSeqS(seq), first - 1, last - 1);
        ajStrFmtUpper(&region);
        ajCodSetTripletsS(table, region, &ncodons);

        if(pooled)
            continue;

        Nc = chips_calc_nc(table, ncodons);
        ajFmtPrintF(outf,"%-20s Nc = %.3f\n",ajSeqGetNameC(seq),Nc);
        ajCodClearData(table);
    }

    if(pooled)
    {
        Nc = chips_calc_nc(table, ncodons);
        ajFmtPrintF(outf,"# CHIPS codon usage statistics\n\n");
        ajFmtPrintF(outf,"Nc = %.3f\n",Nc);
    }

    ajFileClose(&outf);

    ajCodDel(&table);
    ajSeqallDel(&inputs);
    ajSeqDel(&seq);
    ajStrDel(&region);

    embExit();

    return 0;
}
int main(int argc, char **argv) { /* ** All pointers set to NULL for safety. ** Variables names and initialisation values aligned for clarity. */ AjBool boo = ajFalse; ajint n1 = 0; ajint n2 = 0; ajlong l1 = 0; /* long int */ float f1 = 0.0; double d1 = 0.0; /* there is no long double */ size_t size = 100; /* Reserved memory size. Could be any value you know in advance. */ embInit("demostringnew", argc, argv); demostringnew_msg("/* Starting string values */"); /* Functions with the prefix ajStr are for manipulating EMBOSS strings. Functions with the prefix ajChar are for manipulating C-type (char*) string See filesection and datasection sections in ajstr.c */ /* ** String constructor functions ** See "@section constructors" in ajstr.c */ /* Construct a new string with no starting value or reserved size. There is no equivlent function for C-type (char*) strings */ str0 = ajStrNew (); /* Construct a new string with a reserved size but no starting value */ txt1 = ajCharNewRes(size); str1 = ajStrNewRes (size); /* Construct a new C-type (char*) string with a starting value ... */ txt2 = ajCharNewC ("Starting value"); /* ... copied from a C-type (char*) string */ str2 = ajStrNewC (txt2); /* ... copied from a C-type (char*) string */ txt3 = ajCharNewS (str2); /* ... copied from a string */ str3 = ajStrNewS (str2); /* ... copied from a string */ /* Construct a new string with a reserved size and starting value ... */ txt4 = ajCharNewResC("Starting value, reserved size", size); /* ... copied from a C-type (char*) string)*/ str4 = ajStrNewResC (txt4, size); /* ... copied from a C-type (char*) string */ /* or str4 = ajStrNewResLenC(txt4, size, strlen(txt4)); to specify string length */ txt5 = ajCharNewResS(str4, size); /* ... copied from a string */ str5 = ajStrNewResS (str4, size); /* ... 
copied from a string */ demostringnew_msg("/* After string constructor functions */"); /* ** String destructor functions ** See "@section destructors" in ajstr.c) */ /* Destruct a string */ ajCharDel(&txt1); ajCharDel(&txt2); ajCharDel(&txt3); ajCharDel(&txt4); ajCharDel(&txt5); ajStrDel (&str0); ajStrDel (&str1); ajStrDel (&str3); ajStrDel (&str5); /* str2 & str4 still in memory */ demostringnew_msg("/* After string destructor functions */"); /* ** String (de)referencing functions ** See "@section destructors" in ajstr.c) */ str0 = ajStrNewRef(str2); /* or ajStrAssignRef(&str0, str2); */ demostringnew_msg("/* After string reference */"); ajStrDelStatic(&str0); demostringnew_msg("/* After string dereference */"); /* ** String assignment functions ** See "@section assignment" in ajstr.c) */ /* Still only str2 & str4 in memory */ /* Assign a string value using ... */ ajStrAssignC(&str1, "Assigned value"); /* ... a C-type (char*) string */ /* or ajStrAssignLenC(&str1, "Assigned value", strlen("Assigned value")); to specify string length. */ ajStrAssignS(&str3, str1); /* ... a string */ ajStrAssignK(&str5, 'A'); /* ... a character */ demostringnew_msg("/* After string assignment 1 */"); ajStrAssignSubC(&str1, "Assigned value", 0, 11); ajStrAssignSubS(&str3, str1, 0, 9); demostringnew_msg("/* After string assignment 2 */"); /* The assignment functions allocate memory if necessary so str1, str3 and str5 will be created for you. It's bad practice to use this mechanism however because it's not obvious the string has been allocated (and needs freeing). Much cleaner to call the construct (ajStrNew) explicitly. */ /* Assign a string with a reserved size and value using ... */ ajStrAssignResC(&str1, size, "Assigned value, reserved size"); /* ... a C-type (char*) string */ ajStrAssignResS(&str3, size, str1); /* ... a string */ demostringnew_msg("/* After string assignment 3 */"); /* Assign a string value only if the string is empty using ... 
*/ str0 = ajStrNew(); ajStrAssignEmptyC(&str0, "New value if string was empty"); /* ... a C-type (char*) string */ ajStrAssignEmptyS(&str1, str0); /* ... a string */ demostringnew_msg("/* After string assignment 4 */"); /* Now str0-5 in memory. The above code is for illustrative purposes: it's much cleaner to put all the constructors / destructors at the top / bottom of the code where possible. */ /* Assign all strings intuitive values */ txt0 = ajCharNewResC("TEXT 0", 100); txt1 = ajCharNewResC("TEXT 1", 100); txt2 = ajCharNewResC("Text 2", 100); txt3 = ajCharNewResC("Text 3", 100); txt4 = ajCharNewResC("Text 4", 100); txt5 = ajCharNewResC("Text 5", 100); ajStrAssignC(&str0, "STRING 0"); ajStrAssignC(&str1, "STRING 1"); ajStrAssignC(&str2, "String 2"); ajStrAssignC(&str3, "String 3"); ajStrAssignC(&str4, "String 4 WITHSOMETEXTINABLOCK"); ajStrAssignC(&str5, "String 5 WITHSOMETEXTINABLOCK"); demostringnew_msg("/* After string assignment 5 */"); /* ** String formatting functions ** See "@section formatting" in ajstr.c */ ajCharFmtLower(txt0); ajCharFmtLower(txt1); ajStrFmtLower(&str0); ajStrFmtLowerSub(&str1, 0, 2); ajCharFmtUpper(txt2); ajCharFmtUpper(txt3); ajStrFmtUpper(&str2); ajStrFmtUpperSub(&str3, 0, 2); demostringnew_msg("/* After string formatting 1 */"); ajStrFmtTitle(&str0); ajStrFmtQuote(&str1); ajStrFmtBlock(&str4, 3); demostringnew_msg("/* After string formatting 2 */"); /* See also ajStrFmtWrap, ajStrFmtWrapLeft ... these need checking. 
*/ /* ** String conversion functions ** See "@section datatype to string conversion" in ajstr.c */ n1 = n2 = l1 = 1; f1 = d1 = 0.5; ajStrFromBool( &str0, boo); ajStrFromInt(&str1, n1); ajStrFromLong(&str2, l1); ajStrFromFloat(&str3, f1, 5); ajStrFromDouble(&str4, d1, 5); ajStrFromDoubleExp(&str5, d1, 5); demostringnew_msg("/* After datatype to string conversion */"); /* ** String conversion functions ** See "@section string to datatype conversion" in ajstr.c */ ajStrToBool(str0, &boo); ajStrToInt(str1, &n1); ajStrToLong(str2, &l1); ajStrToDouble(str4, &d1); ajUser("/* After string to datatype conversion */\n" "boo (from str0): %B\nn1 (from str1): %d\nl1 (from str2): %d", boo, n1, l1); ajFmtPrint("f1 (from str3): %f\nd1 (from str4): %f\n", f1, d1); /* Check ajUser ... doesn't support %f */ /* See also ajStrToHex */ /* Assign all strings new values */ strcpy(txt0, "Text String"); strcpy(txt1, "TEXT STRING"); strcpy(txt2, "Text*"); strcpy(txt3, "Text"); strcpy(txt4, "Text String 4"); strcpy(txt5, "Text String 5"); ajStrAssignC(&str0, "String"); ajStrAssignC(&str1, "STRING"); ajStrAssignC(&str2, "String*"); ajStrAssignC(&str3, "*String"); ajStrAssignC(&str4, "String 4"); ajStrAssignC(&str5, "String 5"); demostringnew_msg("/* After resetting strings */"); /* ** String comparison functions ** See "@section comparison" in ajstr.c */ ajUserDumpC("/* String comparison functions */"); boo = ajCharMatchC(txt0, txt1); ajUser("ajCharMatchC(txt0 txt1); == %B", boo); boo = ajCharMatchCaseC(txt0, txt1); ajUser("ajCharMatchCaseC(txt0 txt1); == %B", boo); boo = ajCharMatchC(txt0, txt2); ajUser("ajCharMatchC(txt0,txt2); == %B", boo); boo = ajCharMatchWildC(txt0, txt2); ajUser("ajCharMatchWildC(txt0,txt2); == %B", boo); boo = ajCharMatchWildS(txt0, str2); ajUser("ajCharMatchWildS(txt0,str2); == %B", boo); /* See also ajCharMatchWildNextC, ajCharMatchWildWordC ... these need checking & documentation updated. 
*/ boo = ajCharPrefixC(txt0, txt3); ajUser("ajCharPrefixC(txt0, txt3); == %B", boo); boo = ajCharPrefixS(txt0, str0); ajUser("ajCharPrefixS(txt0, str0); == %B", boo); boo = ajCharPrefixCaseC(txt5, txt1); ajUser("ajCharPrefixCaseC(txt5, txt1); == %B", boo); boo = ajCharPrefixCaseC(txt1, txt5); ajUser("ajCharPrefixCaseC(txt1, txt5); == %B", boo); boo = ajCharPrefixCaseS(txt0, str0); ajUser("ajCharPrefixCaseS(txt0, str0); == %B", boo); boo = ajCharSuffixC(txt0, txt3); ajUser("ajCharSuffixC(txt0, txt3); === %B", boo); boo = ajCharSuffixS(txt0, str0); ajUser("ajCharSuffixS(txt0, str0); == %B", boo); /* See also ajCharSuffixCaseC, ajCharSuffixCaseC, ajCharSuffixCaseS, ajCharSuffixCaseS ... these need checking. */ boo = ajStrMatchC (str0, txt0); ajUser("ajStrMatchC (str0, txt0); == %B", boo); boo = ajStrMatchS(str0, str1); ajUser("ajStrMatchS(str0, str1); == %B", boo); boo = ajStrMatchCaseC(str0, txt0); ajUser("ajStrMatchCaseC(str0, txt0); == %B", boo); boo = ajStrMatchCaseS(str0, str0); ajUser("ajStrMatchCaseS(str0, str0); == %B", boo); /* ajUser("== %B", boo); boo = ajStrMatchWildC(str2, const char* text); ajStrMatchWildS (const AjPStr thys, const AjPStr wild); ajStrMatchWildWordC (const AjPStr str, const char* text); ajStrMatchWildWordS (const AjPStr str, const AjPStr text); ajStrPrefixC(const AjPStr str, const char* txt2); ajStrPrefixS(const AjPStr str, const AjPStr str2); ajStrPrefixCaseC (const AjPStr str, const char* pref); ajStrPrefixCaseS (const AjPStr str, const AjPStr pref); ajStrSuffixC (const AjPStr thys, const char* suff); ajStrSuffixS (const AjPStr thys, const AjPStr suff); */ /**************************************************************************/ /* String substitution functions (See "@section substitution" in ajstr.c) */ /**************************************************************************/ /* AjBool ajStrExchangeCC(AjPStr* Pstr, const char* txt, const char* txtnew); AjBool ajStrExchangeCS(AjPStr* Pstr, const char* txt, const AjPStr strnew); 
AjBool ajStrExchangeKK(AjPStr* Pstr, char chr, char chrnew); AjBool ajStrExchangeSC(AjPStr* Pstr, const AjPStr str, const char* txtnew); AjBool ajStrExchangeSS(AjPStr* Pstr, const AjPStr str, const AjPStr strnew); AjBool ajStrExchangeSetCC(AjPStr* Pstr, const char* oldc, const char* newc); AjBool ajStrExchangeSetSS(AjPStr* Pstr, const AjPStr str, const AjPStr strnew); AjBool ajStrRandom(AjPStr *s); AjBool ajStrReverse(AjPStr* Pstr); */ embExit(); return 0; }
/*
** embScopToPdbid
**
** Copies the substring at positions 1..4 of 'scop' into '*pdb' and returns
** the resulting string.
**
** NOTE(review): presumably 'scop' is a SCOP domain identifier whose leading
** character is followed by the 4-character PDB code -- confirm against the
** callers.
**
** scop  [r] Source identifier string
** pdb   [w] Destination string, (re)assigned by ajStrAssignSubS
** return    The string now held in *pdb
*/
AjPStr embScopToPdbid(const AjPStr scop, AjPStr *pdb)
{
    const ajint code_first = 1;   /* first position copied */
    const ajint code_last  = 4;   /* last position copied  */

    ajStrAssignSubS(pdb, scop, code_first, code_last);

    return *pdb;
}
/*
** embPatternSeqSearch
**
** Searches the begin..end region of 'seq' (upper-cased) for the compiled
** pattern in 'pat' using embPatFuzzSearchII, and adds one feature to
** 'ftable' per hit.  Each feature is scored as (match length - mismatches)
** and tagged with the pattern name/pattern and, when non-zero, the mismatch
** count.
**
** ftable  [u] Feature table receiving the hits
** seq     [r] Sequence to search; an empty sequence is a no-op
** pat     [r] Pattern definition (compiled pattern, mismatch, name)
** reverse [r] If true the reversed region is searched and hits are added
**             as '-' strand nucleotide features with coordinates mapped
**             back onto the forward sequence
**
** The file-level feature type strings featMotifProt / featMotifNuc are
** given their default values on first use.
*/
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
                          const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits = 0;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = NULL;
    AjPStr seqstr  = NULL;
    AjPStr seqname = NULL;
    AjPStr tmp     = NULL;
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    /* Bail out before allocating anything, so the empty-sequence path
       cannot leak the list and work strings */
    seqlen = ajSeqGetLen(seq);

    if(!seqlen)
        return;

    list    = ajListNew();
    seqstr  = ajStrNew();
    seqname = ajStrNew();
    tmp     = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj   = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if(reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), begin-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), begin-1, adj-1);

    ajStrFmtUpper(&seqstr);

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
            pattern->pattern, pat->Protein, reverse);

    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
        ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

        if(reverse)
            /* Map the hit found in the reversed region back onto
               forward-strand coordinates */
            sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
        else
        {
            if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }

        if(isreversed)
            ajFeatReverse(sf, seqlen);

        ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if(reverse)
        ajSeqDel(&revseq);

    return;
}
static void restover_printHits(const AjPSeq seq, const AjPStr seqcmp, AjPFile outf, AjPList l, const AjPStr name, ajint hits, ajint begin, ajint end, ajint mincut, ajint maxcut, AjBool plasmid, ajint sitelen, AjBool limit, const AjPTable table, AjBool alpha, AjBool frags, AjBool html) { EmbPMatMatch m = NULL; AjPStr ps = NULL; ajint *fa = NULL; ajint *fx = NULL; ajint fc = 0; ajint fn = 0; ajint fb = 0; ajint last = 0; AjPStr overhead = NULL; const AjPStr value = NULL; ajint i; ajint c = 0; ajint hang1; ajint hang2; ps = ajStrNew(); fn = 0; if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Restrict of %S from %d to %d\n",name,begin,end); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"#\n"); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Minimum cuts per enzyme: %d\n",mincut); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Maximum cuts per enzyme: %d\n",maxcut); if(html) ajFmtPrintF(outf,"<BR>"); ajFmtPrintF(outf,"# Minimum length of recognition site: %d\n", sitelen); if(html) ajFmtPrintF(outf,"<BR>"); hits = embPatRestrictRestrict(l,hits,!limit,alpha); if(frags) { fa = AJALLOC(hits*2*sizeof(ajint)); fx = AJALLOC(hits*2*sizeof(ajint)); } ajFmtPrintF(outf,"# Number of hits with any overlap: %d\n",hits); if(html) ajFmtPrintF(outf,"<BR>"); if(html) ajFmtPrintF(outf,"</p><table border cellpadding=4 " "bgcolor=\"#FFFFF0\">\n"); if(html) ajFmtPrintF(outf, "<th>Base Number</th><th>Enzyme</th><th>Site</th>" "<th>5'</th><th>3'</th><th>[5'</th><th>3']</th>\n"); else ajFmtPrintF(outf,"# Base Number\tEnzyme\t\tSite\t\t5'\t3'\t" "[5'\t3']\n"); for(i=0;i<hits;++i) { ajListPop(l,(void **)&m); ajDebug("hit %d start:%d cut1:%d cut2:%d\n", i, m->start, m->cut1, m->cut2); hang1 = (ajint)m->cut1 - (ajint)m->start; hang2 = (ajint)m->cut2 - (ajint)m->start; if(!plasmid && (hang1>100 || hang2>100)) { embMatMatchDel(&m); continue; } if(limit) { value=ajTableFetchS(table,m->cod); if(value) ajStrAssignS(&m->cod,value); } if(m->cut2 >= m->cut1) 
ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut1, m->cut2-1); else { ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut2, m->cut1-1); ajStrReverse(&overhead); } ajDebug("overhead:%S seqcmp:%S\n", overhead, seqcmp); /* Print out only those who have the same overhang. */ if(ajStrMatchCaseS(overhead, seqcmp)) { if(html) { ajFmtPrintF(outf, "<tr><td>%-d</td><td>%-16s</td><td>%-16s" "</td><td>%d</td><td>%d</td></tr>\n", m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat), m->cut1,m->cut2); } else ajFmtPrintF(outf,"\t%-d\t%-16s%-16s%d\t%d\t\n", m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat), m->cut1,m->cut2); } if(frags) fa[fn++] = m->cut1; if(m->cut3 || m->cut4) { if(m->cut4 >= m->cut3) ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut3, m->cut4-1); else { ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut4, m->cut3-1); ajStrReverse(&overhead); } if(ajStrMatchCaseS(overhead, seqcmp)) { if(html) ajFmtPrintF(outf, "<tr><td>%-d</td><td>%-16s</td><td>%-16s" "</td><td></td><td></td><td>%d</td><td>%d" "</td></tr>\n", m->start,ajStrGetPtr(m->cod), ajStrGetPtr(m->pat), m->cut1,m->cut2); else ajFmtPrintF(outf,"\t%-d\t%-16s%-16s\t\t%d\t%d\t\n", m->start,ajStrGetPtr(m->cod), ajStrGetPtr(m->pat), m->cut1,m->cut2); } } /* I am not sure what fragments are doing so I left it in ...*/ /* used in the report tail in restrict - restover does much the same */ if(m->cut3 || m->cut4) { if(frags) fa[fn++] = m->cut3; /* ajFmtPrintF(*outf,"%d\t%d",m->cut3,m->cut4);*/ } ajStrDel(&overhead); embMatMatchDel(&m); } if(frags) { ajSortIntInc(fa,fn); ajFmtPrintF(outf,"\n\nFragment lengths:\n"); if(!fn || (fn==1 && plasmid)) ajFmtPrintF(outf," %d\n",end-begin+1); else { last = -1; fb = 0; for(i=0;i<fn;++i) { if((c=fa[i])!=last) fa[fb++]=c; last = c; } fn = fb; /* Calc lengths */ for(i=0;i<fn-1;++i) fx[fc++] = fa[i+1]-fa[i]; if(!plasmid) { fx[fc++] = fa[0]-begin+1; fx[fc++] = end-fa[fn-1]; } else fx[fc++] = (fa[0]-begin+1)+(end-fa[fn-1]); ajSortIntDec(fx,fc); for(i=0;i<fc;++i) 
ajFmtPrintF(outf," %d\n",fx[i]); } AJFREE(fa); AJFREE(fx); } ajStrDel(&ps); if(html) ajFmtPrintF(outf,"</table>\n"); return; }
int main(int argc, char **argv) { AjPSeq seq; AjPGraph graph = 0; AjPFile outf = NULL; AjPFile file = NULL; AjPStr buffer = NULL; float twist[4][4][4]; float roll[4][4][4]; float tilt[4][4][4]; float rbend; float rcurve; float bendscale; float curvescale; float twistsum = (float) 0.0; float pi = (float) 3.14159; float pifac = (pi/(float) 180.0); float pi2 = pi/(float) 2.0; ajint *iseq = NULL; float *x; float *y; float *xave; float *yave; float *curve; float *bend; const char *ptr; ajint i; ajint ii; ajint k; ajint j; char residue[2]; float maxbend; float minbend; float bendfactor; float maxcurve; float mincurve; float curvefactor; float fxp; float fyp; float yincr; float yy1; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; float ystart; float defheight; float currentheight; ajint count; ajint portrait = 0; ajint title = 0; ajint numres; ajint ibeg; ajint iend; ajint ilen; AjPStr sstr = NULL; ajint ipos; float dx; float dy; float rxsum; float rysum; float yp1; float yp2; double td; embInit("banana", argc, argv); seq = ajAcdGetSeq("sequence"); file = ajAcdGetDatafile("anglesfile"); outf = ajAcdGetOutfile("outfile"); graph = ajAcdGetGraph("graph"); numres = ajAcdGetInt("residuesperline"); ibeg = ajSeqGetBegin(seq); iend = ajSeqGetEnd(seq); ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1); ilen = ajStrGetLen(sstr); AJCNEW0(iseq,ilen+1); AJCNEW0(x,ilen+1); AJCNEW0(y,ilen+1); AJCNEW0(xave,ilen+1); AJCNEW0(yave,ilen+1); AJCNEW0(curve,ilen+1); AJCNEW0(bend,ilen+1); ptr= ajStrGetPtr(sstr); for(ii=0;ii<ilen;ii++) { if(*ptr=='A' || *ptr=='a') iseq[ii+1] = 0; else if(*ptr=='T' || *ptr=='t') iseq[ii+1] = 1; else if(*ptr=='G' || *ptr=='g') iseq[ii+1] = 2; else if(*ptr=='C' || *ptr=='c') iseq[ii+1] = 3; else ajErr("%c is not an ATCG hence not valid",*ptr); ptr++; } if(!file) ajErr("Banana failed to open angle file"); ajReadline(file,&buffer); /* 3 junk lines */ ajReadline(file,&buffer); ajReadline(file,&buffer); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) { 
if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f", &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k], &twist[ii][3][k]); } else ajErr("Error reading angle file"); for(j=0;j<4;j++) twist[ii][j][k] *= pifac; } for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) { sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k], &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]); } else ajErr("Error reading angle file"); for(k=0;k<4;k++) for(ii=0;ii<4;ii++) if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k], &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]); else ajErr("Error reading angle file"); if(ajReadline(file,&buffer)) sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve, &bendscale,&curvescale); else ajErr("Error reading angle file"); ajFileClose(&file); ajStrDel(&buffer); for(ii=1;ii<ilen-1;ii++) { twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]]; dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) + (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2)); dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) + tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2); x[ii+1] = x[ii]+dx; y[ii+1] = y[ii]+dy; } for(ii=6;ii<ilen-6;ii++) { rxsum = 0.0; rysum = 0.0; for(k=-4;k<=4;k++) { rxsum+=x[ii+k]; rysum+=y[ii+k]; } rxsum+=(x[ii+5]*(float)0.5); rysum+=(y[ii+5]*(float)0.5); rxsum+=(x[ii-5]*(float)0.5); rysum+=(y[ii-5]*(float)0.5); xave[ii] = rxsum*(float)0.1; yave[ii] = rysum*(float)0.1; } for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++) { td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])* (x[i+(ajint)rbend]-x[i-(ajint)rbend])) + ((y[i+(ajint)rbend]-y[i-(ajint)rbend])* (y[i+(ajint)rbend]-y[i-(ajint)rbend]))); bend[i] = (float) td; bend[i]*=bendscale; } for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++) { td = sqrt(((xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]- xave[i-(ajint)rcurve]))+ ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])* 
(yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve]))); curve[i] = (float) td; } if(outf) { ajFmtPrintF(outf,"Base Bend Curve\n"); ptr = ajStrGetPtr(sstr); for(ii=1;ii<=ilen;ii++) { ajFmtPrintF(outf,"%c %6.1f %6.1f\n", *ptr, bend[ii], curve[ii]); ptr++; } ajFileClose(&outf); } if(graph) { maxbend = minbend = 0.0; maxcurve = mincurve = 0.0; for(ii=1;ii<=ilen;ii++) { if(bend[ii] > maxbend) maxbend = bend[ii]; if(bend[ii] < minbend) minbend = bend[ii]; if(curve[ii] > maxcurve) maxcurve = curve[ii]; if(curve[ii] < mincurve) mincurve = curve[ii]; } ystart = 75.0; ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq)); ajGraphicsSetPagesize(960, 768); ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0, (float)0.0, ystart+(float)5.0); ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { ixlen = 768; iylen = 960; } else { ixlen = 960; iylen = 768; } ajGraphicsGetCharsize(&defheight,¤theight); if(!currentheight) { defheight = currentheight = (float) 4.440072; currentheight = defheight * ((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0))) /currentheight; } ajGraphicsSetCharscale(((float)ixlen/((float)(numres)* (currentheight+(float)1.0)))/ currentheight); ajGraphicsGetCharsize(&defheight,¤theight); yincr = (currentheight + (float)3.0)*(float)0.3; if(!title) yy1 = ystart; else yy1 = ystart-(float)5.0; count = 1; residue[1]='\0'; bendfactor = (3*yincr)/maxbend; curvefactor = (3*yincr)/maxcurve; ptr = ajStrGetPtr(sstr); yy1 = yy1-(yincr*((float)5.0)); for(ii=1;ii<=ilen;ii++) { if(count > numres) { yy1 = yy1-(yincr*((float)5.0)); if(yy1<1.0) { if(!title) yy1=ystart; else yy1 = ystart-(float)5.0; yy1 = yy1-(yincr*((float)5.0)); ajGraphNewpage(graph,AJFALSE); } count = 1; } residue[0] = *ptr; ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue); if(ii>1 && ii < ilen) { yp1 = yy1+yincr + (bend[ii]*bendfactor); yp2 = yy1+yincr + (bend[ii+1]*bendfactor); ajGraphicsDrawposLine((float)count+(float)1.5,yp1, (float)(count)+(float)2.5,yp2); } ipos 
= ilen-(ajint)rcurve-7; if(ipos < 0) ipos = 0; if(ii>rcurve+5 && ii<ipos) { yp1 = yy1+yincr + (curve[ii]*curvefactor); yp2 = yy1+yincr + (curve[ii+1]*curvefactor); ajGraphicsDrawposLine((float)count+(float)1.7,yp1, (float)(count)+(float)2.3,yp2); } ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr, (float)(count)+(float)2.5,yy1+yincr); count++; ptr++; } ajGraphicsClose(); } AJFREE(iseq); AJFREE(x); AJFREE(y); AJFREE(xave); AJFREE(yave); AJFREE(curve); AJFREE(bend); ajStrDel(&sstr); ajSeqDel(&seq); ajFileClose(&file); ajFileClose(&outf); ajGraphxyDel(&graph); embExit(); return 0; }
/*
** prima: entry point.
**
** Reads the ACD parameters, writes an "INPUT SUMMARY" section to the output
** file and then works in one of two modes selected by the "targetrange"
** toggle:
**
**   - targetrange set:   prima_testtarget() is asked for a single primer pair
**                        around [targetstart,targetend].
**   - targetrange unset: every product start/length combination within the
**                        min/max product length is scanned; candidate primers
**                        are produced by prima_testproduct() and filtered by
**                        prima_reject_self(), prima_test_multi() and
**                        prima_best_primer(); surviving pairs are then pruned
**                        and checked for overlap.
**
** Finally every remaining pair is printed either as a side-by-side table or
** as a list, and all resources are released.
**
** Returns 0 (also after embExitBad() when the target product GC content is
** out of range).
*/
int main(int argc, char **argv)
{
    AjPFile outf = NULL;
    AjPSeq sequence = NULL;
    AjPStr substr = NULL;               /* upper-cased begin..end region    */
    AjPStr seqstr = NULL;               /* upper-cased copy of the sequence */
    AjPStr revstr = NULL;               /* substr after ajSeqstrReverse()   */
    AjPStr p1;
    AjPStr p2;
    PPrimer eric = NULL;
    PPrimer fred = NULL;
    PPrimer f;
    PPrimer r;
    PPair pair;
    AjPList forlist = NULL;             /* candidate forward primers        */
    AjPList revlist = NULL;             /* candidate reverse primers        */
    AjPList pairlist = NULL;            /* accepted primer pairs            */
    AjBool targetrange;
    AjBool isDNA = ajTrue;
    AjBool dolist = ajFalse;
    ajint primerlen = 0;
    ajint minprimerlen = 0;
    ajint maxprimerlen = 0;
    ajint minprodlen = 0;
    ajint maxprodlen = 0;
    ajint prodlen = 0;
    ajint seqlen = 0;
    ajint stepping_value = 1;
    ajint targetstart = 0;
    ajint targetend = 0;
    ajint limit = 0;
    ajint limit2 = 0;
    ajint lastpos = 0;
    ajint startpos = 0;
    ajint endpos = 0;
    ajint begin;
    ajint end;
    ajint v1;
    ajint v2;
    ajint overlap;
    float minpmGCcont = 0.;
    float maxpmGCcont = 0.;
    float minprodGCcont = 0.;
    float maxprodGCcont = 0.;
    float prodTm;
    float prodGC;
    ajint i;
    ajint j;
    ajint neric=0;                      /* number of entries in forlist     */
    ajint nfred=0;                      /* number of entries in revlist     */
    ajint npair=0;                      /* number of entries in pairlist    */
    float minprimerTm = 0.0;
    float maxprimerTm = 0.0;
    float saltconc = 0.0;
    float dnaconc = 0.0;

    embInit ("prima", argc, argv);

    substr = ajStrNew();
    forlist = ajListNew();
    revlist = ajListNew();
    pairlist = ajListNew();
    p1 = ajStrNew();
    p2 = ajStrNew();

    /* Fetch all ACD parameters */
    sequence = ajAcdGetSeq("sequence");
    outf = ajAcdGetOutfile("outfile");
    minprimerlen = ajAcdGetInt("minprimerlen");
    maxprimerlen = ajAcdGetInt("maxprimerlen");
    minpmGCcont = ajAcdGetFloat("minpmGCcont");
    maxpmGCcont = ajAcdGetFloat("maxpmGCcont");
    minprimerTm = ajAcdGetFloat("mintmprimer");
    maxprimerTm = ajAcdGetFloat("maxtmprimer");
    minprodlen = ajAcdGetInt("minplen");
    maxprodlen = ajAcdGetInt("maxplen");
    minprodGCcont = ajAcdGetFloat("minpgccont");
    maxprodGCcont = ajAcdGetFloat("maxpgccont");
    saltconc = ajAcdGetFloat("saltconc");
    dnaconc = ajAcdGetFloat("dnaconc");
    targetrange = ajAcdGetToggle("targetrange");
    targetstart = ajAcdGetInt("targetstart");
    targetend = ajAcdGetInt("targetend");
    overlap = ajAcdGetInt("overlap");
    dolist = ajAcdGetBoolean("list");

    /* Work on an upper-cased copy restricted to begin..end */
    seqstr = ajSeqGetSeqCopyS(sequence);
    ajStrFmtUpper(&seqstr);
    begin = ajSeqGetBegin(sequence);
    end = ajSeqGetEnd(sequence);
    seqlen = end-begin+1;
    ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1);
    revstr = ajStrNewC(ajStrGetPtr(substr));
    ajSeqstrReverse(&revstr);

    /* entropy/enthalpy/energy are not declared in this function; they are
    ** file-level arrays sized here to the region length */
    AJCNEW0(entropy, seqlen);
    AJCNEW0(enthalpy, seqlen);
    AJCNEW0(energy, seqlen);

    /* Initialise Tm calculation arrays */
    ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1, &entropy, &enthalpy, &energy);

    /* Echo the run parameters */
    ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n");
    ajFmtPrintF(outf, "*************\n\n");

    if(targetrange)
        ajFmtPrintF (outf, "Prima of %s from positions %d to %d bps\n", ajSeqGetNameC(sequence),targetstart, targetend);
    else
        ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence));

    ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n");
    ajFmtPrintF (outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR ");
    ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n");
    ajFmtPrintF(outf, "Primer size range is %d-%d\n",minprimerlen,maxprimerlen);
    ajFmtPrintF(outf, "Primer GC content range is %.2f-%.2f\n",minpmGCcont, maxpmGCcont);
    ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n", minprimerTm, maxprimerTm);
    ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n");
    ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n", minprodGCcont, maxprodGCcont);
    ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc);
    ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc);

    if(targetrange)
        ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n", targetstart,targetend);
    else
    {
        ajFmtPrintF(outf,"Considering all suitable Primer pairs with ");
        ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen, maxprodlen);
    }

    ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n");
    ajFmtPrintF(outf, "*****************************************\n\n");

    if(seqlen-minprimerlen < 0)
        ajFatal("Sequence too short");

    /* Mode 1: a single pair around a user-specified target */
    if(targetrange)
    {
        ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin);

        /* NOTE(review): GC and Tm are computed from the whole region
        ** (substr, seqlen), not from the target just extracted into p1;
        ** prodTm is not used afterwards in this branch - confirm intent */
        prodGC = ajMeltGC(substr,seqlen);
        prodTm = ajMeltTempProd(prodGC,saltconc,seqlen);

        if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
        {
            ajFmtPrintF(outf, "Product GC content [%.2f] outside acceptable range\n", prodGC);
            embExitBad();
            return 0;
        }

        prima_testtarget(substr, revstr, targetstart-begin, targetend-begin, minprimerlen, maxprimerlen, seqlen, minprimerTm, maxprimerTm, minpmGCcont, maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc, dnaconc, pairlist, &npair);
    }

    /* Mode 2: exhaustive scan over product start and length */
    if(!targetrange)
    {
        limit = seqlen-minprimerlen-minprodlen+1;
        lastpos = seqlen-minprodlen;
        limit2 = maxprodlen-minprodlen;

        /* Outer loop selects all possible product start points */
        for(i=minprimerlen; i<limit; ++i)
        {
            startpos = i;
            ajDebug("Position in sequence %d\n",startpos);
            endpos = i+minprodlen-1;

            /* Inner loop selects all possible product lengths */
            for(j=0; j<limit2; ++j, ++endpos)
            {
                if(endpos>lastpos)
                    break;

                v1 = endpos-startpos+1;         /* product length */
                ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos);
                prodGC = ajMeltGC(p1,v1);
                prodTm = ajMeltTempProd(prodGC,saltconc,v1);

                if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
                    continue;

                /* Only accept primers with acceptable Tm and GC */
                neric = 0;
                nfred = 0;
                prima_testproduct(substr, startpos, endpos, primerlen, minprimerlen, maxprimerlen,minpmGCcont, maxpmGCcont, minprimerTm, maxprimerTm, minprodlen, maxprodlen, prodTm, prodGC, seqlen, &eric,&fred,forlist,revlist,&neric,&nfred, stepping_value, saltconc,dnaconc, isDNA, begin);

                if(!neric)
                    continue;

                /* Now reject those primers with self-complementarity */
                prima_reject_self(forlist,revlist,&neric,&nfred);

                if(!neric)
                    continue;

                /* Reject any primers that could bind elsewhere in the sequence */
                prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr, seqlen);

                /* Now select the least complementary pair (if any) */
                prima_best_primer(forlist, revlist, &neric, &nfred);

                if(!neric)
                    continue;

                /* Take the head of each list as the pair for this product */
                AJNEW(pair);
                ajListPop(forlist,(void **)&f);
                ajListPop(revlist,(void **)&r);
                pair->f = f;
                pair->r = r;
                ++npair;
                ajListPush(pairlist,(void *)pair);
            }
        }
    }

    if(!targetrange)
    {
        /* Get rid of primer pairs nearby the top scoring ones */
        prima_TwoSortscorepos(&pairlist);
        prima_prune_nearby(pairlist, &npair, maxprimerlen-1);
        ajListSort(pairlist,prima_PosCompare);
        prima_check_overlap(pairlist,&npair,overlap);
    }

    if(npair)
    {
        if(!targetrange)
            ajFmtPrintF(outf,"%d pairs found\n\n",npair);
        else
            ajFmtPrintF(outf, "Closest primer pair to specified product is:\n\n");

        /* Column header is only printed for the side-by-side layout */
        if((maxprimerlen<26 && seqlen<999999 && !dolist))
            ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n");
    }

    /* Report each pair and release it */
    for(i=0;i<npair;++i)
    {
        if(!targetrange)
            ajFmtPrintF(outf,"[%d]\n",i+1);

        ajListPop(pairlist,(void **)&pair);

        /* Product is the region between the two primers */
        prodlen = pair->r->start - (pair->f->start + pair->f->primerlen);

        if((maxprimerlen<26 && seqlen<999999 && !dolist))
        {
            /* Side-by-side layout: forward primer on the left ... */
            v1 = pair->f->start;
            v2 = v1 + pair->f->primerlen -1;
            ajStrAssignSubS(&p1,substr,v1,v2);
            ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1), v2+begin);

            /* ... reverse primer (after ajSeqstrReverse) on the right */
            v1 = pair->r->start;
            v2 = v1 + pair->r->primerlen -1;
            ajStrAssignSubS(&p2,substr,v1,v2);
            ajSeqstrReverse(&p2);
            ajFmtPrintF(outf, "%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin);

            ajFmtPrintF(outf," Tm %.2f C (GC %.2f%%)\t\t ", pair->f->primerTm,pair->f->primGCcont*100.);
            ajFmtPrintF(outf,"Tm %.2f C (GC %.2f%%)\n", pair->r->primerTm,pair->r->primGCcont*100.);
            ajFmtPrintF(outf," Length: %-32dLength: %d\n", pair->f->primerlen,pair->r->primerlen);
            ajFmtPrintF(outf," Tma: %.2f C\t\t\t", ajAnneal(pair->f->primerTm,pair->f->prodTm));
            ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm));
            ajFmtPrintF(outf," Product GC: %.2f%%\n", pair->f->prodGC * 100.0);
            ajFmtPrintF(outf," Product Tm: %.2f C\n", pair->f->prodTm);
            ajFmtPrintF(outf," Length: %d\n\n\n",prodlen);
        }
        else
        {
            /* List layout: product first, then each primer in turn */
            ajFmtPrintF(outf," Product from %d to %d\n",pair->f->start+ pair->f->primerlen+begin,pair->r->start-1+begin);
            ajFmtPrintF(outf," Tm: %.2f C GC: %.2f%%\n", pair->f->prodTm,pair->f->prodGC*(float)100.);
            ajFmtPrintF(outf," Length: %d\n\n\n",prodlen);

            v1 = pair->f->start;
            v2 = v1 + pair->f->primerlen -1;
            ajStrAssignSubS(&p1,substr,v1,v2);
            ajFmtPrintF(outf," Forward: 5' %s 3'\n",ajStrGetPtr(p1));
            ajFmtPrintF(outf," Start: %d\n",v1+begin);
            ajFmtPrintF(outf," End: %d\n",v2+begin);
            ajFmtPrintF(outf," Tm: %.2f C\n", pair->f->primerTm);
            ajFmtPrintF(outf," GC: %.2f%%\n", pair->f->primGCcont*(float)100.);
            ajFmtPrintF(outf," Len: %d\n", pair->f->primerlen);
            ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->f->primerTm,pair->f->prodTm));

            v1 = pair->r->start;
            v2 = v1 + pair->r->primerlen -1;
            ajStrAssignSubS(&p2,substr,v1,v2);
            ajSeqstrReverse(&p2);
            /* NOTE(review): p2 (reversed) is built above but the line below
            ** prints p1, the unreversed slice, whereas the side-by-side
            ** layout prints p2 - confirm which strand is intended here */
            ajStrAssignSubS(&p1,substr,v1,v2);
            ajFmtPrintF(outf," Reverse: 5' %s 3'\n",ajStrGetPtr(p1));
            ajFmtPrintF(outf," Start: %d\n",v1+begin);
            ajFmtPrintF(outf," End: %d\n",v2+begin);
            ajFmtPrintF(outf," Tm: %.2f C\n", pair->r->primerTm);
            ajFmtPrintF(outf," GC: %.2f%%\n", pair->r->primGCcont*(float)100.);
            ajFmtPrintF(outf," Len: %d\n", pair->r->primerlen);
            ajFmtPrintF(outf," Tma: %.2f C\n\n\n", ajAnneal(pair->r->primerTm,pair->f->prodTm));
        }

        prima_PrimerDel(&pair->f);
        prima_PrimerDel(&pair->r);
        AJFREE(pair);
    }

    /* Release everything allocated above */
    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&substr);
    ajStrDel(&p1);
    ajStrDel(&p2);

    ajListFree(&forlist);
    ajListFree(&revlist);
    ajListFree(&pairlist);

    ajFileClose(&outf);
    ajSeqDel(&sequence);

    AJFREE(entropy);
    AJFREE(enthalpy);
    AJFREE(energy);

    embExit();

    return 0;
}
/* @funcstatic prima_reject_self **********************************************
**
** Drops self-complementary primers from the forward and reverse lists.
**
** Each primer is popped from the head of its list, split into two halves
** and the halves are scored against each other with prima_primalign().
** Primers scoring below SIMLIMIT are appended back to the list (so list
** order is preserved); the others are deleted.
**
** If one list ends up empty the other list is emptied as well and both
** counts are set to zero.
**
** @param [u] forlist [AjPList] Forward primers
** @param [u] revlist [AjPList] Reverse primers
** @param [w] neric [ajint*] Number of forward primers (updated)
** @param [w] nfred [ajint*] Number of reverse primers (updated)
** @return [void]
******************************************************************************/
static void prima_reject_self(AjPList forlist,AjPList revlist,
                              ajint *neric, ajint *nfred)
{
    AjPStr  lefthalf  = ajStrNew();
    AjPStr  righthalf = ajStrNew();
    PPrimer primer;
    ajint   kept;
    ajint   todo;
    ajint   plen;
    ajint   mid;

    /* Forward primers: visit each current entry exactly once */
    kept = *neric;

    for(todo = *neric; todo > 0; --todo)
    {
        ajListPop(forlist,(void **)&primer);

        plen = primer->primerlen;
        mid  = (plen/2)-1;
        ajStrAssignSubS(&lefthalf,primer->substr,0,mid);
        ajStrAssignSubS(&righthalf,primer->substr,mid+1,plen-1);

        if(prima_primalign(lefthalf,righthalf)<SIMLIMIT)
        {
            ajListPushAppend(forlist,(void *)primer);
            continue;
        }

        prima_PrimerDel(&primer);
        --kept;
    }

    *neric = kept;

    if(!*neric)
    {
        /* No forward primer survived: the reverse ones are useless too */
        ajStrDel(&lefthalf);
        ajStrDel(&righthalf);

        while(ajListPop(revlist,(void**)&primer))
            prima_PrimerDel(&primer);

        *nfred=0;

        return;
    }

    /* Reverse primers: same test */
    kept = *nfred;

    for(todo = *nfred; todo > 0; --todo)
    {
        ajListPop(revlist,(void **)&primer);

        plen = primer->primerlen;
        mid  = (plen/2)-1;
        ajStrAssignSubS(&lefthalf,primer->substr,0,mid);
        ajStrAssignSubS(&righthalf,primer->substr,mid+1,plen-1);

        if(prima_primalign(lefthalf,righthalf)<SIMLIMIT)
        {
            ajListPushAppend(revlist,(void *)primer);
            continue;
        }

        --kept;
        prima_PrimerDel(&primer);
    }

    *nfred = kept;

    if(!*nfred)
    {
        /* No reverse primer survived: discard the forward ones */
        while(ajListPop(forlist,(void**)&primer))
            prima_PrimerDel(&primer);

        *neric=0;
    }

    ajStrDel(&lefthalf);
    ajStrDel(&righthalf);

    return;
}
static void prima_testtarget(const AjPStr seqstr, const AjPStr revstr, ajint targetstart, ajint targetend, ajint minprimerlen, ajint maxprimerlen, ajint seqlen, float minprimerTm, float maxprimerTm, float minpmGCcont, float maxpmGCcont, float minprodGCcont, float maxprodGCcont, float saltconc, float dnaconc, AjPList pairlist, ajint *npair) { AjPStr fstr; AjPStr rstr; AjPStr str1; AjPStr str2; PPrimer f; PPrimer r; PPair ppair; ajint i; ajint j; ajint forstart = 0; ajint forend; ajint revstart = 0; ajint revend; ajint Limit; ajint tnum; ajint thisplen; ajint cut; float primerTm = 0.0; float primGCcont = 0.0; float prodgc = 0.0; AjBool found = ajFalse; AjBool revfound = ajFalse; AjBool isDNA = ajTrue; ajint flen = 0; ajint rlen = 0; float ftm = 0.0; float rtm = 0.0; float fgc = 0.0; float rgc = 0.0; ajint fsc = 0; ajint rsc = 0; const char *s; const char *s2; const char *p; ajint pv; ajint plimit; ajint pcount; ajint k; (void) minprodGCcont; (void) maxprodGCcont; fstr = ajStrNew(); rstr = ajStrNew(); str1 = ajStrNew(); str2 = ajStrNew(); tnum=maxprimerlen-minprimerlen+1; /******FORWARDS *******/ for(i=targetstart-minprimerlen; i>-1; --i) { forstart = i; forend = i+minprimerlen-1; for(j=0; j<tnum; ++j,++forend) { if(forend==targetstart) break; ajStrAssignSubC(&fstr, ajStrGetPtr(seqstr), forstart, forend); thisplen = ajStrGetLen(fstr); primerTm = ajMeltTempSave("",forstart,thisplen, saltconc, dnaconc, isDNA, &entropy, &enthalpy, &energy); if(primerTm <minprimerTm || primerTm>maxprimerTm) continue; primGCcont= ajMeltGC(fstr, thisplen); if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont) continue; /*instead of calling the self-reject function */ cut = (thisplen/2)-1; ajStrAssignSubS(&str1, fstr, 0, cut); ajStrAssignSubS(&str2, fstr, cut+1, thisplen-1); if((fsc=prima_primalign(str1, str2)) > SIMLIMIT) continue; /* Test for match with rest of sequence */ s = ajStrGetPtr(seqstr); s2 = ajStrGetPtr(revstr); p = ajStrGetPtr(fstr); pv = thisplen; pcount = 0; plimit = 
seqlen-pv+1; for(k=0;k<plimit && pcount<2;++k) { if(prima_seq_align(s+k,p,pv)>SIMLIMIT2) ++pcount; if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2) ++pcount; } if(pcount<2) { found = ajTrue; flen = thisplen; ftm = primerTm; fgc = primGCcont; break; } } if(found) break; } /******* REVERSES IN TARGETRANGE *****/ Limit = seqlen-minprimerlen; if(found) for(i=targetend+1; i<Limit; ++i) { revstart = i; revend = i+minprimerlen-1; for(j=0; j<tnum; ++j,++revend) { if(revend==seqlen) break; ajStrAssignSubC(&rstr, ajStrGetPtr(seqstr), revstart, revend); ajSeqstrReverse(&rstr); thisplen = ajStrGetLen(rstr); primerTm = ajMeltTempSave("", revstart, thisplen, saltconc, dnaconc, 1, &entropy, &enthalpy, &energy); if(primerTm <minprimerTm || primerTm>maxprimerTm) continue; primGCcont= ajMeltGC(rstr, thisplen); if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont) continue; /*instead of calling the self-reject function */ cut = (thisplen/2)-1; ajStrAssignSubS(&str1, rstr, 0, cut); ajStrAssignSubS(&str2, rstr, cut+1, thisplen-1); if((rsc=prima_primalign(str1, str2)) < SIMLIMIT) continue; /* Test for match with rest of sequence */ s = ajStrGetPtr(seqstr); s2 = ajStrGetPtr(revstr); p = ajStrGetPtr(rstr); pv = thisplen; pcount = 0; plimit = seqlen-pv+1; for(k=0;k<plimit && pcount<2;++k) { if(prima_seq_align(s+k,p,pv)>SIMLIMIT2) ++pcount; if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2) ++pcount; } if(pcount<2) { revfound = ajTrue; rlen = thisplen; rtm = primerTm; rgc = primGCcont; break; } } if(revfound) break; } if(found && !revfound) { found = ajFalse; ajWarn("No reverse primers found in targetrange"); *npair = 0; return; } if(!found) { ajWarn("No forward primers found in targetrange"); *npair = 0; return; } ajStrAssignSubC(&str1,ajStrGetPtr(seqstr),forstart+flen,revstart-1); prodgc = ajMeltGC(str1,revstart-(forstart+flen)); AJNEW0(f); f->substr = ajStrNewC(ajStrGetPtr(fstr)); f->start = forstart; f->primerlen = flen; f->primerTm = ftm; f->primGCcont = fgc; f->score = fsc; f->prodGC = prodgc; 
f->prodTm = ajMeltTempProd(prodgc,saltconc,revstart-(forstart+flen)); AJNEW0(r); r->substr = ajStrNewC(ajStrGetPtr(rstr)); r->start = revstart; r->primerlen = rlen; r->primerTm = rtm; r->primGCcont = rgc; r->score = rsc; AJNEW0(ppair); ppair->f = f; ppair->r = r; ajListPush(pairlist,(void *)ppair); *npair = 1; return; }
/* @funcstatic domainalign_ProcessStampFile ***********************************
**
** Converts a STAMP alignment file into an annotated alignment file.
**
** Input lines are skipped up to and including the first line that has no
** character at position 1. If such a line exists the output file is
** created, the SCOP or CATH classification records and the node identifier
** selected by noden are written, and the remaining input is processed as
** repeating groups of three blocks separated by such lines:
**   block 1 - "Number" line and sequences (written out)
**   block 2 - ignored
**   block 3 - only lines starting with "Post" are written
**
** If no such line is found a warning is issued, a message is written to
** the log file and no output file is created.
**
** @param [r] in [AjPStr] Name of STAMP file to read
** @param [r] out [AjPStr] Name of alignment file to write
** @param [r] domain [AjPDomain] Domain supplying the classification records
** @param [r] noden [ajint] Node level (1-4 for SCOP, 5-9 for CATH)
** @param [u] logf [AjPFile] Log file
** @return [void]
******************************************************************************/
static void domainalign_ProcessStampFile(AjPStr in, AjPStr out,
                                         AjPDomain domain, ajint noden,
                                         AjPFile logf)
{
    AjPFile alignf  = NULL;           /* Output file.                      */
    AjPFile stampf  = NULL;           /* Input file.                       */
    AjPStr  rdline  = ajStrNew();     /* Current input line.               */
    AjPStr  token   = ajStrNew();     /* First word of a sequence line.    */
    AjPStr  domid   = ajStrNew();     /* Domain identifier (7 characters). */
    AjPStr  seqtext = ajStrNew();     /* Alignment columns of a line.      */
    ajint   block   = 1;              /* Current block number, 1 to 3.     */
    ajint   nodeid  = 0;              /* Identifier printed on the SI line.*/
    AjBool  started = ajFalse;        /* Start-of-data line was seen.      */

    stampf = ajFileNewInNameS(in);

    if(!stampf)
        ajFatal("Could not open input file in domainalign_ProcessStampFile");

    /* Skip the preamble; stop reading as soon as the marker line is seen. */
    while(!started && ajReadlineTrim(stampf,&rdline))
        if(ajStrGetCharPos(rdline, 1)=='\0')
            started = ajTrue;

    if(!started)
    {
        ajWarn("\n***********************************************\n"
               "* STAMP was called but output file was EMPTY! *\n"
               "* NO OUTPUT FILE GENERATED FOR THIS NODE. *\n"
               "***********************************************\n");
        ajFmtPrintF(logf, "STAMP called but output file empty. "
                    "No output file for this node!");
    }
    else
    {
        alignf = ajFileNewOutNameS(out);

        if(!alignf)
            ajFatal("Could not open output file in domainalign_ProcessStampFile");

        /* Classification records, then pick the identifier for the node. */
        if(domain->Type == ajSCOP)
        {
            ajFmtPrintF(alignf,"# TY SCOP\n# XX\n");
            ajFmtPrintF(alignf,"# CL %S",domain->Scop->Class);
            ajFmtPrintSplit(alignf,domain->Scop->Fold,"\n# XX\n# FO ",
                            75," \t\n\r");
            ajFmtPrintSplit(alignf,domain->Scop->Superfamily,"# XX\n# SF ",
                            75," \t\n\r");
            ajFmtPrintSplit(alignf,domain->Scop->Family,"# XX\n# FA ",
                            75," \t\n\r");
            ajFmtPrintF(alignf,"# XX\n");

            switch(noden)
            {
            case 1:
                nodeid = domain->Scop->Sunid_Class;
                break;
            case 2:
                nodeid = domain->Scop->Sunid_Fold;
                break;
            case 3:
                nodeid = domain->Scop->Sunid_Superfamily;
                break;
            case 4:
                nodeid = domain->Scop->Sunid_Family;
                break;
            default:
                ajFatal("Node number error in domainalign_ProcessStampFile");
                break;
            }
        }
        else
        {
            ajFmtPrintF(alignf,"# TY CATH\n# XX\n");
            ajFmtPrintF(alignf,"# CL %S",domain->Cath->Class);
            ajFmtPrintSplit(alignf,domain->Cath->Architecture,
                            "\n# XX\n# AR ",75," \t\n\r");
            ajFmtPrintSplit(alignf,domain->Cath->Topology,"# XX\n# TP ",
                            75," \t\n\r");
            ajFmtPrintSplit(alignf,domain->Cath->Superfamily,"# XX\n# SF ",
                            75," \t\n\r");
            ajFmtPrintF(alignf,"# XX\n");

            switch(noden)
            {
            case 5:
                nodeid = domain->Cath->Class_Id;
                break;
            case 6:
                nodeid = domain->Cath->Arch_Id;
                break;
            case 7:
                nodeid = domain->Cath->Topology_Id;
                break;
            case 8:
                nodeid = domain->Cath->Superfamily_Id;
                break;
            case 9:
                nodeid = domain->Cath->Family_Id;
                break;
            default:
                ajFatal("Node number error in domainalign_ProcessStampFile");
                break;
            }
        }

        ajFmtPrintF(alignf,"# SI %d\n# XX", nodeid);

        while(ajReadlineTrim(stampf,&rdline))
        {
            /* A separator line moves on to the next block (1,2,3,1,...). */
            if(ajStrGetCharPos(rdline, 1)=='\0')
            {
                block = (block==3) ? 1 : block+1;
                continue;
            }

            switch(block)
            {
            case 1:
                /* Numbering line is copied through unchanged. */
                if(ajStrPrefixC(rdline,"Number"))
                {
                    ajFmtPrintF(alignf,"\n# %7s %S\n"," ", rdline);
                    break;
                }

                /* Identifier: first 7 characters of the first word. */
                ajFmtScanS(rdline, "%S", &token);
                ajStrAssignSubS(&domid, token, 0, 6);

                /* Sequence: columns 13-69, blanks become 'X'. */
                ajStrAssignSubS(&seqtext, rdline, 13, 69);
                ajStrExchangeSetCC(&seqtext, " ", "X");
                ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
                            "with 'X'\n");
                ajStrFmtUpper(&seqtext);

                ajFmtPrintF(alignf,"%-15S%7d %S%7d\n",
                            domid, 0, seqtext, 0);
                break;

            case 2:
                /* Secondary structure block is not wanted. */
                break;

            default:
                /* Of the similarity lines only "Post" is kept. */
                if(ajStrPrefixC(rdline,"Post"))
                {
                    ajStrAssignSubS(&seqtext, rdline, 13, 69);
                    ajFmtPrintF(alignf,"%-15s%7s %S\n","# Post_similar",
                                " ", seqtext);
                }
                break;
            }
        }
    }

    /* Release files and strings. */
    ajFileClose(&alignf);
    ajFileClose(&stampf);
    ajStrDel(&rdline);
    ajStrDel(&token);
    ajStrDel(&domid);
    ajStrDel(&seqtext);

    return;
}
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** Writes the SCOP or CATH classification records of the domain and the
** node identifier selected by noden to the alignment file, then copies the
** alignment from the TCOFFEE file: input is skipped up to and including the
** first line with no character at position 1; after that such lines and
** lines starting with "CLUSTAL" are ignored, lines starting with a space
** produce an empty output line, and every other line is written as the
** first 7 characters of its first word followed by its second word.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment (1-4 for SCOP,
**                          5-9 for CATH)
** @param [r] logf [AjPFile] Log file (not used)
**
** @return [void]
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, AjPStr align,
                                           AjPDomain domain, ajint noden,
                                           AjPFile logf)
{
    AjPFile outf = NULL;        /* Output file pointer.                  */
    AjPFile inf = NULL;         /* Input file pointer.                   */
    AjPStr temp1 = NULL;        /* First word of an alignment line.      */
    AjPStr temp2 = NULL;        /* Domain identifier code.               */
    AjPStr temp3 = NULL;        /* Second word (aligned sequence).       */
    AjPStr line = NULL;         /* Line of text from input file.         */

    (void) logf;

    /* Initialise strings. */
    line = ajStrNew();
    temp1 = ajStrNew();
    temp2 = ajStrNew();
    temp3 = ajStrNew();

    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");

    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");

    /* Write DOMAIN classification records to file. */
    if((domain->Type == ajSCOP))
    {
        ajFmtPrintF(outf,"# TY SCOP\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Scop->Class);
        ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO ",75," \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA ",75," \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
        ajFmtPrintF(outf,"# TY CATH\n# XX\n");
        ajFmtPrintF(outf,"# CL %S",domain->Cath->Class);
        ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP ",75,
                        " \t\n\r");
        ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF ",75,
                        " \t\n\r");
        ajFmtPrintF(outf,"# XX\n");
    }

    /* Write the identifier of the node being aligned. */
    if((domain->Type == ajSCOP))
    {
        if(noden==1)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Class);
        else if(noden==2)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Fold);
        else if(noden==3)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",
                        domain->Scop->Sunid_Superfamily);
        else if(noden==4)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Family);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }
    else
    {
        if(noden==5)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Class_Id);
        else if(noden==6)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Arch_Id);
        else if(noden==7)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Topology_Id);
        else if(noden==8)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Superfamily_Id);
        else if(noden==9)
            ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Family_Id);
        else
            ajFatal("Node number error in domainalign_ProcessTcoffeeFile");
    }

    /* Skip input up to and including the first line that has no character
    ** at position 1. */
    while((ajReadlineTrim(inf,&line)))
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
        if((ajStrGetCharPos(line, 1)=='\0'))
            continue;
        else if(ajStrPrefixC(line,"CLUSTAL"))
            continue;
        /* A line starting with a space ends a block of sequences. */
        else if(ajStrPrefixC(line," "))
            ajFmtPrintF(outf,"\n");
        else
        {
            /* First word is the domain identifier (only its first 7
            ** characters are kept), second word is the sequence. */
            ajFmtScanS(line, "%S %S", &temp1,&temp3);
            ajStrAssignSubS(&temp2, temp1, 0, 6);

            /* Write domain id code and sequence out. */
            ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);
        }
    }

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);

    /* All done. */
    return;
}
int main(int argc, char **argv) { AjPSeqall nucseq; /* input nucleic sequences */ AjPSeqset protseq; /* input aligned protein sequences */ AjPSeqout seqout; AjPSeq nseq; /* next nucleic sequence to align */ const AjPSeq pseq; /* next protein sequence use in alignment */ AjPTrn trnTable; AjPSeq pep; /* translation of nseq */ AjPStr tablelist; ajint table; AjPSeqset outseqset; /* set of aligned nucleic sequences */ ajint proteinseqcount = 0; AjPStr degapstr = NULL; /* used to check if it matches with START removed */ AjPStr degapstr2 = NULL; AjPStr codon = NULL; /* holds temporary codon to check if is START */ char aa; /* translated putative START codon */ ajint type; /* returned type of the putative START codon */ /* start position of guide protein in translation */ ajlong pos = 0; AjPSeq newseq = NULL; /* output aligned nucleic sequence */ ajint frame; embInit("tranalign", argc, argv); nucseq = ajAcdGetSeqall("asequence"); protseq = ajAcdGetSeqset("bsequence"); tablelist = ajAcdGetListSingle("table"); seqout = ajAcdGetSeqoutset("outseq"); outseqset = ajSeqsetNew(); degapstr = ajStrNew(); /* initialise the translation table */ ajStrToInt(tablelist, &table); trnTable = ajTrnNewI(table); ajSeqsetFill(protseq); while(ajSeqallNext(nucseq, &nseq)) { if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL) ajErr("No guide protein sequence available for " "nucleic sequence %S", ajSeqGetNameS(nseq)); ajDebug("Aligning %S and %S\n", ajSeqGetNameS(nseq), ajSeqGetNameS(pseq)); /* get copy of pseq string with no gaps */ ajStrAssignS(°apstr, ajSeqGetSeqS(pseq)); ajStrRemoveGap(°apstr); /* ** for each translation frame look for subset of pep that ** matches pseq */ for(frame = 1; frame <4; frame++) { ajDebug("trying frame %d\n", frame); pep = ajTrnSeqOrig(trnTable, nseq, frame); degapstr2 = ajStrNew(); ajStrAssignRef(°apstr2, degapstr); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr); /* ** we might have a START codon that should be translated as 'M' ** we need to 
check if there is a match after a possible START ** codon */ if(pos == -1 && ajStrGetLen(degapstr) > 1 && (ajStrGetPtr(degapstr)[0] == 'M' || ajStrGetPtr(degapstr)[0] == 'm')) { /* see if pep minus the first character is a match */ ajStrCutStart(°apstr2, 1); pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); /* ** pos is >= 1 if we have a match that is after the first ** residue */ if(pos >= 1) { /* point back at the putative START Methionine */ pos--; /* test if first codon is a START */ codon = ajStrNew(); ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), (pos*3)+frame-1, (pos*3)+frame+2); type = ajTrnCodonstrTypeS(trnTable, codon, &aa); if(type != 1) { /* first codon is not a valid START, force a mismatch */ pos = -1; } ajStrDel(&codon); } else { /* force 'pos == 0' to be treated as a mismatch */ pos = -1; } } ajStrDel(°apstr2); ajSeqDel(&pep); if(pos != -1) break; } if(pos == -1) ajErr("Guide protein sequence %S not found in nucleic sequence %S", ajSeqGetNameS(pseq), ajSeqGetNameS(nseq)); else { ajDebug("got a match with frame=%d\n", frame); /* extract the coding region of nseq with gaps */ newseq = ajSeqNew(); ajSeqSetNuc(newseq); ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq)); ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq)); tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1); /* output the gapped nucleic sequence */ ajSeqsetApp(outseqset, newseq); ajSeqDel(&newseq); } ajStrRemoveWhiteExcess(°apstr); } ajSeqoutWriteSet(seqout, outseqset); ajSeqoutClose(seqout); ajTrnDel(&trnTable); ajSeqsetDel(&outseqset); ajStrDel(°apstr); ajStrDel(°apstr2); ajSeqallDel(&nucseq); ajSeqDel(&nseq); ajSeqoutDel(&seqout); ajSeqsetDel(&protseq); ajStrDel(&tablelist); embExit(); return 0; }
/*
** Writes one alignment line for read r to outf in tab-separated SAM layout:
** name, flag, reference name, position, mapping quality, CIGAR, RNEXT,
** PNEXT, TLEN, sequence, quality, followed by the read's tags.
**
** If the read has no CIGAR string but its contig has a consensus, a CIGAR
** is built with assemoutMakeCigar() and the sequence/quality are written
** with '*' padding characters removed. If neither is available an error is
** reported, '*' is written as CIGAR and ajFalse is returned.
**
** outf      output file
** r         read to write
** contigs   contigs, indexed by r->Reference and r->Rnext
** ncontigs  number of entries in contigs
**
** Returns ajTrue unless both CIGAR and consensus are missing.
*/
static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
                                        AjPAssemContig const * contigs,
                                        ajint ncontigs)
{
    AjPAssemTag t = NULL;
    AjIList l = NULL;
    AjPStr qualstr = NULL;      /* private copy of the qualities (reverse reads only) */
    AjPStr tmp = NULL;          /* aligned part of the read, later the bare CIGAR */
    ajint POS = 0;
    AjPStr CIGAR = NULL;
    const char* RNEXT = NULL;
    AjPStr SEQ = NULL;
    AjPStr QUAL = NULL;         /* either qualstr or r->SeqQ; not freed itself */
    AjPStr SEQunpadded = NULL;
    AjPStr QUALunpadded = NULL;
    AjPStr consensus = NULL;
    AjBool rc= ajFalse;         /* BAM_FREVERSE bit is set in r->Flag */
    AjBool ret = ajTrue;
    const char* refseq = NULL;
    const AjPAssemContig contig = NULL;
    ajuint k = 0;

    if(r->Reference>=ncontigs)
        ajDie("assemoutWriteSamAlignment: reference sequence number" " '%d' is larger than or equal to known number of reference" " sequences '%d'. Problem while processing read '%S'.", r->Reference, ncontigs, r->Name);

    /* Reference -1 means no contig; it is written as "*" below */
    contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);
    ajStrAssignRef(&SEQ, r->Seq);
    consensus = contig==NULL? NULL : contig->Consensus;

    /* RNEXT: "*" when unset, "=" when same as this read's reference,
    ** otherwise the contig name.
    ** NOTE(review): r->Rnext is not range-checked against ncontigs */
    if (r->Rnext==-1)
        RNEXT= "*";
    else if(r->Rnext==r->Reference)
        RNEXT = "=";
    else
        RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);

    if (r->Flag & BAM_FREVERSE)
    {
        rc = ajTrue;
        qualstr = ajStrNewS(r->SeqQ);

        /* Bring sequence and qualities into reversed orientation unless
        ** the read is already marked as reversed */
        if(!r->Reversed)
        {
            ajStrReverse(&qualstr);
            ajSeqstrReverse(&SEQ);
        }

        QUAL = qualstr;
        POS = r->y1;
        ajStrAssignSubS(&tmp, SEQ, ajStrGetLen(r->Seq) - r->y2, ajStrGetLen(r->Seq) - r->x2 );
    }
    else
    {
        rc= ajFalse;
        POS = r->x1;
        QUAL = r->SeqQ;
        ajStrAssignSubS(&tmp, SEQ, r->x2-1, r->y2-1 );
    }

    if(r->Cigar==NULL && consensus)
    {
        /* Build the CIGAR from the consensus at the read's start offset */
        refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);
        CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));

        SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
        QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));

        /* Copy sequence and quality, skipping '*' positions */
        for(k=0; k< ajStrGetLen(SEQ); k++)
        {
            if (ajStrGetCharPos(SEQ, k) == '*')
                continue;

            ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
            ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
        }

        ajDebug("cigar: %S\n", CIGAR);

        /* tmp now holds the bare CIGAR so that "<n>S" operations can be
        ** added for read ends outside x2..y2; the two ends swap roles for
        ** reverse reads */
        ajStrAssignS(&tmp, CIGAR);

        if(rc)
        {
            if(r->y2 < (ajint)ajStrGetLen(SEQ))
                ajFmtPrintS(&CIGAR, "%dS%S", ajStrGetLen(SEQ) - r->y2, tmp);

            if(r->x2 > 1)
                ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
        }
        else
        {
            if(r->x2 > 1)
                ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);

            if(r->y2 < (ajint)ajStrGetLen(SEQ))
                ajFmtPrintAppS(&CIGAR, "%dS", ajStrGetLen(SEQ) - r->y2);
        }

        ajStrDel(&tmp);
    }
    else if(r->Cigar==NULL)
    {
        ajErr("both CIGAR string and consensus sequence not available");
        ret = ajFalse;
        ajStrAssignK(&CIGAR, '*');
    }
    else if(!ajStrGetLen(r->Cigar))
        ajStrAssignK(&CIGAR, '*');
    else if(ajStrGetLen(r->Cigar))
    {
        /* Empty sequence/quality are written as '*'.
        ** NOTE(review): for forward reads QUAL is r->SeqQ itself, so this
        ** assignment acts on the read's own quality string - confirm */
        if(!ajStrGetLen(SEQ))
            ajStrAssignK(&SEQ, '*');

        if(!ajStrGetLen(QUAL))
            ajStrAssignK(&QUAL, '*');
    }

    ajStrDel(&tmp);

    /* Mandatory fields; the unpadded copies are used only when the read
    ** had no CIGAR of its own */
    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S", r->Name, r->Flag, (contig==NULL ? "*" : ajStrGetPtr(contig->Name)), POS, r->MapQ, (CIGAR ? CIGAR : r->Cigar), RNEXT, r->Pnext, r->Tlen, (r->Cigar ? SEQ : SEQunpadded), (r->Cigar ? QUAL : QUALunpadded));

    /* Optional fields: one "\tNAME:TYPE:" entry per tag */
    l = ajListIterNewread(r->Tags);

    while (!ajListIterDone(l))
    {
        t = ajListIterGet(l);

        /* TODO: array type, 'B' */
        /* In SAM, all single integer types are mapped to int32_t [SAM spec] */
        ajFmtPrintF(outf, "\t%S:%c:", t->Name, (t->type == 'c' || t->type == 'C' || t->type == 's' || t->type == 'S' || t->type == 'I') ? 'i' : t->type );

        if(t->x1 || t->y1)
            ajFmtPrintF(outf, " %u %u", t->x1, t->y1);

        if(t->Comment && ajStrGetLen(t->Comment)>0)
            ajFmtPrintF(outf, "%S", t->Comment);
    }

    ajListIterDel(&l);

    ajFmtPrintF(outf, "\n");

    if(qualstr)
        ajStrDel(&qualstr);

    ajStrDel(&SEQ);
    ajStrDel(&CIGAR);
    ajStrDel(&SEQunpadded);
    ajStrDel(&QUALunpadded);

    return ret;
}
int main(int argc, char **argv) { AjPFile inf = NULL; AjPFile inf2 = NULL; AjPFeattable tab = NULL; AjPReport report = NULL; AjPSeq sequence = NULL; AjPStr redatanew = NULL; AjPStr str = NULL; AjPStr regexp = NULL; AjPStr temp = NULL; AjPStr text = NULL; AjPStr docdata = NULL; AjPStr data = NULL; AjPStr accession = NULL; AjPStr name = NULL; EmbPPatMatch match = NULL; AjPStr savereg = NULL; AjPStr fthit = NULL; AjBool full; AjBool prune; ajint i; ajint number; ajint start; ajint end; ajint length; ajint zstart; ajint zend; const char *p; ajint seqlength; AjPStr tmpstr = NULL; AjPStr tailstr = NULL; AjPFeature gf; embInit("patmatmotifs", argc, argv); ajStrAssignC(&fthit, "SO:0001067"); savereg = ajStrNew(); str = ajStrNew(); regexp = ajStrNew(); temp = ajStrNew(); data = ajStrNew(); accession = ajStrNew(); text = ajStrNew(); name = ajStrNew(); sequence = ajAcdGetSeq("sequence"); report = ajAcdGetReport("outfile"); full = ajAcdGetBoolean("full"); prune = ajAcdGetBoolean("prune"); ajSeqFmtUpper(sequence); /* prosite regexs are all upper case */ tab = ajFeattableNewSeq(sequence); ajStrAssignC(&tailstr, ""); seqlength = ajStrGetLen(str); str = ajSeqGetSeqCopyS(sequence); redatanew = ajStrNewC("PROSITE/prosite.lines"); docdata = ajStrNewC("PROSITE/"); inf = ajDatafileNewInNameS(redatanew); if(!inf) ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running"); ajFmtPrintAppS(&tmpstr, "Full: %B\n", full); ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune); ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf); ajReportSetHeaderS(report, tmpstr); while(ajReadlineTrim(inf, ®exp)) { p=ajStrGetPtr(regexp); if(*p && *p!=' ' && *p!='^') { p=ajSysFuncStrtok(p," "); ajStrAssignC(&name,p); if(prune) if(ajStrMatchCaseC(name,"myristyl") || ajStrMatchCaseC(name,"asn_glycosylation") || ajStrMatchCaseC(name,"camp_phospho_site") || ajStrMatchCaseC(name,"pkc_phospho_site") || ajStrMatchCaseC(name,"ck2_phospho_site") || ajStrMatchCaseC(name,"tyr_phospho_site")) { for(i=0;i<4;++i) 
ajReadlineTrim(inf, ®exp); continue; } p=ajSysFuncStrtok(NULL," "); ajStrAssignC(&accession,p); } if(ajStrPrefixC(regexp, "^")) { p = ajStrGetPtr(regexp); ajStrAssignC(&temp,p+1); ajStrAssignC(&savereg,p+1); match = embPatMatchFind(temp, str, ajFalse, ajFalse); number = embPatMatchGetNumber(match); for(i=0; i<number; i++) { seqlength = ajStrGetLen(str); start = 1+embPatMatchGetStart(match, i); end = 1+embPatMatchGetEnd(match, i); length = embPatMatchGetLen(match, i); gf = ajFeatNew(tab, NULL, fthit, start, end, (float) length, ' ', 0); ajFmtPrintS(&tmpstr, "*motif %S", name); ajFeatTagAddSS(gf, NULL, tmpstr); if(start-5<0) zstart = 0; else zstart = start-5; if(end+5> seqlength) zend = end; else zend = end+5; ajStrAssignSubS(&temp, str, zstart, zend); } if(full && number) { ajStrAssignC(&redatanew,ajStrGetPtr(docdata)); ajStrAppendC(&redatanew,ajStrGetPtr(accession)); inf2 = ajDatafileNewInNameS(redatanew); if(!inf2) continue; /* ** Insert Prosite documentation from files made by ** prosextract.c */ ajFmtPrintAppS(&tailstr, "Motif: %S\n", name); ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number); while(ajReadlineTrim(inf2, &text)) ajFmtPrintAppS(&tailstr, "%S\n", text); ajFmtPrintAppS(&tailstr, "\n***************\n\n"); ajFileClose(&inf2); } embPatMatchDel(&match); } } ajReportSetTailS(report,tailstr); ajReportWrite(report, tab, sequence); ajReportDel(&report); ajFeattableDel(&tab); ajStrDel(&temp); ajStrDel(®exp); ajStrDel(&savereg); ajStrDel(&str); ajStrDel(&data); ajStrDel(&docdata); ajStrDel(&text); ajStrDel(&redatanew); ajStrDel(&accession); ajSeqDel(&sequence); ajStrDel(&tailstr); ajStrDel(&fthit); ajStrDel(&name); ajStrDel(&tmpstr); ajFeattableDel(&tab); ajFileClose(&inf); embExit(); return 0; }
/*
** embPatternRegexSearch
**
** Searches the begin..end range of a sequence with a compiled regular
** expression pattern and adds one feature to the feature table for every
** match, including overlapping matches (after each match the search
** restarts one character further on).
**
** ftable  [AjPFeattable]           Feature table that receives the hits
** seq     [const AjPSeq]           Sequence to search
** pat     [const AjPPatternRegex]  Pattern; its compiled regex, name and
**                                  pattern text are used
** reverse [AjBool]                 If true the search string is passed
**                                  through ajSeqstrReverse() first and
**                                  hits are stored as '-' strand
**                                  nucleotide motifs with coordinates
**                                  mirrored back onto the forward range
**
** Returns nothing. An empty sequence produces no features.
**
** Feature types: SO:0001067 for protein motifs, SO:0000714 for
** nucleotide motifs. Each feature is tagged "*pat <name>: <pattern>".
*/
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos = 0;
    ajint off;
    ajint len;
    ajint adj;
    ajint seqlen;

    AjBool isreversed;

    AjPFeature sf = NULL;
    AjPStr substr = NULL;
    AjPStr seqstr = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp    = NULL;

    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);

    seqlen = ajSeqGetLen(seq);

    /* nothing has been allocated yet, so this early exit cannot leak */
    if(!seqlen)
        return;

    tmp = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*
    ** Build the search string from the requested range. For a reverse
    ** search the string itself must be reversed: the mirrored coordinates
    ** computed below are only meaningful for positions in the reversed
    ** string.
    */
    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    if(reverse)
        ajSeqstrReverse(&seqstr);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
        off = ajRegOffset(patexp);
        len = ajRegLenI(patexp, 0);

        /* a zero-length match at offset 0 is not reported */
        if(off || len)
        {
            /* continue from the start of this match: match + remainder */
            ajRegSubI(patexp, 0, &substr);
            ajRegPost(patexp, &tmp);
            ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
            pos += off;

            if(reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               adj - pos - len + 2,
                               adj - pos + 1,
                               0.0, '-', 0);
            else if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   pos, pos + len - 1, 0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               pos, pos + len - 1, 0.0, '.', 0);

            if(isreversed)
                ajFeatReverse(sf, seqlen);

            ajFmtPrintS(&tmpstr, "*pat %S: %S",
                        ajPatternRegexGetName(pat),
                        ajPatternRegexGetPattern(pat));
            ajFeatTagAdd(sf, NULL, tmpstr);
        }

        /* step one character on so that overlapping matches are found */
        pos++;
        ajStrCutStart(&seqstr, 1);
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    return;
}
/* @funcstatic seqwords_keysearch ********************************************
**
** Search a swissprot format file with the keywords of a terms structure
** and append one hit to the hitlist for every matching entry (keyword
** found on a DE or KW line: whole sequence) or matching domain (keyword
** found on an "FT DOMAIN" line: the start..end range of that domain).
** Domain hits take priority over whole-sequence hits for the same entry.
**
** @param [r] inf   [AjPFile]      File pointer to swissprot database
** @param [r] terms [AjPTerms]     Terms object pointer
** @param [w] hits  [EmbPHitlist*] Hitlist object pointer; the hitlist must
**                                 already exist, hits are appended to it
**
** @return [AjBool] True on success, false if any argument is missing
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, AjPTerms terms,
				 EmbPHitlist *hits)
{
    AjPStr line = NULL;		/* Line of text.                        */
    AjPStr id   = NULL;		/* Accession number of current entry.   */
    AjPStr temp = NULL;		/* Sequence of current entry.           */
    ajint s     = 0;		/* Temp. start of hit value.            */
    ajint e     = 0;		/* Temp. end of hit value.              */
    AjPInt start = NULL;	/* Array of start of hit(s).            */
    AjPInt end   = NULL;	/* Array of end of hit(s).              */
    ajint nhits  = 0;		/* Number of domain hits in this entry. */
    ajint x      = 0;
    ajint idx    = 0;		/* Index of the hit being filled in.    */
    AjBool foundkw = ajFalse;
    AjBool foundft = ajFalse;
    EmbPHitlist hitlist = NULL;

    /* Check for valid args: all three are dereferenced below. */
    if(!inf || !terms || !hits || !*hits)
	return ajFalse;

    hitlist = *hits;

    /* Allocate strings and arrays. */
    line  = ajStrNew();
    id    = ajStrNew();
    start = ajIntNew();
    end   = ajIntNew();

    /* Start of main loop. */
    while(ajReadlineTrim(inf, &line))
    {
	/* Parse the AC line. */
	if(ajStrPrefixC(line, "AC"))
	{
	    /* Copy accession number and remove the ';' from the end. */
	    ajFmtScanS(line, "%*s %S", &id);
	    ajStrExchangeCC(&id, ";", "");

	    /* Reset flags & no. hits. */
	    foundkw = ajFalse;
	    foundft = ajFalse;
	    nhits   = 0;
	}
	/* Search the description and keyword lines with search terms. */
	else if(ajStrPrefixC(line, "DE") || ajStrPrefixC(line, "KW"))
	{
	    /*
	    ** Search terms have a leading and trailing space to prevent
	    ** them being found as substrings within other words. To
	    ** catch cases where a DE or KW line begins with a search
	    ** term, we must add a leading and trailing space to line.
	    ** We must first remove punctuation from the line to be parsed.
	    */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    ajStrInsertC(&line, 0, " ");

	    for(x = 0; x < terms->N; x++)
		if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
		{
		    /* Search term is found. */
		    foundkw = ajTrue;
		    break;
		}
	}
	/* Search the feature table line with search terms. */
	else if(ajStrPrefixC(line, "FT   DOMAIN"))
	{
	    /*
	    ** Search terms have a leading and trailing space to prevent
	    ** them being found as substrings within other words. To
	    ** catch cases where a FT line ends with a search
	    ** term, we must add a trailing space to line.
	    ** We must first remove punctuation from the line to be parsed.
	    */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');

	    for(x = 0; x < terms->N; x++)
		if(ajStrFindCaseS(line, terms->Keywords[x]) != -1)
		{
		    /* Search term is found. */
		    foundft = ajTrue;
		    nhits++;

		    /* Assign start and end of hit. */
		    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);

		    ajIntPut(&start, nhits-1, s);
		    ajIntPut(&end, nhits-1, e);
		    break;
		}
	}
	/* Parse the sequence. */
	else if(ajStrPrefixC(line, "SQ") && (foundkw || foundft))
	{
	    /* Allocate memory for temp. sequence. */
	    temp = ajStrNew();

	    /* Read the sequence lines up to the "//" entry terminator. */
	    while(ajReadlineTrim(inf, &line) && !ajStrPrefixC(line, "//"))
	    {
		/*
		** The first three characters of each line are skipped;
		** lines too short to hold them are ignored rather than
		** read beyond their end.
		*/
		if(ajStrGetLen(line) > 3)
		    ajStrAppendC(&temp, ajStrGetPtr(line)+3);
	    }

	    /* Clean up temp. sequence. */
	    ajStrRemoveWhite(&temp);

	    /* Priority is given to domain (rather than full length)
	       sequence. */
	    if(foundft)
	    {
		for(x = 0; x < nhits; x++)
		{
		    /* Grow the array of hits by one and create the hit. */
		    hitlist->N++;
		    AJCRESIZE(hitlist->hits, hitlist->N);
		    idx = hitlist->N - 1;

		    hitlist->hits[idx] = embHitNew();
		    ajStrAssignC(&hitlist->hits[idx]->Model, "KEYWORD");

		    /* Assign start and end of hit. */
		    hitlist->hits[idx]->Start = ajIntGet(start, x);
		    hitlist->hits[idx]->End   = ajIntGet(end, x);

		    /* Extract sequence within specified range. */
		    ajStrAssignSubS(&hitlist->hits[idx]->Seq, temp,
				    hitlist->hits[idx]->Start - 1,
				    hitlist->hits[idx]->End - 1);

		    /* Put id into structure. */
		    ajStrAssignRef(&hitlist->hits[idx]->Acc, id);
		}
	    }
	    else
	    {
		/* Grow the array of hits by one and create the hit. */
		hitlist->N++;
		AJCRESIZE(hitlist->hits, hitlist->N);
		idx = hitlist->N - 1;

		hitlist->hits[idx] = embHitNew();
		ajStrAssignC(&hitlist->hits[idx]->Model, "KEYWORD");

		/* Extract whole sequence. */
		ajStrAssignRef(&hitlist->hits[idx]->Seq, temp);
		hitlist->hits[idx]->Start = 1;
		hitlist->hits[idx]->End =
		    ajStrGetLen(hitlist->hits[idx]->Seq);

		/* Put id into structure. */
		ajStrAssignRef(&hitlist->hits[idx]->Acc, id);
	    }

	    /* Free temp. sequence. */
	    ajStrDel(&temp);
	}
    }

    /* Clean up. */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
/*
** emboss_copy
**
** Converts an EMBOSS sequence set into the alignment representation used
** by the caller: an array of C strings plus an AINFO structure.
**
** seqset  [AjPSeqset]  Sequence set. It is modified: ajSeqsetFill() and
**                      ajSeqsetFmtUpper() are applied before copying.
** retseqs [char***]    Receives a newly allocated array of n newly
**                      allocated strings, one per sequence, each with
**                      room for the set length plus a terminator.
** info    [AINFO*]     Filled in: nseq, alen, per-sequence weights (wgt)
**                      and per-sequence SQINFO records (name, id, acc,
**                      type, ungapped len and the matching flag bits).
**                      name, acc, desc, cs and rf are copied from the
**                      FIRST sequence of the set; au is left NULL.
**
** Returns nothing. Everything stored in *retseqs and info is owned by
** the caller afterwards.
**
** NOTE(review): the set is assumed to hold at least one sequence (element
** 0 is read unconditionally) and, after ajSeqsetFill(), every sequence is
** assumed to be no longer than ajSeqsetGetLen() - confirm with callers.
*/
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    ajint cnt;
    ajint i;
    int seqtype;
    char **seqs;
    const AjPSeq seq = NULL;
    const char *p    = NULL;
    char c           = '\0';
    AjPStr tmpstr    = NULL;

    info->name  = NULL;
    info->rf    = NULL;
    info->cs    = NULL;
    info->desc  = NULL;
    info->acc   = NULL;
    info->au    = NULL;
    info->flags = 0;

    ajSeqsetFill(seqset);

    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);
    maxlen = ajSeqsetGetLen(seqset);

    /* First allocate and copy sequences */
    AJCNEW0(seqs, n);

    for(i=0; i<n; ++i)
    {
	seqs[i] = ajCharNewRes(maxlen+1);
	strcpy(seqs[i], ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset, i)));
    }

    /* Per-sequence records, explicitly reset to the empty state */
    info->sqinfo = (SQINFO *) calloc(n, sizeof(SQINFO));

    for(i=0; i<n; ++i)
    {
	info->sqinfo[i].flags = 0;
	strcpy(info->sqinfo[i].name, "");
	strcpy(info->sqinfo[i].id, "");
	strcpy(info->sqinfo[i].acc, "");
	strcpy(info->sqinfo[i].desc, "");
	info->sqinfo[i].len   = 0;
	info->sqinfo[i].start = 0;
	info->sqinfo[i].stop  = 0;
	info->sqinfo[i].olen  = 0;
	info->sqinfo[i].type  = 0;
	info->sqinfo[i].ss    = NULL;
	info->sqinfo[i].sa    = NULL;
    }

    AJCNEW0(info->wgt, n);

    for(i=0; i<n; ++i)
	info->wgt[i] = ajSeqsetGetseqWeight(seqset, i);

    info->nseq = n;
    info->alen = maxlen;

    /*
    ** Names and accessions go into fixed-size fields, so they are cut to
    ** at most SQINFO_NAMELEN-1 characters. The end position given to
    ** ajStrAssignSubS() is inclusive, hence len-1: passing len would copy
    ** one character too many and overrun the field on truncation.
    ** Assumes the id, name and acc fields hold SQINFO_NAMELEN bytes, as
    ** the clamp implies - TODO confirm against the SQINFO definition.
    */
    for(i=0; i<n; ++i)
    {
	seq = ajSeqsetGetseqSeq(seqset, i);

	len = ajStrGetLen(ajSeqGetNameS(seq));

	if(len)
	{
	    if(len >= SQINFO_NAMELEN)
		len = SQINFO_NAMELEN - 1;

	    ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len-1);

	    strcpy(info->sqinfo[i].id, ajStrGetPtr(tmpstr));
	    info->sqinfo[i].flags |= SQINFO_ID;

	    strcpy(info->sqinfo[i].name, ajStrGetPtr(tmpstr));
	    info->sqinfo[i].flags |= SQINFO_NAME;
	}

	len = ajStrGetLen(ajSeqGetAccS(seq));

	if(len)
	{
	    if(len >= SQINFO_NAMELEN)
		len = SQINFO_NAMELEN - 1;

	    ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len-1);

	    strcpy(info->sqinfo[i].acc, ajStrGetPtr(tmpstr));
	    info->sqinfo[i].flags |= SQINFO_ACC;
	}
    }

    /* Alignment-level annotation is taken from the first sequence */
    seq = ajSeqsetGetseqSeq(seqset, 0);

    info->cs   = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc  = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf   = ajCharNewS(ajSeqGetSeqS(seq));

    /*
    ** The sequence type is a property of the whole set; when more than
    ** one test succeeds the later one wins.
    */
    seqtype = kOtherSeq;

    if(ajSeqsetIsDna(seqset))
	seqtype = kDNA;

    if(ajSeqsetIsRna(seqset))
	seqtype = kRNA;

    if(ajSeqsetIsProt(seqset))
	seqtype = kAmino;

    for(i=0; i<n; ++i)
    {
	info->sqinfo[i].type   = seqtype;
	info->sqinfo[i].flags |= SQINFO_TYPE;

	/* Length without gap characters ('.', ' ', '_', '-', '~') */
	seq = ajSeqsetGetseqSeq(seqset, i);
	cnt = 0;

	for(p = ajSeqGetSeqC(seq); (c = *p) != '\0'; ++p)
	{
	    if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
		++cnt;
	}

	info->sqinfo[i].len    = cnt;
	info->sqinfo[i].flags |= SQINFO_LEN;
    }

    *retseqs = seqs;

    ajStrDel(&tmpstr);

    return;
}