static void acdrelations_readdatfile (AjPFile inf, PEdam *P) { AjPStr line = NULL; const AjPStr tok = NULL; const AjPStr subtok = NULL; AjPStr strtmp = NULL; AjPList strlist = NULL; AjPStr acdtype = NULL; AjPStr relations = NULL; PEdamdat dattmp = NULL; AjPList datlist = NULL; if(!P) ajFatal("Null arg error 1 in acdrelations_readdatfile"); if(!inf) ajFatal("Null arg error 3 in acdrelations_readdatfile"); /* Allocate memory */ line = ajStrNew(); acdtype = ajStrNew(); relations = ajStrNew(); datlist = ajListNew(); /* Read data from file */ while(ajReadline(inf,&line)) { /* Discard comment lines */ if(ajStrPrefixC(line,"#")) continue; /* Tokenise line, delimited by '|'. Parse first token (ACD datatype ) */ ajStrAssignS(&acdtype, ajStrParseC(line, "|")); /* Parse second token (EDAM relations: value ) */ ajStrAssignS(&relations, ajStrParseC(NULL, "|")); /* Parse third token (attribute:value strings block) */ tok = ajStrParseC(NULL, "|"); /* Create new string list */ strlist = ajListstrNew(); /* Tokenise third token itself into tokens delimited by ' ' (space) Parse tokens (individual attribute:value strings)*/ if((subtok=ajStrParseC(tok, ";"))) { strtmp = ajStrNew(); ajStrAssignS(&strtmp, subtok); ajStrRemoveWhite(&strtmp); ajListstrPushAppend(strlist, strtmp); while((subtok=ajStrParseC(NULL, ";"))) { strtmp = ajStrNew(); ajStrAssignS(&strtmp, subtok); ajStrRemoveWhite(&strtmp); ajListstrPushAppend(strlist, strtmp); } } /* Write PEdamdat structure & push onto list */ dattmp = ajEdamdatNew(); ajStrRemoveWhite(&acdtype); ajStrAssignS(&dattmp->acdtype, acdtype); ajStrAssignS(&dattmp->edam, relations); dattmp->n = ajListstrToarray(strlist, &dattmp->acdattr); ajListPushAppend(datlist, dattmp); /* Clear nodes (but not strings) from string list */ ajListstrFree(&strlist); } /* Write PEdam structure */ ((*P)->n) = ajListToarray(datlist, (void***) &((*P)->dat)); /* Free memory */ ajStrDel(&line); ajStrDel(&acdtype); ajStrDel(&relations); ajListFree(&datlist); return; }
/* @funcstatic acdrelations_writerelations ************************************ ** ** Writes relations: attribute for an ACD data definition ** The relations: values given in knowntypes.standard have highest precedence, ** then the values given in edamtoacd.dat ** Attribute values for a given datatype in edamtoacd.dat are in order of ** increasing precedence, i.e. the last line is highest and the relations: ** value will be used if all conditions are met. ** ** @param [r] outf [AjPFile] ACD output file ** @param [r] acdtype [AjPStr ] ACD datatype, e.g. "align" ** @param [r] strarr [AjPStr*] All ACD attribute lines (whitespace removed) ** for the the current ACD data item (of type ** acdtype). One line per array element. ** @param [r] n [ajint] Size of strarr ** @param [r] P [PEdam] edam object to write ** @param [r] T [PKtype] ktype object to read ** @return [void] ** @@ ******************************************************************************/ static void acdrelations_writerelations (AjPFile outf, AjPStr acdtype, AjPStr *strarr, ajint n, PEdam P, PKtype T) { ajint i = 0; ajint j = 0; ajint k = 0; ajint nmatch = 0; AjPStr relations = NULL; AjPStr ktype = NULL; /* Value of knowntype: attribute */ AjBool done = ajFalse; AjBool donetype = ajFalse; AjPStr tmpstr = NULL; if(!outf || !acdtype || !strarr || !n || !P) ajFatal("NULL args passed to acdrelations_writerelations"); /* Memory allocation */ relations = ajStrNew(); ktype = ajStrNew(); tmpstr = ajStrNew(); /* Loop through all lines in edamtoacd.dat */ for(i=0; i<P->n ;i++) { /* Found matching datatype */ if(ajStrMatchS(acdtype, P->dat[i]->acdtype)) { /* Copy first relations: string defined for this datatype (default) */ ajStrAssignS(&relations, P->dat[i]->edam); done = ajTrue; i++; /* Check next line in edamtoacd.dat */ for( ; i<P->n; i++) { /* Datatype still matches */ if(ajStrMatchS(acdtype, P->dat[i]->acdtype)) { /* Loop through all required attributes for this datatype */ for(nmatch=0, j=0; j<P->dat[i]->n; j++) { /* Loop through all attribute lines for the data defininition */ for(k=0; k<n; k++) if(ajStrMatchS(P->dat[i]->acdattr[j], strarr[k])) { nmatch++; /* ajFmtPrint("Found match %d: %S:%S\n", nmatch, P->dat[i]->acdattr[j], strarr[k]); */ break; } } /* All attribute values match */ if(nmatch == P->dat[i]->n) ajStrAssignS(&relations, P->dat[i]->edam); /* Should never happen */ else if (nmatch > P->dat[i]->n) ajFatal("Terminal weirdness in acdrelations_writerelations"); } else break; } break; } } /* Check for match of knowntype: attribute against knowntypes.standard. These have higher precedence than the rules defined in edamtoacd.dat */ for(donetype=ajFalse, i=0; i<n; i++) { if(ajStrPrefixC(strarr[i], "knowntype:")) { for(j=0;j<T->n; j++) { /* No check is made on the "Type" column in knowntypes.standard as these are not proper ACD datatype names To check these add if(ajStrMatchS(acdtype, T->dat[j]->acdtype)) */ ajFmtPrintS(&tmpstr, "knowntype:\"%S\"", T->dat[j]->ktype); if(ajStrMatchS(tmpstr, strarr[i])) { ajStrAssignS(&relations, T->dat[j]->edam); donetype=ajTrue; break; } } if(donetype) break; } else continue; } if(!done) ajFatal("No matching datatype (%S) in acdrelations_writerelations", acdtype); /* Write relations: attribute line to file */ ajFmtPrintF(outf, " relations:%S\n", relations); /* Free memory */ ajStrDel(&relations); ajStrDel(&ktype); ajStrDel(&tmpstr); return; }
/* @prog seqnr ************************************************************** ** ** Removes redundancy from DHF files (domain hits files) or other files of ** sequences. ** ****************************************************************************/ int main(int argc, char **argv) { /* Variable declarations */ AjPList in = NULL; /* Names of domain hits files (input). */ AjPStr inname = NULL; /* Full name of the current DHF file. */ AjPFile inf = NULL; /* Current DHF file. */ EmbPHitlist infhits = NULL; /* Hitlist from DHF file */ AjBool dosing = ajFalse; /* Filter using singlet sequences. */ AjPDir singlets = NULL; /* Singlets (input). */ AjBool dosets = ajFalse; /* Filter using sets of sequences. */ AjPDir insets = NULL; /* Sets (input). */ AjPStr mode = NULL; /* Mode of operation */ ajint moden = 0; /* Mode 1: single threshold for redundancy removal, 2: lower and upper thresholds for redundancy removal. */ float thresh = 0.0; /* Threshold for non-redundancy. */ float threshlow = 0.0; /* Threshold (lower limit). */ float threshup = 0.0; /* Threshold (upper limit). */ AjPMatrixf matrix = NULL; /* Substitution matrix. */ float gapopen = 0.0; /* Gap insertion penalty. */ float gapextend = 0.0; /* Gap extension penalty. */ AjPDirout out = NULL; /* Domain hits files (output). */ AjPFile outf = NULL; /* Current DHF file (output). */ AjBool dored = ajFalse; /* True if redundant hits are output. */ AjPDirout outred = NULL; /* DHF files for redundant hits (output).*/ AjPFile redf = NULL; /* Current DHF file redundancy (output). */ AjPStr outname = NULL; /* Name of output file (re-used). */ AjPFile logf = NULL; /* Log file pointer. */ AjBool ok = ajFalse; /* Housekeeping. */ AjPSeqset seqset = NULL; /* Seqset (re-used). */ AjPSeqin seqin = NULL; /* Seqin (re-used). */ AjPList seq_list = NULL; /* Main list for redundancy removal. */ EmbPDmxNrseq seq_tmp = NULL; /* Temp. pointer for making seq_list. */ ajint seq_siz = 0; /* Size of seq_list. */ AjPUint keep = NULL; /* 1: Sequence in seq_list was classed as non-redundant, 0: redundant. */ AjPUint nokeep = NULL; /* Inversion of keep array. */ ajint nseqnr = 0; /* No. non-redundant seqs. in seq_list. */ AjPStr filtername= NULL; /* Name of filter file (re-used). */ AjPFile filterf = NULL; /* Current filter file. */ EmbPHitlist hitlist = NULL; /* Hitlist from input file (re-used). */ AjPScopalg scopalg = NULL; /* Scopalg from input file. */ ajint x = 0; /* Housekeeping. */ /* Read data from acd. */ embInitPV("seqnr",argc,argv,"DOMSEARCH",VERSION); in = ajAcdGetDirlist("dhfinpath"); dosing = ajAcdGetToggle("dosing"); singlets = ajAcdGetDirectory("singletsdir"); dosets = ajAcdGetToggle("dosets"); insets = ajAcdGetDirectory("insetsdir"); mode = ajAcdGetListSingle("mode"); thresh = ajAcdGetFloat("thresh"); threshlow = ajAcdGetFloat("threshlow"); threshup = ajAcdGetFloat("threshup"); matrix = ajAcdGetMatrixf("matrix"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); out = ajAcdGetOutdir("dhfoutdir"); dored = ajAcdGetToggle("dored"); outred = ajAcdGetOutdir("redoutdir"); logf = ajAcdGetOutfile("logfile"); /* Housekeeping. */ filtername = ajStrNew(); outname = ajStrNew(); if(!(ajStrToInt(mode, &moden))) ajFatal("Could not parse ACD node option"); /* Process each DHF (input) in turn. */ while(ajListPop(in,(void **)&inname)) { ajFmtPrint("Processing %S\n", inname); ajFmtPrintF(logf, "//\n%S\n", inname); seq_list = ajListNew(); keep = ajUintNew(); nokeep = ajUintNew(); /**********************************/ /* Open DHF file */ /**********************************/ if((inf = ajFileNewInNameS(inname)) == NULL) ajFatal("Could not open DHF file %S", inname); /* Read DHF file. */ ok = ajFalse; if(!(infhits = embHitlistReadFasta(inf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqsearch_psialigned"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(infhits->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&inf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty input file %S\n", inname); ajFmtPrintF(logf, "Empty input file %S\n", inname); if(infhits) embHitlistDel(&infhits); if(seqset) ajSeqsetDel(&seqset); if(seqin) ajSeqinDel(&seqin); continue; } /* 1. Create list of sequences from the main input directory.. */ if(infhits) { for(x=0; x<infhits->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc,infhits->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,infhits->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } /**********************************/ /* Open singlets filter file */ /**********************************/ if(dosing) { /* Open singlets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(singlets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(singlets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DHF file %S", filtername); ajFmtPrint("Could not open singlets filter file %S", filtername); } else { /* Read DHF file. */ ok = ajFalse; if(!(hitlist = embHitlistReadFasta(filterf))) { ajWarn("embHitlistReadFasta call failed in seqnr"); ajFmtPrintF(logf, "embHitlistReadFasta call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(hitlist->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty singlets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty singlets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files). */ if(hitlist) { for(x=0; x<hitlist->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,hitlist->hits[x]->Acc); ajStrAssignS(&seq_tmp->Seq->Seq,hitlist->hits[x]->Seq); ajListPushAppend(seq_list,seq_tmp); } embHitlistDel(&hitlist); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /**********************************/ /* Open sets filter file */ /**********************************/ if(dosets) { /* Open sets file. */ ajStrAssignS(&filtername, inname); ajFilenameTrimPathExt(&filtername); ajStrInsertS(&filtername, 0, ajDirGetPath(insets)); ajStrAppendC(&filtername, "."); ajStrAppendS(&filtername, ajDirGetExt(insets)); if((filterf = ajFileNewInNameS(filtername)) == NULL) { ajWarn("Could not open DAF file %S", filtername); ajFmtPrint("Could not open sets filter file %S", filtername); } else { /* Read DAF file. */ ok = ajFalse; if(!(ajDmxScopalgRead(filterf, &scopalg))) { ajWarn("ajDmxScopalgRead call failed in seqnr"); ajFmtPrintF(logf, "ajDmxScopalgRead call failed in seqnr\n"); /* Read sequence set instead. */ seqset = ajSeqsetNew(); seqin = ajSeqinNew(); ajSeqinUsa(&seqin, inname); if(!(ajSeqsetRead(seqset, seqin))) ajFatal("SeqsetRead failed in seqnr"); if(ajSeqsetGetSize(seqset)) ok = ajTrue; } else if(scopalg->N) ok = ajTrue; /* Close DHF file. */ ajFileClose(&filterf); /* Process empty DHF files (should never occur). */ if(!ok) { ajWarn("Empty sets filter file %S\n", filtername); ajFmtPrintF(logf, "Empty sets filter file %S\n", filtername); /* No continue this time. */ } /* 2. Add sequences from filter directories to List but mark them up (they are considered in the redundancy calculation but never appear in the output files).. */ if(scopalg) { for(x=0; x<scopalg->N; x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc,scopalg->Codes[x]); ajStrAssignS(&seq_tmp->Seq->Seq,scopalg->Seqs[x]); /* Remove gap char's & whitespace. */ ajStrRemoveGap(&seq_tmp->Seq->Seq); ajListPushAppend(seq_list,seq_tmp); } ajDmxScopalgDel(&scopalg); } else { for(x=0;x<ajSeqsetGetSize(seqset);x++) { AJNEW0(seq_tmp); seq_tmp->Seq = ajSeqNew(); seq_tmp->Garbage = ajTrue; ajStrAssignS(&seq_tmp->Seq->Acc, ajSeqsetGetseqAccS(seqset, x)); ajStrAssignS(&seq_tmp->Seq->Seq, ajSeqsetGetseqSeqS(seqset, x)); ajListPushAppend(seq_list,seq_tmp); } ajSeqsetDel(&seqset); ajSeqinDel(&seqin); } } } /* 4. Identify redundant domains.. */ if(moden == 1) { if((!embDmxSeqNR(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, thresh, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } else { if((!embDmxSeqNRRange(seq_list, &keep, &nseqnr, matrix, gapopen, gapextend, threshlow, threshup, ajTrue))) ajFatal("embDmxSeqNR failure in seqnr"); } seq_siz = ajListGetLength(seq_list); for(x=0; x<seq_siz; x++) if(ajUintGet(keep, x) == 1) ajUintPut(&nokeep, x, 0); else ajUintPut(&nokeep, x, 1); /* Create output files. */ ajStrAssignS(&outname, inname); ajFilenameTrimPathExt(&outname); outf = ajFileNewOutNameDirS(outname, out); if(dored) redf = ajFileNewOutNameDirS(outname, outred); /* 5. Write non-redundant domains to main output directory. 6. If specified, write redundant domains to output directory. */ embHitlistWriteSubsetFasta(outf, infhits, keep); if(dored) embHitlistWriteSubsetFasta(redf, infhits, nokeep); embHitlistDel(&infhits); ajFileClose(&outf); ajFileClose(&redf); ajStrDel(&inname); while(ajListPop(seq_list, (void **) &seq_tmp)) { ajSeqDel(&seq_tmp->Seq); AJFREE(seq_tmp); } ajListFree(&seq_list); ajUintDel(&keep); ajUintDel(&nokeep); } /* Tidy up. */ ajListFree(&in); if(singlets) ajDirDel(&singlets); if(insets) ajDirDel(&insets); ajDiroutDel(&out); if(outred) ajDiroutDel(&outred); ajFileClose(&logf); ajMatrixfDel(&matrix); ajStrDel(&filtername); ajStrDel(&outname); ajStrDel(&mode); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqout outseq = NULL; AjPList list = NULL; AjPSeq seq = NULL; AjPStr insert = NULL; AjPStr seqstr = NULL; AjPStr* seqr = NULL; AjPFile data = NULL; ajint start = 0; ajint length = 0; ajint amount = 0; ajint scmax = 0; ajint extra = 0; embInit("makeprotseq", argc, argv); data = ajAcdGetInfile("pepstatsfile"); insert = ajAcdGetString("insert"); start = ajAcdGetInt("start"); length = ajAcdGetInt("length"); amount = ajAcdGetInt("amount"); outseq = ajAcdGetSeqoutall("outseq"); list = ajListstrNew(); /* this is checked by acd if(amount <=0 || length <= 0) ajFatal("Amount or length is 0 or less. " "Unable to create any sequences"); */ /* if insert, make sure sequence is large enough */ if(ajStrGetLen(insert)) { length -= ajStrGetLen(insert); /* start= start <= 1 ? 0 : --start; */ /* checked in acd */ start--; if(length <= 0) ajFatal("Sequence smaller than inserted part. " "Unable to create sequences."); } /* make the list of AjPStr to be used in sequence creation */ if(data) { ajDebug("Distribution datafile '%s' given checking type\n", ajFileGetPrintnameC(data)); seqstr = ajStrNew(); ajReadlineTrim(data,&seqstr); if(ajStrFindC(seqstr,"PEPSTATS") == 0) { makeprotseq_parse_pepstats(&list,data); } else { ajWarn("Not pepstats file. Making completely random sequences."); makeprotseq_default_chars(&list); } ajStrDel(&seqstr); ajFileClose(&data); } else makeprotseq_default_chars(&list); /* if insert, make sure type is correct */ /* typecheking code is not working, uncomment and test after it is if(ajStrGetLen(insert)) { seqstr = ajStrNew(); if(prot) ajStrAssignC(&seqstr,"pureprotein"); if(!ajSeqTypeCheckS(&insert,seqstr)) ajFatal("Insert not the same sequence type as sequence itself."); ajStrDel(&seqstr); } */ /* array allows fast creation of a sequences */ scmax = (ajuint) ajListstrToarray(list,&seqr); if(!scmax) ajFatal("No strings in list. No characters to make the sequence."); ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n", scmax,extra,seqr[0]); ajRandomSeed(); while(amount-- > 0) { seqstr = makeprotseq_random_sequence(seqr,scmax,length); if(ajStrGetLen(insert)) ajStrInsertS(&seqstr,start,insert); ajStrFmtLower(&seqstr); seq = ajSeqNew(); ajSeqAssignSeqS(seq, seqstr); ajSeqSetProt(seq); ajSeqoutWriteSeq(outseq, seq); ajSeqDel(&seq); ajStrDel(&seqstr); } ajSeqoutClose(outseq); ajSeqoutDel(&outseq); ajListstrFreeData(&list); ajStrDel(&insert); AJFREE(seqr); embExit(); return 0; }
static void domainalign_ProcessStampFile(AjPStr in, AjPStr out, AjPDomain domain, ajint noden, AjPFile logf) { AjPFile outf = NULL; /* Output file pointer. */ AjPFile inf = NULL; /* Input file pointer. */ AjPStr temp1 = NULL; /* Temporary string. */ AjPStr temp2 = NULL; /* Temporary string. */ AjPStr temp3 = NULL; /* Temporary string. */ AjPStr line = NULL; /* Line of text from input file. */ ajint blk = 1; /* Count of the current block in the input file. Block 1 is the numbering and protein sequences, Block 2 is the secondary structure, Block 3 is the Very/Less/Post similar records*/ AjBool ok = ajFalse; /* Initialise strings. */ line = ajStrNew(); temp1 = ajStrNew(); temp2 = ajStrNew(); temp3 = ajStrNew(); /* Open input and output files. */ if(!(inf=ajFileNewInNameS(in))) ajFatal("Could not open input file in domainalign_ProcessStampFile"); /* Start of code for reading input file. Ignore everything up to first line beginning with 'Number'. */ while((ajReadlineTrim(inf,&line))) { /* ajFileReadLine will trim the tailing \n. */ if((ajStrGetCharPos(line, 1)=='\0')) { ok = ajTrue; break; } } /* Read rest of input file. */ if(ok) { /* Write DOMAIN classification records to file. */ if(!(outf=ajFileNewOutNameS(out))) ajFatal("Could not open output file in domainalign_ProcessStampFile"); if((domain->Type == ajSCOP)) { ajFmtPrintF(outf,"# TY SCOP\n# XX\n"); ajFmtPrintF(outf,"# CL %S",domain->Scop->Class); ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO ", 75," \t\n\r"); ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF ", 75," \t\n\r"); ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA ", 75," \t\n\r"); ajFmtPrintF(outf,"# XX\n"); } else { ajFmtPrintF(outf,"# TY CATH\n# XX\n"); ajFmtPrintF(outf,"# CL %S",domain->Cath->Class); ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR ", 75," \t\n\r"); ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP ",75, " \t\n\r"); ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF ",75, " \t\n\r"); ajFmtPrintF(outf,"# XX\n"); } if((domain->Type == ajSCOP)) { if(noden==1) ajFmtPrintF(outf,"# SI %d\n# XX",domain->Scop->Sunid_Class); else if(noden==2) ajFmtPrintF(outf,"# SI %d\n# XX",domain->Scop->Sunid_Fold); else if(noden==3) ajFmtPrintF(outf,"# SI %d\n# XX", domain->Scop->Sunid_Superfamily); else if(noden==4) ajFmtPrintF(outf,"# SI %d\n# XX", domain->Scop->Sunid_Family); else ajFatal("Node number error in domainalign_ProcessStampFile"); } else { if(noden==5) ajFmtPrintF(outf,"# SI %d\n# XX", domain->Cath->Class_Id); else if(noden==6) ajFmtPrintF(outf,"# SI %d\n# XX", domain->Cath->Arch_Id); else if(noden==7) ajFmtPrintF(outf,"# SI %d\n# XX",domain->Cath->Topology_Id); else if(noden==8) ajFmtPrintF(outf,"# SI %d\n# XX", domain->Cath->Superfamily_Id); else if(noden==9) ajFmtPrintF(outf,"# SI %d\n# XX",domain->Cath->Family_Id); else ajFatal("Node number error in domainalign_ProcessStampFile"); } while((ajReadlineTrim(inf,&line))) { /* Increment counter for block of file. */ if((ajStrGetCharPos(line, 1)=='\0')) { blk++; if(blk==4) blk=1; continue; } /* Block of numbering line and protein sequences. */ if(blk==1) { /* Print the number line out as it is. */ if(ajStrPrefixC(line,"Number")) ajFmtPrintF(outf,"\n# %7s %S\n"," ", line); else { /* Read only the 7 characters of the domain identifier code in. */ ajFmtScanS(line, "%S", &temp1); ajStrAssignSubS(&temp2, temp1, 0, 6); /* Read the sequence. */ ajStrAssignSubS(&temp3, line, 13, 69); ajStrExchangeSetCC(&temp3, " ", "X"); ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment " "with 'X'\n"); ajStrFmtUpper(&temp3); /* Write domain id code and sequence out. */ ajFmtPrintF(outf,"%-15S%7d %S%7d\n", temp2, 0, temp3, 0); } } /* Secondary structure filled with '????' (unwanted). */ else if(blk==2) { continue; } /* Similarity lines. */ else { if(ajStrPrefixC(line,"Post")) { /* Read the sequence. */ ajStrAssignSubS(&temp3, line, 13, 69); /* Write post similar line out. */ ajFmtPrintF(outf,"%-15s%7s %S\n","# Post_similar", " ", temp3); } /* Ignore Very and Less similar lines. */ else continue; } } } else /* ok == ajFalse. */ { ajWarn("\n***********************************************\n" "* STAMP was called but output file was EMPTY! *\n" "* NO OUTPUT FILE GENERATED FOR THIS NODE. *\n" "***********************************************\n"); ajFmtPrintF(logf, "STAMP called but output file empty. " "No output file for this node!"); } /* Clean up and close input and output files. */ ajFileClose(&outf); ajFileClose(&inf); ajStrDel(&line); ajStrDel(&temp1); ajStrDel(&temp2); ajStrDel(&temp3); /* All done. */ return; }
/* Function: include_alignment() * Date: SRE, Sun Jul 5 15:25:13 1998 [St. Louis] * * Purpose: Given the name of a multiple alignment file, * align that alignment to the HMM, and add traces * to an existing array of traces. If do_mapped * is TRUE, we use the HMM's map file. If not, * we use P7ViterbiAlignAlignment(). * * Args: seqfile - name of alignment file * hmm - model to align to * do_mapped- TRUE if we're to use the HMM's alignment map * rsq - RETURN: array of rseqs to add to * dsq - RETURN: array of dsq to add to * sqinfo - RETURN: array of SQINFO to add to * tr - RETURN: array of traces to add to * nseq - RETURN: number of seqs * * Returns: new, realloc'ed arrays for rsq, dsq, sqinfo, tr; nseq is * increased to nseq+ainfo.nseq. */ void include_alignment(char *seqfile, struct plan7_s *hmm, int do_mapped, char ***rsq, char ***dsq, SQINFO **sqinfo, struct p7trace_s ***tr, int *nseq) { int format; /* format of alignment file */ char **aseq; /* aligned seqs */ char **newdsq; char **newrseq; AINFO ainfo; /* info that goes with aseq */ int idx; /* counter over aseqs */ struct p7trace_s *master; /* master trace */ struct p7trace_s **addtr; /* individual traces for aseq */ if (! SeqfileFormat(seqfile, &format, NULL)) switch (squid_errno) { case SQERR_NOFILE: ajFatal("Alignment file %s could not be opened for reading", seqfile); /*FALLTHRU*/ /* a white lie to shut lint up */ case SQERR_FORMAT: default: ajFatal("Failed to determine format of alignment file %s", seqfile); } /* read the alignment from file */ if (! ReadAlignment(seqfile, format, &aseq, &ainfo)) ajFatal("Failed to read aligned sequence file %s", seqfile); for (idx = 0; idx < ainfo.nseq; idx++) s2upper(aseq[idx]); /* Verify checksums before mapping */ if (do_mapped && GCGMultchecksum(aseq, ainfo.nseq) != hmm->checksum) ajFatal("The checksums for alignment file %s and the HMM alignment map don't match.", seqfile); /* Get a master trace */ if (do_mapped) master = MasterTraceFromMap(hmm->map, hmm->M, ainfo.alen); else master = P7ViterbiAlignAlignment(aseq, &ainfo, hmm); /* convert to individual traces */ ImposeMasterTrace(aseq, ainfo.nseq, master, &addtr); /* add those traces to existing ones */ *tr = MergeTraceArrays(*tr, *nseq, addtr, ainfo.nseq); /* additional bookkeeping: add to dsq, sqinfo */ *rsq = ReallocOrDie((*rsq), sizeof(char *) * (*nseq + ainfo.nseq)); DealignAseqs(aseq, ainfo.nseq, &newrseq); for (idx = *nseq; idx < *nseq + ainfo.nseq; idx++) (*rsq)[idx] = newrseq[idx - (*nseq)]; free(newrseq); *dsq = ReallocOrDie((*dsq), sizeof(char *) * (*nseq + ainfo.nseq)); DigitizeAlignment(aseq, &ainfo, &newdsq); for (idx = *nseq; idx < *nseq + ainfo.nseq; idx++) (*dsq)[idx] = newdsq[idx - (*nseq)]; free(newdsq); /* unnecessarily complex, but I can't be bothered... */ *sqinfo = ReallocOrDie((*sqinfo), sizeof(SQINFO) * (*nseq + ainfo.nseq)); for (idx = *nseq; idx < *nseq + ainfo.nseq; idx++) SeqinfoCopy(&((*sqinfo)[idx]), &(ainfo.sqinfo[idx - (*nseq)])); *nseq = *nseq + ainfo.nseq; /* Cleanup */ P7FreeTrace(master); FreeAlignment(aseq, &ainfo); /* Return */ return; }
/* @funcstatic newcoils_read_matrix ******************************************* ** ** Reads the matrix and stores in a hept_pref structure ** ** @param [u] inf [AjPFile] matrix input file ** @return [struct hept_pref*] Matrix data for heptad preference ******************************************************************************/ static struct hept_pref* newcoils_read_matrix(AjPFile inf) { ajint i; ajint j; ajint pt; ajint aa_len; ajint win; float m_g; float sd_g; float m_cc; float sd_cc; float sc; float hept[NCHEPTAD]; AjPStr buff; const char *pbuff; struct hept_pref *h; buff = ajStrNew(); aa_len = strlen(NCAAs); AJNEW(h); AJCNEW(h->m,aa_len); for(i=0; i<aa_len; ++i) { AJCNEW(h->m[i],NCHEPTAD); for(j=0; j<NCHEPTAD; ++j) h->m[i][j] = -1; } AJNEW(h->f); h->n = 0; h->smallest = 1.0; while(ajReadlineTrim(inf,&buff)) { pbuff = ajStrGetPtr(buff); if(*pbuff != '%') { if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0)) { i = h->n; if(strncmp(pbuff,"uw ",3)==0) h->f[i].w = 0; else h->f[i].w = 1; ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc, &sd_cc,&m_g,&sd_g,&sc); h->f[i].win = win; h->f[i].m_cc = (float)m_cc; h->f[i].sd_cc = (float)sd_cc; h->f[i].m_g = (float)m_g; h->f[i].sd_g = (float)sd_g; h->f[i].sc = (float)sc; h->n++; AJCRESIZE(h->f,(h->n)+1); if((h->n)>=9) ajFatal("Too many window parms in matrix file\n"); } else if(*pbuff>='A' && *pbuff<='Z') { /* AA data */ pt = (int)(pbuff[0]-'A'); if(h->m[pt][0]==-1) { ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0], &hept[1],&hept[2],&hept[3],&hept[4], &hept[5],&hept[6]); for(i=0; i<NCHEPTAD; ++i) { h->m[pt][i] = (float)hept[i]; if(h->m[pt][i]>0) { if(h->m[pt][i]<h->smallest) h->smallest = h->m[pt][i]; } else h->m[pt][i]=-1; /* Don't permit zero values */ } } else ajWarn("multiple entries for AA %c in matrix file\n", *pbuff); } else { ajWarn("strange characters in matrix file\n"); ajWarn("Ignoring line: %S\n",buff); } } } ajStrDel(&buff); return h; }
/* @funcstatic domainalign_ProcessTcoffeeFile ********************************* ** ** Parses tcoffee output. ** ** @param [r] in [AjPStr] Name of TCOFFEE input file ** @param [r] align [AjPStr] Name of sequence alignment file for output ** @param [r] domain [AjPDomain] Domain being aligned ** @param [r] noden [ajint] Node-level of alignment** ** @param [r] logf [AjPFile] Log file ** ** @return [void] True on success ** @@ ****************************************************************************/ static void domainalign_ProcessTcoffeeFile(AjPStr in, AjPStr align, AjPDomain domain, ajint noden, AjPFile logf) { AjPFile outf = NULL; /* Output file pointer. */ AjPFile inf = NULL; /* Input file pointer. */ AjPStr temp1 = NULL; /* Temporary string. */ AjPStr temp2 = NULL; /* Temporary string. */ AjPStr temp3 = NULL; /* Temporary string. */ AjPStr line = NULL; /* Line of text from input file. */ /* Initialise strings. */ line = ajStrNew(); temp1 = ajStrNew(); temp2 = ajStrNew(); temp3 = ajStrNew(); /* Open input and output files. */ if(!(inf=ajFileNewInNameS(in))) ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile"); if(!(outf=ajFileNewOutNameS(align))) ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile"); /*Write DOMAIN classification records to file*/ if((domain->Type == ajSCOP)) { ajFmtPrintF(outf,"# TY SCOP\n# XX\n"); ajFmtPrintF(outf,"# CL %S",domain->Scop->Class); ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO ",75," \t\n\r"); ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF ",75," \t\n\r"); ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA ",75," \t\n\r"); ajFmtPrintF(outf,"# XX\n"); } else { ajFmtPrintF(outf,"# TY CATH\n# XX\n"); ajFmtPrintF(outf,"# CL %S",domain->Cath->Class); ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR ",75," \t\n\r"); ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP ",75," \t\n\r"); ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF ",75," \t\n\r"); ajFmtPrintF(outf,"# XX\n"); } if((domain->Type == ajSCOP)) { if(noden==1) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Class); else if(noden==2) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Fold); else if(noden==3) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Scop->Sunid_Superfamily); else if(noden==4) ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Scop->Sunid_Family); else ajFatal("Node number error in domainalign_ProcessStampFile"); } else { if(noden==5) ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Cath->Class_Id); else if(noden==6) ajFmtPrintF(outf,"# SI %d\n# XX\n", domain->Cath->Arch_Id); else if(noden==7) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Topology_Id); else if(noden==8) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Superfamily_Id); else if(noden==9) ajFmtPrintF(outf,"# SI %d\n# XX\n",domain->Cath->Family_Id); else ajFatal("Node number error in domainalign_ProcessStampFile"); } /* Start of code for reading input file. */ /*Ignore everything up to first line beginning with 'Number'*/ while((ajReadlineTrim(inf,&line))) /* ajFileReadLine will trim the tailing \n. */ if((ajStrGetCharPos(line, 1)=='\0')) break; /* Read rest of input file. */ while((ajReadlineTrim(inf,&line))) { if((ajStrGetCharPos(line, 1)=='\0')) continue; /* Print the number line out as it is. */ else if(ajStrPrefixC(line,"CLUSTAL")) continue; else if(ajStrPrefixC(line," ")) ajFmtPrintF(outf,"\n"); /* write out a block of protein sequences. */ else { /* Read only the 7 characters of the domain identifier code in. */ ajFmtScanS(line, "%S %S", &temp1,&temp3); ajStrAssignSubS(&temp2, temp1, 0, 6); /* Read the sequence ajStrAssignSubS(&temp3, line, 13, 69); ajStrExchangeSetCC(&temp3, " ", "X"); ajStrFmtUpper(&temp3);*/ /* Write domain id code and sequence out. */ ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3); } } /* Clean up and close input and output files. */ ajFileClose(&outf); ajFileClose(&inf); ajStrDel(&line); ajStrDel(&temp1); ajStrDel(&temp2); ajStrDel(&temp3); /* All done. */ return; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPFile inf = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr name = NULL; AjPStr mname = NULL; AjPStr tname = NULL; AjPStr pname = NULL; AjPStr line = NULL; AjPStr cons = NULL; AjPStr m = NULL; AjPStr n = NULL; AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */ ajint type; ajint begin; ajint end; ajulong len; ajint i; ajint j; float **fmatrix=NULL; ajint mlen; float maxfs; ajint thresh; float gapopen; float gapextend; float opencoeff; float extendcoeff; const char *p; ajulong maxarr = 1000; ajulong alen; float *path; ajint *compass; size_t stlen; embInit("prophet", argc, argv); seqall = ajAcdGetSeqall("sequence"); inf = ajAcdGetInfile("infile"); opencoeff = ajAcdGetFloat("gapopen"); extendcoeff = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); /*JISON replacing outfile */ opencoeff = ajRoundFloat(opencoeff, 8); extendcoeff = ajRoundFloat(extendcoeff, 8); substr = ajStrNew(); name = ajStrNew(); mname = ajStrNew(); tname = ajStrNew(); line = ajStrNew(); m = ajStrNewC(""); n = ajStrNewC(""); type = prophet_getType(inf,&tname); if(!type) ajFatal("Unrecognised profile/matrix file format"); prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh, &maxfs, &cons); ajAlignSetMatrixName(align, mname); AJCNEW(fmatrix, mlen); for(i=0;i<mlen;++i) { AJCNEW(fmatrix[i], AZ); if(!ajReadlineTrim(inf,&line)) ajFatal("Missing matrix line"); p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t"); for(j=0;j<AZ;++j) { sscanf(p,"%f",&fmatrix[i][j]); p = ajSysFuncStrtok(NULL," \t"); } } AJCNEW(path, maxarr); AJCNEW(compass, maxarr); while(ajSeqallNext(seqall, &seq)) { begin = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignC(&name,ajSeqGetNameC(seq)); strand = ajSeqGetSeqCopyS(seq); ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1); len = ajStrGetLen(substr); if(len > (ULONG_MAX/(ajulong)(mlen+1))) ajFatal("Sequences too big. Try 'supermatcher'"); alen = len*mlen; if(alen>maxarr) { stlen = (size_t) alen; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=alen; } ajStrAssignC(&m,""); ajStrAssignC(&n,""); /* JISON used to be prophet_scan_profile(substr,pname,name,mlen,fmatrix, outf,cons,opencoeff, extendcoeff,path,compass,&m,&n,len); */ /* JISON new call and reset align */ prophet_scan_profile(substr,name,pname,mlen,fmatrix, align,cons,opencoeff, extendcoeff,path,compass,&m,&n,(ajint)len); ajAlignReset(align); ajStrDel(&strand); } for(i=0;i<mlen;++i) AJFREE (fmatrix[i]); AJFREE (fmatrix); AJFREE(path); AJFREE(compass); ajStrDel(&line); ajStrDel(&cons); ajStrDel(&name); ajStrDel(&pname); ajStrDel(&mname); ajStrDel(&tname); ajStrDel(&substr); ajStrDel(&m); ajStrDel(&n); ajSeqDel(&seq); ajFileClose(&inf); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); embExit(); return 0; }
int main(int argc, char **argv) { AjPStr cl = NULL; AjPSeqset seqset = NULL; AjPSeqout seqout = NULL; AjPStr stmp = NULL; AjPStr email = NULL; AjPStr fmt = NULL; AjPStr trtab = NULL; const AjPStr ofn = NULL; AjPStr *applist = NULL; AjBool crc = ajFalse; AjBool alt = ajFalse; AjBool iprlook = ajFalse; AjBool goterms = ajFalse; ajint trlen = 0; ajuint i = 0; ajuint lcnt = 0; AjPStr fn = NULL; AjPFile outf = NULL; embInitPV("eiprscan", argc, argv, "IPRSCAN", VERSION); seqset = ajAcdGetSeqset("sequence"); email = ajAcdGetString("email"); crc = ajAcdGetBoolean("crc"); applist = ajAcdGetList("appl"); fmt = ajAcdGetListSingle("format"); trtab = ajAcdGetListSingle("trtable"); trlen = ajAcdGetInt("trlen"); alt = ajAcdGetBoolean("altjobs"); iprlook = ajAcdGetBoolean("iprlookup"); goterms = ajAcdGetBoolean("goterms"); outf = ajAcdGetOutfile("outfile"); stmp = ajStrNew(); cl = ajStrNewC("iprscan -cli"); fn = ajStrNew(); ajFilenameSetTempname(&fn); seqout = ajSeqoutNew(); if(!ajSeqoutOpenFilename(seqout, fn)) ajFatal("Cannot open temporary file %S",fn); ajSeqoutSetFormatC(seqout, "fasta"); ajSeqoutWriteSet(seqout,seqset); ajSeqoutClose(seqout); ajFmtPrintS(&stmp," -i %S",fn); ajStrAppendS(&cl,stmp); if(!ajSeqsetIsProt(seqset)) ajStrAppendC(&cl," -seqtype n"); if(!crc) ajStrAppendC(&cl," -nocrc"); if(ajStrGetLen(email)) { ajFmtPrintS(&stmp," -email %S",email); ajStrAppendS(&cl,stmp); } i = 0; lcnt = 0; while(applist[i]) { ++lcnt; ++i; } i = 0; while(applist[i]) { if(ajStrMatchC(applist[i],"all")) { if(lcnt != 1) ajFatal("Cannot specify 'all' with multiple " "applications"); ++i; continue; } ajFmtPrintS(&stmp," -appl %S",applist[i]); ajStrAppendS(&cl,stmp); ++i; } ajFmtPrintS(&stmp," -format %S",fmt); ajStrAppendS(&cl,stmp); ajFmtPrintS(&stmp," -trtable %S",trtab); ajStrAppendS(&cl,stmp); ajFmtPrintS(&stmp," -trlen %d",trlen); ajStrAppendS(&cl,stmp); if(alt) ajStrAppendC(&cl," -altjobs"); if(iprlook) ajStrAppendC(&cl," -iprlookup"); if(goterms) ajStrAppendC(&cl," -goterms"); ofn = ajFileGetNameS(outf); ajFmtPrintS(&stmp," -o %S",ofn); ajFileClose(&outf); ajStrAppendS(&cl,stmp); #if 0 ajFmtPrint("%S\n",cl); #endif #if 1 system(ajStrGetPtr(cl)); #endif ajSysFileUnlink(fn); ajStrDel(&cl); ajStrDel(&fn); ajStrDel(&stmp); ajStrDel(&email); ajStrDel(&fmt); ajStrDel(&trtab); ajStrDelarray(&applist); ajSeqoutDel(&seqout); ajSeqsetDel(&seqset); embExit(); return 0; }
static void prophet_read_profile(AjPFile inf, AjPStr *name, AjPStr *mname, ajint *mlen, float *gapopen, float *gapextend, ajint *thresh, float *maxs, AjPStr *cons) { const char *p; AjPStr line = NULL; line = ajStrNew(); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Name",4)) ajFatal("Incorrect profile/matrix file format"); p = ajSysFuncStrtok(p," \t"); p = ajSysFuncStrtok(NULL," \t"); ajStrAssignC(name,p); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Matrix",6)) ajFatal("Incorrect profile/matrix file format"); p = ajSysFuncStrtok(p," \t"); p = ajSysFuncStrtok(NULL," \t"); ajStrAssignC(mname,p); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Length",6)) ajFatal("Incorrect profile/matrix file format"); sscanf(p,"%*s%d",mlen); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Max_score",9)) ajFatal("Incorrect profile/matrix file format"); sscanf(p,"%*s%f",maxs); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Threshold",9)) ajFatal("Incorrect profile/matrix file format"); sscanf(p,"%*s%d",thresh); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Gap_open",8)) ajFatal("Incorrect profile/matrix file format"); sscanf(p,"%*s%f",gapopen); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Gap_extend",10)) ajFatal("Incorrect profile/matrix file format"); sscanf(p,"%*s%f",gapextend); if(!ajReadlineTrim(inf,&line)) ajFatal("Premature EOF in profile file"); p = ajStrGetPtr(line); if(strncmp(p,"Consensus",9)) ajFatal("Incorrect profile/matrix file format"); p = ajSysFuncStrtok(p," \t\n"); p = ajSysFuncStrtok(NULL," \t\n"); ajStrAssignC(cons,p); ajStrDel(&line); return; }
int main(int argc, char **argv) { AjPSeqset seqset = NULL; AjPStr cl = NULL; AjPSeqout seqout = NULL; float thresh = 0.; AjBool netphos = ajFalse; AjPStr format = NULL; AjBool plot = ajFalse; AjPStr fn = NULL; AjPStr stmp = NULL; AjPStr outfname = NULL; embInitPV("eyinoyang", argc, argv, "CBSTOOLS",VERSION); seqset = ajAcdGetSeqset("sequence"); outfname= ajAcdGetOutfileName("outfile"); plot = ajAcdGetBoolean("plot"); netphos = ajAcdGetBoolean("netphos"); thresh = ajAcdGetFloat("threshold"); format = ajAcdGetListSingle("format"); cl = ajStrNewS(ajAcdGetpathC("yinOyang")); fn = ajStrNew(); stmp = ajStrNew(); ajFilenameSetTempname(&fn); seqout = ajSeqoutNew(); if(!ajSeqoutOpenFilename(seqout, fn)) ajFatal("Cannot open temporary file %S",fn); ajSeqoutSetFormatC(seqout, "fasta"); ajSeqoutWriteSet(seqout,seqset); ajSeqoutClose(seqout); if(ajStrMatchC(format,"short")) ajStrAppendC(&cl," -f s"); else if(ajStrMatchC(format,"long")) ajStrAppendC(&cl," -f l"); if(plot) ajStrAppendC(&cl," -g"); if(netphos) ajStrAppendC(&cl," -y"); if(netphos) { ajFmtPrintS(&stmp," -t %f",thresh); ajStrAppendS(&cl,stmp); } ajFmtPrintS(&stmp," %S",fn); ajStrAppendS(&cl,stmp); #if 0 ajFmtPrint("%`S\n",cl); #endif #if 1 ajSysExecOutnameAppendS(cl, outfname); #endif ajSysFileUnlinkS(fn); ajStrDel(&cl); ajStrDel(&fn); ajStrDel(&stmp); ajStrDel(&format); ajSeqoutDel(&seqout); ajSeqsetDel(&seqset); ajStrDel(&outfname); embExit(); return 0; }
/* @header ******************************************************************** ** ******************************************************************************/ int main(int argc, char **argv) { ajFatal("Sorry no PLplot was found on compilation hence NO graph\n"); return 0; }
int main(ajint argc, char **argv) { /* Variable declarations */ AjPFile inf_edam = NULL; /* Name of EDAM data (input) file */ AjPFile acdoutf = NULL; /* Name of ACD (output) file */ AjPList acdinlist = NULL; /* List of ACD file names (input) */ AjPFile acdinf = NULL; /* Name of ACD (input) file */ AjPStr acdname = NULL; /* Name of current acd file */ AjPDirout acdoutdir = NULL; /* Directory for ACD files (output) */ AjPFile inf_ktype = NULL; /* Name of knowntypes.standard file */ PEdam edam = NULL; /* EDAM relations data */ PKtype ktype = NULL; /* Data from knowntype.standard */ /* Read data from acd. */ embInitP("acdrelations",argc,argv,"MYEMBOSS"); /* ACD data handling */ inf_edam = ajAcdGetDatafile("infileedam"); inf_ktype = ajAcdGetInfile("infiletype"); acdinlist = ajAcdGetDirlist("indir"); acdoutdir = ajAcdGetOutdir("outdir"); /* Read data file */ edam = ajEdamNew(); ktype = ajKtypeNew(); acdrelations_readdatfile(inf_edam, &edam); acdrelations_readtypefile(inf_ktype, &ktype); /* Main application loop. Process each ACD file in turn. */ while(ajListPop(acdinlist,(void **)&acdname)) { if(!(acdinf = ajFileNewInNameS(acdname))) ajFatal("Cannot open input ACD file %S\n", acdname); ajFilenameTrimPath(&acdname); if(!(acdoutf = ajFileNewOutNameDirS(acdname, acdoutdir))) ajFatal("Cannot open output ACD file %S\n", acdname); acdrelations_procacdfile(acdinf, acdoutf, edam, ktype); ajFileClose(&acdinf); ajFileClose(&acdoutf); } /* Clean up and exit */ ajFileClose(&inf_edam); ajFileClose(&inf_ktype); ajListFree(&acdinlist); ajDiroutDel(&acdoutdir); ajEdamDel(&edam); ajExit(); return 0; }
int main(int argc, char **argv) { embInitPV("kclustalw", argc, argv, "KBWS", "1.0.8"); struct soap soap; struct ns1__clustalwInputParams params; char* jobid; char* result; AjPSeqall seqall; AjPSeq seq; AjPFile outf; AjPFile outf_dnd; AjPStr substr; AjPStr inseq = NULL; AjPStr alignment; AjPStr output; AjPStr matrix; AjPStr outorder; ajint ktup; ajint window; ajint gapopen; float gapext; ajint gapdist; AjBool endgaps; ajint pairgap; ajint topdiags; AjPStr score; AjBool tossgaps; AjBool kimura; AjPStr outputtree; AjBool tree; AjBool quicktree; AjBool align; AjPStr clustering; ajint numiter; AjPStr iteration; alignment = ajAcdGetString("alignment"); output = ajAcdGetString("output"); matrix = ajAcdGetString("matrix"); outorder = ajAcdGetString("outorder"); ktup = ajAcdGetInt("ktup"); window = ajAcdGetInt("window"); gapopen = ajAcdGetInt("gapopen"); gapext = ajAcdGetFloat("gapext"); gapdist = ajAcdGetInt("gapdist"); endgaps = ajAcdGetBoolean("endgaps"); pairgap = ajAcdGetInt("pairgap"); topdiags = ajAcdGetInt("topdiags"); score = ajAcdGetString("score"); tossgaps = ajAcdGetBoolean("tossgaps"); kimura = ajAcdGetBoolean("kimura"); outputtree = ajAcdGetString("outputtree"); tree = ajAcdGetBoolean("tree"); quicktree = ajAcdGetBoolean("quicktree"); align = ajAcdGetBoolean("align"); clustering = ajAcdGetString("clustering"); numiter = ajAcdGetInt("numiter"); iteration = ajAcdGetString("iteration"); seqall = ajAcdGetSeqall("seqall"); outf = ajAcdGetOutfile("outfile"); outf_dnd = ajAcdGetOutfile("dndoutfile"); params.alignment = ajCharNewS(alignment); params.output = ajCharNewS(output); params.matrix = ajCharNewS(matrix); params.outorder = ajCharNewS(outorder); params.ktup = ktup; params.window = window; params.gapopen = gapopen; params.gapext = gapext; params.gapdist = gapdist; if (endgaps) { params.endgaps = xsd__boolean__true_; } else { params.endgaps = xsd__boolean__false_; } params.pairgap = pairgap; params.topdiags = topdiags; params.score = ajCharNewS(score); if (tossgaps) { params.tossgaps = xsd__boolean__true_; } else { params.tossgaps = xsd__boolean__false_; } if (kimura) { params.kimura = xsd__boolean__true_; } else { params.kimura = xsd__boolean__false_; } params.outputtree = ajCharNewS(outputtree); if (tree) { params.tree = xsd__boolean__true_; } else { params.tree = xsd__boolean__false_; } if (quicktree) { params.quicktree = xsd__boolean__true_; } else { params.quicktree = xsd__boolean__false_; } if (align) { params.align = xsd__boolean__true_; } else { params.align = xsd__boolean__false_; } params.clustering = ajCharNewS(clustering); params.numiter = numiter; params.iteration = ajCharNewS(iteration); AjPStr tmp = NULL; AjPStr tmpFileName = NULL; AjPSeqout fil_file; AjPStr line = NULL; /* if "AjPStr line; -> ajReadline is not success!" */ AjPStr sizestr = NULL; ajint thissize; ajint nb = 0; AjBool are_prot = ajFalse; ajint size = 0; AjPFile infile; tmp = ajStrNewC("fasta"); fil_file = ajSeqoutNew(); tmpFileName = getUniqueFileName(); if( !ajSeqoutOpenFilename(fil_file, tmpFileName) ) { embExitBad(); } ajSeqoutSetFormatS(fil_file, tmp); while (ajSeqallNext(seqall, &seq)) { if (!nb) { are_prot = ajSeqIsProt(seq); } ajSeqoutWriteSeq(fil_file, seq); ++nb; } ajSeqoutClose(fil_file); ajSeqoutDel(&fil_file); if (nb < 2) { ajFatal("Multiple alignments need at least two sequences"); } infile = ajFileNewInNameS(tmpFileName); while (ajReadline(infile, &line)) { ajStrAppendS(&inseq,line); ajStrAppendC(&inseq,"\n"); } soap_init(&soap); char* in0; in0 = ajCharNewS(inseq); if ( soap_call_ns1__runClustalw( &soap, NULL, NULL, in0, ¶ms, &jobid ) == SOAP_OK ) { fprintf(stderr,"Jobid: %s\n",jobid); } else { soap_print_fault(&soap, stderr); } int check = 0; while ( check == 0 ) { if ( soap_call_ns1__checkStatus( &soap, NULL, NULL, jobid, &check ) == SOAP_OK ) { fprintf(stderr,"*"); } else { soap_print_fault(&soap, stderr); } sleep(3); } fprintf(stderr,"\n"); char* type; type = "out"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); fprintf(stdout, "%s\n", ajStrGetPtr(substr)); } else { soap_print_fault(&soap, stderr); } type = "aln"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); ajFmtPrintF(outf,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } type = "dnd"; if(soap_call_ns1__getMultiResult( &soap, NULL, NULL, jobid, type, &result )== SOAP_OK) { substr = ajStrNewC(result); ajFmtPrintF(outf_dnd,"%S\n",substr); } else { soap_print_fault(&soap, stderr); } ajSysFileUnlinkS(tmpFileName); soap_destroy(&soap); soap_end(&soap); soap_done(&soap); ajFileClose(&outf_dnd); ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&substr); embExit(); return 0; }
/* @funcstatic domainalign_stamp ********************************************** ** ** Call STAMP and process files. ** ** @param [r] prevdomain [AjPDomain] Previous domain. ** @param [r] domain [AjPDomain] This domain. ** @param [r] daf [AjPDirout] Domain alignment files. ** @param [r] super [AjPDirout] Superimposition files. ** @param [r] singlets [AjPDirout] Singlet files. ** @param [r] align [AjPStr] Align. ** @param [r] alignc [AjPStr] Alignc. ** @param [r] dom [AjPStr] Dom. ** @param [r] name [AjPStr] Name. ** @param [r] set [AjPStr] Name of set file. ** @param [r] scan [AjPStr] Name of scan file. ** @param [r] sort [AjPStr] Name of sort file. ** @param [r] log [AjPStr] Lof file name. ** @param [r] out [AjPStr] Out file name. ** @param [r] keepsinglets [AjBool] Keep singlet sequences or not. ** @param [r] moden [ajint] Mode number. ** @param [r] noden [ajint] Node number. ** @param [r] nset [ajint] Number in set. ** @param [r] logf [AjPFile] Lof file. ** ** ** @return [void] True on success ** @@ ****************************************************************************/ static void domainalign_stamp(AjPDomain prevdomain, AjPDomain domain, AjPDirout daf, AjPDirout super, AjPDirout singlets, AjPStr align, AjPStr alignc, AjPStr dom, AjPStr name, AjPStr set, AjPStr scan, AjPStr sort, AjPStr log, AjPStr out, AjBool keepsinglets, ajint moden, ajint noden, ajint nset, AjPFile logf) { AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPFile clusterf = NULL; /* File pointer for log file. */ ajint ncluster = 0; /* Counter for the number of clusters. */ AjPStr line = NULL; /* Holds a line from the log file. */ AjPRegexp rexp = NULL; /* For parsing no. of clusters in log file */ AjPStr temp = NULL; /* A temporary string. */ ajint x = 0; /* Loop counter. */ exec = ajStrNew(); line = ajStrNew(); temp = ajStrNew(); rexp = ajRegCompC("^(Cluster:)"); ajDebug("domainalign_stamp name: '%S'\n", name); /* Call STAMP. */ ajFmtPrintS(&exec, "stamp -l %S -s -n 2 -slide 5 -prefix %S -d %S", dom, name, set); ajFmtPrint("\n%S\n\n", exec); ajSysSystem(exec); ajFmtPrintS(&exec, "sorttrans -f %S -s Sc 2.5", scan); ajFmtPrint("\n%S > %S\n\n", exec, sort); ajSysSystemOut(exec, sort); ajFmtPrintS(&exec, "stamp -l %S -prefix %S", sort, name); ajFmtPrint("\n%S > %S\n\n", exec, log); ajSysSystemOut(exec, log); ajFmtPrintS(&exec, "transform -f %S -g -o %S", sort, alignc); ajFmtPrint("\n%S\n\n", exec); ajSysSystem(exec); /* Count the number of clusters in the log file. */ if(!(clusterf=ajFileNewInNameS(log))) ajFatal("Could not open log file '%S'\n", log); ncluster=0; while(ajReadlineTrim(clusterf,&line)) if(ajRegExec(rexp,line)) ncluster++; ajFileClose(&clusterf); ajDebug("ncluster: %d\n", ncluster); /* Call STAMP ... calculate two fields for structural equivalence using threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */ ajFmtPrintS(&exec,"poststamp -f %S.%d -min 0.5", name, ncluster); ajFmtPrint("%S\n\n", exec); ajSysSystem(exec); /* Call STAMP ... convert block format alignment into clustal format. */ ajFmtPrintS(&exec,"ver2hor -f %S.%d.post", name, ncluster); ajFmtPrint("%S > %S\n\n", exec, out); ajSysSystemOut(exec, out); /* Process STAMP alignment file and generate alignment file for output. */ domainalign_ProcessStampFile(out, align, prevdomain, noden, logf); /* Remove all temporary files. */ for(x=1;x<ncluster+1;x++) { ajFmtPrintS(&temp, "%S.%d", name, x); ajSysFileUnlink(temp); } ajFmtPrintS(&temp, "%S.%d.post", name, ncluster); ajSysFileUnlink(temp); ajStrDel(&exec); ajStrDel(&line); ajStrDel(&temp); ajRegFree(&rexp); return; }
int main(int argc, char **argv) { AjPFile infdat = NULL; AjPFile infdoc = NULL; AjPFile outf = NULL; AjPFile outs = NULL; AjBool haspattern; const char *p; AjPStr line = NULL; AjPStr text = NULL; AjPStr dirname = NULL; AjPStr filename = NULL; AjPStr id = NULL; AjPStr ac = NULL; AjPStr de = NULL; AjPStr pa = NULL; AjPStr ps = NULL; AjPStr fn = NULL; AjPStr re = NULL; AjPStr fname = NULL; AjBool flag; AjBool isopen; AjBool goback; ajlong storepos = 0L; embInit("prosextract", argc, argv); dirname = ajAcdGetDirectoryName("prositedir"); line = ajStrNew(); text = ajStrNew(); id = ajStrNew(); ac = ajStrNew(); de = ajStrNew(); pa = ajStrNew(); ps = ajStrNew(); fn=ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.dat"); if(!(infdat=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fn=ajStrNewC("PROSITE/prosite.lines"); outf = ajDatafileNewOutNameS(fn); ajStrDel(&fn); haspattern = ajFalse; while(ajReadlineTrim(infdat, &line) ) { if(ajStrPrefixC(line, "ID")) { if(ajStrSuffixC(line,"PATTERN.")) { haspattern = ajTrue; /*save id*/ p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t;"); p = ajSysFuncStrtok(NULL," \t;"); ajStrAssignC(&id,p); ajFmtPrintF(outf, "%S ", id); continue; } else { haspattern = ajFalse; continue; } } if(!haspattern) continue; if(ajStrPrefixC(line, "AC") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t;"); p = ajSysFuncStrtok(NULL, " \t;"); ajStrAssignC(&ac,p); ajFmtPrintF(outf, "%S\n ", ac); continue; } if(ajStrPrefixC(line, "DE") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAssignC(&de,p); ajFmtPrintF(outf, "%S\n ", de); continue; } if(ajStrPrefixC(line, "PA")) { ajStrAssignC(&pa,""); while(ajStrPrefixC(line,"PA")) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAppendC(&pa,p); ajReadlineTrim(infdat, &line); } ajFmtPrintF(outf, "%S\n", pa); re = embPatPrositeToRegExp(pa); ajFmtPrintF(outf, "^%S\n\n", re); ajStrDel(&re); continue; } } /* Finished processing prosite.dat so look at prosite.doc */ fn = ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.doc"); if(!(infdoc=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fname = ajStrNewC("PROSITE/"); flag = ajFalse; isopen = ajFalse; goback = ajFalse; while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text, "{PS") && isopen && !goback) goback = ajTrue; if(ajStrPrefixC(text, "{PS") && !isopen) { storepos = ajFileResetPos(infdoc); /* save out the documentation text to acc numbered outfiles . */ p = ajStrGetPtr(text)+1; p = ajSysFuncStrtok(p, ";"); ajStrAssignS(&filename, fname); ajStrAppendC(&filename, p); outs = ajDatafileNewOutNameS(filename); flag = ajTrue; isopen = ajTrue; continue; } if(ajStrPrefixC(text, "{BEGIN}") && flag) { while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text,"{END}")) break; ajFmtPrintF(outs, "%S\n", text); } ajFileClose(&outs); isopen = ajFalse; if(goback) { goback = ajFalse; ajFileSeek(infdoc,storepos,0); } } } ajStrDel(&line); ajStrDel(&text); ajStrDel(&dirname); ajStrDel(&filename); ajStrDel(&id); ajStrDel(&ac); ajStrDel(&de); ajStrDel(&pa); ajStrDel(&re); ajStrDel(&ps); ajStrDel(&fname); ajFileClose(&infdat); ajFileClose(&infdoc); ajFileClose(&outf); embExit(); return 0; }
int main(int argc, char **argv) { ajint famn = 0; /* Counter for the families. */ ajint nset = 0; /* No. entries in family. */ ajint last_nodeid = 0; /* SCOP Sunid of last family that was processed. */ AjPStr last_node = NULL; /* Last family that was processed. */ AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPStr out = NULL; /* Name of stamp alignment file. */ AjPStr align = NULL; /* Name of sequence alignment file. */ AjPStr alignc = NULL; /* Name of structure alignment file. */ AjPStr log = NULL; /* Name of STAMP log file. */ AjPStr dom = NULL; /* Name of file containing single domain. */ AjPStr set = NULL; /* Name of file containing set of domains. */ AjPStr scan = NULL; /* Name of temp. file used by STAMP. */ AjPStr sort = NULL; /* Name of temp. file used by STAMP. */ AjPStr name = NULL; /* Base name of STAMP temp files. */ AjPStr pdbnames = NULL; /* Names of domain pdb files to be passed to TCOFFEEE. */ AjPDir pdb = NULL; /* Path of domain coordinate files (pdb format input). */ AjPDirout daf = NULL; /* Path of sequence alignment files for output. */ AjPDirout super = NULL; /* Path of structure alignment files for output. */ AjPDirout singlets = NULL; /* Path of FASTA singlet sequence files for output. */ AjPStr temp1 = NULL; /* A temporary string. */ AjPFile dcfin = NULL; /* File pointer for original Escop.dat file.*/ AjPFile domf = NULL; /* File pointer for single domain file. */ AjPFile setf = NULL; /* File pointer for domain set file. */ AjPFile logf = NULL; /* Log file. */ AjPDomain domain = NULL; /* Pointer to domain structure. */ AjPDomain prevdomain = NULL; /* Pointer to previous domain structure. */ ajint type = 0; /* Type of domain (ajSCOP or ajCATH) in the DCF file. */ AjPStr *node = NULL; /* Node of alignment . */ ajint noden = 0; /*1: Class (SCOP), 2: Fold (SCOP) etc, see ACD file. */ AjPStr *mode = NULL; /* Mode of operation from acd*/ ajint moden = 0; /* Program mode, 1: MODE_STAMP, 2: MODE_TCOFFEE (not yet implemented). */ AjBool keepsinglets= ajFalse; /*Whether to retain sequences of singlet families and write them to an output file. */ AjPStr temp = NULL; /* A temporary string. */ AjPStr cmd = NULL; /* The command line to execute t-coffee. */ /* Initialise strings etc*/ last_node = ajStrNew(); exec = ajStrNew(); out = ajStrNew(); align = ajStrNew(); alignc = ajStrNew(); log = ajStrNew(); dom = ajStrNew(); set = ajStrNew(); scan = ajStrNew(); sort = ajStrNew(); name = ajStrNew(); temp = ajStrNew(); temp1 = ajStrNew(); cmd = ajStrNew(); pdbnames = ajStrNew(); /* Read data from acd. */ embInitPV("domainalign",argc,argv,"DOMALIGN",VERSION); dcfin = ajAcdGetInfile("dcfinfile"); pdb = ajAcdGetDirectory("pdbdir"); daf = ajAcdGetOutdir("dafoutdir"); super = ajAcdGetOutdir("superoutdir"); singlets = ajAcdGetOutdir("singletsoutdir"); node = ajAcdGetList("node"); mode = ajAcdGetList("mode"); keepsinglets = ajAcdGetToggle("keepsinglets"); logf = ajAcdGetOutfile("logfile"); /* Convert the selected node and mode to an integer. */ if(!(ajStrToInt(node[0], &noden))) ajFatal("Could not parse ACD node option"); if(!(ajStrToInt(mode[0], &moden))) ajFatal("Could not parse ACD node option"); /* Initialise random number generator for naming of temp. files. */ ajRandomSeed(); ajFilenameSetTempname(&name); /* Create names for temp. files. */ ajStrAssignS(&log, name); ajStrAppendC(&log, ".log"); ajStrAssignS(&dom, name); ajStrAppendC(&dom, ".dom"); ajStrAssignS(&set, name); ajStrAppendC(&set, ".set"); ajStrAssignS(&scan, name); ajStrAppendC(&scan, ".scan"); ajStrAssignS(&sort, name); ajStrAppendC(&sort, ".sort"); ajStrAssignS(&out, name); ajStrAppendC(&out, ".out"); /* Initialise last_node with something that is not in SCOP. */ ajStrAssignC(&last_node,"!!!!!"); /* Open STAMP domain set file. */ if(moden == MODE_STAMP) { if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } /* Get domain type. */ type = ajDomainDCFType(dcfin); /* Start of main application loop. */ while((domain=(ajDomainReadCNew(dcfin, "*", type)))) { /* A new family. */ if(((domain->Type == ajSCOP) && (((noden==1) && (last_nodeid != domain->Scop->Sunid_Class)) || ((noden==2) && (last_nodeid != domain->Scop->Sunid_Fold)) || ((noden==3) && (last_nodeid != domain->Scop->Sunid_Superfamily))|| ((noden==4) && (last_nodeid != domain->Scop->Sunid_Family)))) || ((domain->Type == ajCATH) && (((noden==5) && (last_nodeid != domain->Cath->Class_Id)) || ((noden==6) && (last_nodeid != domain->Cath->Arch_Id)) || ((noden==7) && (last_nodeid != domain->Cath->Topology_Id)) || ((noden==8) && (last_nodeid != domain->Cath->Superfamily_Id)) || ((noden==9) && (last_nodeid != domain->Cath->Family_Id))))) { /* If we have done the first family. */ if(famn) { /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); if(moden == MODE_STAMP) { /* Close domain set file. */ ajFileClose(&setf); /* Call STAMP. */ /* Family with 2 or more entries. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); /* Open STAMP domain set file. */ if(!(setf=ajFileNewOutNameS(set))) ajFatal("Could not open domain set file\n"); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Set the number of members of the new family to zero. */ nset = 0; /* Clear TCOFFEE argument. */ ajStrSetClear(&pdbnames); } /* Open, write and close STAMP domain file. */ if(moden == MODE_STAMP) { if(!(domf=ajFileNewOutNameS(dom))) ajFatal("Could not open domain file\n"); ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); ajFmtPrintF(domf, "%S %S { ALL }\n", temp, temp); ajFileClose(&domf); } /* Copy current family name to last_node. */ domainalign_writelast(domain, noden, &last_node, &last_nodeid); /* Copy current domain pointer to prevdomain. */ ajDomainDel(&prevdomain); prevdomain=NULL; ajDomainCopy(&prevdomain, domain); /* Increment family counter. */ famn++; } ajStrAssignS(&temp, ajDomainGetId(domain)); ajStrFmtLower(&temp); /* Write STAMP domain set file. */ if(moden == MODE_STAMP) ajFmtPrintF(setf, "%S %S { ALL }\n", temp, temp); /* Write TCOFFEE argument. */ else { ajStrAppendS(&pdbnames, ajDirGetPath(pdb)); ajStrAppendS(&pdbnames, temp); ajStrAppendC(&pdbnames, "."); ajStrAppendS(&pdbnames, ajDirGetExt(pdb)); ajStrAppendC(&pdbnames, " "); } ajDomainDel(&domain); /* Increment number of members in family. */ nset++; } /* End of main application loop. */ domain=prevdomain; ajFmtPrint("\nProcessing node %d\n", last_nodeid); /* Create the output file for the alignment - the name will be the same as the Sunid for the DOMAIN family. */ domainalign_writeid(prevdomain, noden, daf, super, &align, &alignc); /* Code to process last family. */ if(moden == MODE_STAMP) { /*Close domain set file. */ ajFileClose(&setf); /* ajFmtPrint("\n***** SECOND CALL\n");. */ if(nset > 1) { domainalign_stamp(prevdomain, domain, daf, super, singlets, align, alignc, dom, name, set, scan, sort, log, out, keepsinglets, moden, noden, nset, logf); } else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } else { /* Call TCOFEE. */ if(nset > 1) domainalign_tcoffee(prevdomain, out, align, alignc, pdbnames, noden, logf); else if(keepsinglets) /* Singlet family. */ domainalign_keepsinglets(prevdomain, noden, singlets, logf); } /* Remove all temporary files. */ ajSysFileUnlink(log); ajSysFileUnlink(dom); ajSysFileUnlink(set); ajSysFileUnlink(scan); ajSysFileUnlink(sort); ajSysFileUnlink(out); ajStrAssignS(&temp, name); ajStrAppendC(&temp, ".mat"); ajSysFileUnlink(temp); /* Tidy up*/ ajDomainDel(&domain); ajFileClose(&dcfin); ajStrDel(&last_node); ajStrDel(&exec); ajStrDel(&log); ajStrDel(&dom); ajStrDel(&set); ajStrDel(&scan); ajStrDel(&sort); ajStrDel(&name); ajStrDel(&out); ajStrDel(&align); ajStrDel(&alignc); ajStrDel(&pdbnames); ajDirDel(&pdb); ajDiroutDel(&daf); ajDiroutDel(&super); ajDiroutDel(&singlets); ajStrDel(&temp); ajStrDel(&temp1); ajStrDel(&node[0]); AJFREE(node); ajStrDel(&mode[0]); AJFREE(mode); ajFileClose(&logf); ajExit(); return 0; }
static AjBool assemblyexceptionadaptorFetchAllBySQL( EnsPAssemblyexceptionadaptor aea, const AjPStr statement, AjPList aes) { ajint ori = 0; ajuint identifier = 0; ajuint erid = 0; ajuint srid = 0; ajuint erstart = 0; ajuint srstart = 0; ajuint erend = 0; ajuint srend = 0; AjPSqlstatement sqls = NULL; AjISqlrow sqli = NULL; AjPSqlrow sqlr = NULL; AjPStr typestr = NULL; EnsEAssemblyexceptionType type = ensEAssemblyexceptionTypeNULL; EnsPAssemblyexception ae = NULL; if(!aea) return ajFalse; if(!statement) return ajFalse; if(!aes) return ajFalse; sqls = ensDatabaseadaptorSqlstatementNew(aea->Adaptor, statement); sqli = ajSqlrowiterNew(sqls); while(!ajSqlrowiterDone(sqli)) { identifier = 0; srid = 0; srstart = 0; srend = 0; typestr = ajStrNew(); erid = 0; erstart = 0; erend = 0; ori = 0; type = ensEAssemblyexceptionTypeNULL; sqlr = ajSqlrowiterGet(sqli); ajSqlcolumnToUint(sqlr, &identifier); ajSqlcolumnToUint(sqlr, &srid); ajSqlcolumnToUint(sqlr, &srstart); ajSqlcolumnToUint(sqlr, &srend); ajSqlcolumnToStr(sqlr, &typestr); ajSqlcolumnToUint(sqlr, &erid); ajSqlcolumnToUint(sqlr, &erstart); ajSqlcolumnToUint(sqlr, &erend); ajSqlcolumnToInt(sqlr, &ori); /* Set the Assembly Exception type. */ type = ensAssemblyexceptionTypeFromStr(typestr); if(!type) ajFatal("assemblyexceptionadaptorFetchAllBySQL " "got unexpected Assembly Exception type '%S' " "from database.\n", typestr); ae = ensAssemblyexceptionNew(aea, identifier, srid, srstart, srend, erid, erstart, erend, ori, type); ajListPushAppend(aes, (void *) ae); ajStrDel(&typestr); } ajSqlrowiterDel(&sqli); ensDatabaseadaptorSqlstatementDel(aea->Adaptor, &sqls); return ajTrue; }
static ajint silent_restr_read(AjPList *relist,const AjPStr enzymes) { EmbPPatRestrict rptr = NULL; AjPFile fin = NULL; AjPStr refilename = NULL; register ajint RStotal = 0; PRinfo rinfo = NULL; AjBool isall = ajFalse; ajint ne = 0; ajint i; AjPStr *ea = NULL; refilename = ajStrNewC("REBASE/embossre.enz"); rptr = embPatRestrictNew(); *relist = ajListNew(); fin = ajDatafileNewInNameS(refilename); if(!fin) ajFatal("Aborting...restriction file '%S' not found", refilename); /* Parse the user-selected enzyme list */ if(!enzymes) isall = ajTrue; else { ne = ajArrCommaList(enzymes,&ea); for(i=0;i<ne;++i) ajStrRemoveWhite(&ea[i]); if(ajStrMatchCaseC(ea[0],"all")) isall = ajTrue; else isall = ajFalse; } while(!ajFileIsEof(fin)) { if(!embPatRestrictReadEntry(rptr,fin)) continue; if(!isall) { for(i=0;i<ne;++i) if(ajStrMatchCaseS(ea[i],rptr->cod)) break; if(i==ne) continue; } AJNEW(rinfo); /* reading in RE info into rinfo from EmbPPatRestrict structure */ rinfo->code = ajStrNewS(rptr->cod); rinfo->site = ajStrNewS(rptr->pat); rinfo->revsite = ajStrNewS(rptr->pat); ajSeqstrReverse(&rinfo->revsite); rinfo->ncuts = rptr->ncuts; rinfo->cut1 = rptr->cut1; rinfo->cut2 = rptr->cut2; rinfo->cut3 = rptr->cut3; rinfo->cut4 = rptr->cut4; ajListPush(*relist,(void *)rinfo); RStotal++; } for(i=0;i<ne;++i) ajStrDel(&ea[i]); AJFREE(ea); embPatRestrictDel(&rptr); ajFileClose(&fin); ajStrDel(&refilename); return RStotal; }
int main(int argc, char **argv) { const char *codons[]= { "TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */ "TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */ "GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */ "AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */ "ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */ "CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */ "TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */ "ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC" /* 56-63 */ }; const char *aa= "***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY"; AjPFile inf = NULL; AjPFile outf = NULL; char *entryname = NULL; AjPStr fname = NULL; AjPStr key = NULL; AjPStr tmpkey = NULL; AjBool allrecords = AJFALSE; AjPTable table = NULL; ajint i = 0; ajint j = 0; ajint k = 0; ajint x = 0; ajint savecount[3]; AjPStr *keyarray = NULL; CutgPValues *valarray = NULL; AjPCod codon = NULL; ajint sum = 0; char c; AjPList flist = NULL; AjPFile logf = NULL; AjPStr entry = NULL; AjPStr baseentry = NULL; AjPStr wild = NULL; AjPStr division = NULL; AjPStr release = NULL; AjPStr wildspecies = NULL; CutgPValues value = NULL; AjPStr docstr = NULL; AjPStr species = NULL; AjPStr filename = NULL; ajint nstops; embInit("cutgextract",argc,argv); tmpkey = ajStrNew(); fname = ajStrNew(); table = ajTablestrNewLen(TABLE_ESTIMATE); flist = ajAcdGetDirlist("directory"); wild = ajAcdGetString("wildspec"); release = ajAcdGetString("release"); logf = ajAcdGetOutfile("outfile"); wildspecies = ajAcdGetString("species"); filename = ajAcdGetString("filename"); allrecords = ajAcdGetBoolean("allrecords"); ajStrInsertC(&release, 0, "CUTG"); ajStrRemoveWhite(&release); while(ajListPop(flist,(void **)&entry)) { ajStrAssignS(&baseentry, entry); ajFilenameTrimPath(&baseentry); ajDebug("Testing file '%S'\n", entry); if(!ajStrMatchWildS(baseentry,wild)) { ajStrDel(&entry); continue; } ajDebug("... matched wildcard '%S'\n", wild); inf = ajFileNewInNameS(entry); if(!inf) ajFatal("cannot open file %S",entry); ajFmtPrintS(&division, "%F", inf); ajFilenameTrimAll(&division); while((entryname = cutgextract_next(inf, wildspecies, &species, &docstr))) { if(ajStrGetLen(filename)) ajStrAssignS(&tmpkey,filename); else ajStrAssignC(&tmpkey,entryname); /* See if organism is already in the table */ value = ajTableFetch(table,tmpkey); if(!value) /* Initialise */ { key = ajStrNewS(tmpkey); AJNEW0(value); ajStrAssignS(&value->Species,species); ajStrAssignS(&value->Division, division); ajTablePut(table,(void *)key,(void *)value); } for(k=0;k<3;k++) savecount[k] = value->Count[k]; nstops = cutgextract_readcodons(inf,allrecords, value->Count); if(nstops < 1) { value->Skip++; continue; } value->CdsCount++; if(nstops>1) { value->CdsCount += (nstops - 1); value->Warn++; ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'", nstops, value->Count[0] - savecount[0], value->Count[1] - savecount[1], value->Count[2] - savecount[2], cutgextractSavepid); } } ajStrDel(&entry); ajFileClose(&inf); } ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray); i = 0; while(keyarray[i]) { key = keyarray[i]; value = (CutgPValues) valarray[i++]; codon = ajCodNew(); sum = 0; for(j=0;j<CODONS;++j) { sum += value->Count[j]; x = ajCodIndexC(codons[j]); codon->num[x] = value->Count[j]; c = aa[j]; if(c=='*') codon->aa[x] = 27; else codon->aa[x] = c-'A'; } ajCodCalcUsage(codon,sum); ajStrAppendC(&key, ".cut"); if(allrecords) { if(value->Warn) ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n", key, value->CdsCount, value->Warn); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } else { if(value->Skip) ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n", key, value->CdsCount, value->Skip); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } ajFmtPrintS(&fname,"CODONS/%S",key); outf = ajDatafileNewOutNameS(fname); if(!outf) ajFatal("Cannot open output file %S",fname); ajCodSetNameS(codon, key); ajCodSetSpeciesS(codon, value->Species); ajCodSetDivisionS(codon, value->Division); ajCodSetReleaseS(codon, release); ajCodSetNumcds(codon, value->CdsCount); ajCodSetNumcodons(codon, sum); ajCodWrite(codon, outf); ajFileClose(&outf); ajStrDel(&key); ajStrDel(&value->Division); ajStrDel(&value->Doc); ajStrDel(&value->Species); AJFREE(value); ajCodDel(&codon); } AJFREE(keyarray); AJFREE(valarray); ajTableFree(&table); ajListFree(&flist); ajStrDel(&wild); ajStrDel(&release); ajStrDel(&wildspecies); ajStrDel(&filename); ajFileClose(&logf); ajStrDel(&cutgextractSavepid); ajStrDel(&cutgextractLine); ajStrDel(&cutgextractOrg); ajStrDel(&fname); ajStrDel(&tmpkey); ajStrDel(&species); ajStrDel(&docstr); ajStrDel(&division); ajStrDel(&baseentry); embExit(); return 0; }
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ const char *seqfile; /* file to read target sequence from */ char **rseq; /* raw, unaligned sequences */ SQINFO *sqinfo; /* info associated with sequences */ char **dsq; /* digitized raw sequences */ int nseq; /* number of sequences */ char **aseq; /* aligned sequences */ AINFO ainfo; /* alignment information */ float *wgt; /* per-sequence weights */ int i; struct plan7_s *hmm; /* HMM to align to */ struct p7trace_s **tr; /* traces for aligned sequences */ int be_quiet; /* TRUE to suppress verbose banner */ int matchonly; /* TRUE to show only match state syms */ const char *outfile; /* optional alignment output file */ FILE *ofp; /* handle on alignment output file */ AjPFile ajwithali; /* name of additional alignment file to align */ AjPFile ajmapali; /* name of additional alignment file to map */ AjBool ajmatch=ajFalse; AjPFile outf=NULL; AjPStr outfname=NULL; AjPFile inf=NULL; AjPStr infname=NULL; AjPSeqset seqset=NULL; AjPStr ajseqfile=NULL; char* mapali=NULL; char* withali=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ matchonly = FALSE; outfile = NULL; be_quiet = FALSE; withali = NULL; mapali = NULL; embInitPV("ohmmalign",argc,argv,"HMMER",VERSION); ajmatch = ajAcdGetBoolean("matchonly"); if(ajmatch) matchonly=TRUE; else matchonly=FALSE; ajmapali = ajAcdGetInfile("mapalifile"); if (ajmapali) mapali = ajCharNewS(ajFileGetNameS(ajmapali)); ajFileClose(&ajmapali); ajwithali = ajAcdGetInfile("withalifile"); if (ajwithali) withali = ajCharNewS(ajFileGetNameS(ajwithali)); ajFileClose(&ajwithali); be_quiet=TRUE; outf = ajAcdGetOutfile("outfile"); outfname = ajStrNewC((char *)ajFileGetNameC(outf)); if(*ajStrGetPtr(outfname)>31) ajFileClose(&outf); outfile = ajStrGetPtr(outfname); inf = ajAcdGetInfile("hmmfile"); infname = ajStrNewC((char *)ajFileGetNameC(inf)); ajFileClose(&inf); hmmfile = ajStrGetPtr(infname); seqset = ajAcdGetSeqset("sequences"); ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename)); seqfile = ajStrGetPtr(ajseqfile); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. * * Currently hmmalign disallows the J state and * only allows one domain per sequence. To preserve * the S/W entry information, the J state is explicitly * disallowed, rather than calling a Plan7*Config() function. * this is a workaround in 2.1 for the 2.0.x "yo!" bug. ***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; P7Logoddsify(hmm, TRUE); /* do we have the map we might need? */ if (mapali != NULL && ! (hmm->flags & PLAN7_MAP)) ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile); /*********************************************** * Open sequence file in current directory. * Read all seqs from it. ***********************************************/ /* if (! SeqfileFormat(seqfile, &format, NULL)) switch (squid_errno) { case SQERR_NOFILE: ajFatal("Sequence file %s could not be opened for reading", seqfile); case SQERR_FORMAT: default: ajFatal("Failed to determine format of sequence file %s", seqfile); } if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq)) ajFatal("Failed to read any sequences from file %s", seqfile); */ emboss_rseqs(seqset,&rseq,&sqinfo,&nseq); /*********************************************** * Show the banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { /* Banner(stdout, banner); */ printf( "HMM file: %s\n", hmmfile); printf( "Sequence file: %s\n", seqfile); printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); } /*********************************************** * Do the work ***********************************************/ /* Allocations and initializations. */ dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); /* Align each sequence to the model, collect traces */ for (i = 0; i < nseq; i++) { dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len); if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT) (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); else (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); } /* Include an aligned alignment, if desired. */ if (mapali != NULL) include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq); if (withali != NULL) include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq); /* Turn traces into a multiple alignment */ wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly, &aseq, &ainfo); /*********************************************** * Output the alignment ***********************************************/ if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL) { WriteSELEX(ofp, aseq, &ainfo, 50); printf("Alignment saved in file %s\n", outfile); fclose(ofp); } else WriteSELEX(stdout, aseq, &ainfo, 50); /*********************************************** * Cleanup and exit ***********************************************/ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); FreeSequence(rseq[i], &(sqinfo[i])); free(dsq[i]); } FreeAlignment(aseq, &ainfo); FreePlan7(hmm); free(sqinfo); free(rseq); free(dsq); free(wgt); free(tr); SqdClean(); ajStrDel(&outfname); ajStrDel(&infname); ajStrDel(&ajseqfile); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajSeqsetDel(&seqset); ajFileClose(&ajwithali); ajFileClose(&ajmapali); embExit(); return 0; }
int main(int argc, char **argv) { /* ACD data item variables */ AjPSeqset alignfile = NULL; AjPFile prior = NULL; AjPFile null = NULL; AjPFile pam = NULL; float pamwgt = 0.0; AjPStr nhmm = NULL; AjPStr strategy = NULL; ajint pbswitch = 0; float archpri = 0.0; AjBool binary = ajFalse; AjBool fast = ajFalse; float gapmax = 0.0; AjBool hand = ajFalse; float idlevel = 0.0; AjBool noeff = ajFalse; float swentry = 0.0; float swexit = 0.0; AjBool verbosity = ajFalse; AjPStr weighting = NULL; AjPFile hmmfile = NULL; AjPFile o = NULL; AjPFile cfile = NULL; /* Housekeeping variables */ AjPStr cmd = NULL; AjPStr rnd1 = NULL; AjPStr rnd2 = NULL; AjPStr fmt = NULL; char option; AjBool fmtok = ajFalse; AjPStr hmmfilename = NULL; /* ACD file processing */ embInitPV("ehmmbuild",argc,argv,"HMMERNEW",VERSION); alignfile = ajAcdGetSeqset("alignfile"); prior = ajAcdGetInfile("prior"); null = ajAcdGetInfile("null"); pam = ajAcdGetInfile("pam"); pamwgt = ajAcdGetFloat("pamwgt"); nhmm = ajAcdGetString("nhmm"); strategy = ajAcdGetListSingle("strategy"); pbswitch = ajAcdGetInt("pbswitch"); archpri = ajAcdGetFloat("archpri"); binary = ajAcdGetBoolean("binary"); fast = ajAcdGetBoolean("fast"); gapmax = ajAcdGetFloat("gapmax"); hand = ajAcdGetBoolean("hand"); idlevel = ajAcdGetFloat("sidlevel"); noeff = ajAcdGetBoolean("noeff"); swentry = ajAcdGetFloat("swentry"); swexit = ajAcdGetFloat("swexit"); verbosity = ajAcdGetBoolean("verbosity"); weighting = ajAcdGetListSingle("weighting"); hmmfile = ajAcdGetOutfile("hmmfile"); o = ajAcdGetOutfile("o"); cfile = ajAcdGetOutfile("cfile"); /* MAIN APPLICATION CODE */ /* 1. Housekeeping */ cmd = ajStrNew(); rnd1 = ajStrNew(); rnd2 = ajStrNew(); fmt = ajStrNew(); hmmfilename = ajStrNew(); ajStrAssignC(&hmmfilename, ajFileGetNameC(hmmfile)); /* 2. Ensure alignfile is in format HMMER can understand. These include FASTA, GENBANK,EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL and PHYLIP. EMBOSS name definitions are taken from seqInFormatDef in ajseqread.c and seqOutFormat in ajseqwrite.c */ fmtok=ajFalse; ajStrAssignS(&fmt, ajSeqsetGetFormat(alignfile)); if(ajStrMatchC(fmt, "fasta") || ajStrMatchC(fmt, "genbank") || ajStrMatchC(fmt, "embl") || ajStrMatchC(fmt, "gcg") || ajStrMatchC(fmt, "pir") || ajStrMatchC(fmt, "stockholm")|| ajStrMatchC(fmt, "selex") || ajStrMatchC(fmt, "msf") || ajStrMatchC(fmt, "clustal") || ajStrMatchC(fmt, "phylip")) fmtok = ajTrue; /* This could be replaced with code to reformat the file. */ if(!fmtok) ajFatal("Input alignment ('alignfile' ACD option) is not in format " "HMMER understands. Please use a a file in FASTA, GENBANK, " "EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL or PHYLIP format."); /* 3. Build hmmbuild command line */ /* Command line is built in this order: i. Application name. ii. HMMER 'options' (in order they appear in ACD file) iii.HMMER 'options' (that don't appear in ACD file) iv. HMMER & new parameters. */ ajStrAssignS(&cmd, ajAcdGetpathC("hmmbuild")); if(prior) ajFmtPrintAppS(&cmd, " --prior %s ", ajFileGetNameC(prior)); if(null) ajFmtPrintS(&cmd, " --null %s ", ajFileGetNameC(null)); if(pam) ajFmtPrintAppS(&cmd, " --pam %s --pamwgt %f ", ajFileGetNameC(pam), pamwgt); ajFmtPrintAppS(&cmd, " -n %S ", nhmm); /* ACD option only allows one selection */ option = ajStrGetCharFirst(strategy); if(option == 'F') ajStrAppendC(&cmd, " -f "); else if(option == 'G') ajStrAppendC(&cmd, " -g "); else if(option == 'S') ajStrAppendC(&cmd, " -s "); /* else go with default ('D' option in ACD file) */ ajFmtPrintAppS(&cmd, " --pbswitch %d ", pbswitch); ajFmtPrintAppS(&cmd, " --archpri %f ", archpri); if(binary) ajStrAppendC(&cmd, " --binary "); if(fast) ajFmtPrintAppS(&cmd, " --fast --gapmax %f ", gapmax); if(hand) ajStrAppendC(&cmd, " --hand "); ajFmtPrintAppS(&cmd, " --idlevel %f ", idlevel); if(noeff) ajStrAppendC(&cmd, " --noeff "); ajFmtPrintAppS(&cmd, " --swentry %f ", swentry); ajFmtPrintAppS(&cmd, " --swexit %f ", swexit); if(verbosity) ajStrAppendC(&cmd, " --verbose "); /* ACD option only allows one selection */ option = ajStrGetCharFirst(weighting); if(option == 'B') ajStrAppendC(&cmd, " --wblosum "); else if(option == 'G') ajStrAppendC(&cmd, " --wgsc "); else if(option == 'K') ajStrAppendC(&cmd, " --wme "); else if(option == 'W') ajStrAppendC(&cmd, " --wpb "); else if(option == 'V') ajStrAppendC(&cmd, " --wvoronoi "); else if(option == 'N') ajStrAppendC(&cmd, " --wnone "); if(o) ajFmtPrintAppS(&cmd, " -o %s ", ajFileGetNameC(o)); if(cfile) ajFmtPrintAppS(&cmd, " --cfile %s ", ajFileGetNameC(cfile)); /* -A (append) always set but file will be wiped by EMBOSS first unless append: "Y" is set for "hmmfile" in the ACD file. */ ajStrAppendC(&cmd, " -A -F "); ajFmtPrintAppS(&cmd, " %S %S", hmmfilename, ajSeqsetGetFilename(alignfile)); /* 4. Close ACD files */ ajSeqsetDel(&alignfile); ajFileClose(&prior); ajFileClose(&null); ajFileClose(&pam); ajFileClose(&hmmfile); ajFileClose(&o); ajFileClose(&cfile); /* 5. Call hmmbuild */ ajFmtPrint("\n%S\n", cmd); ajSysExecS(cmd); /* 6. Exit cleanly */ ajStrDel(&nhmm); ajStrDel(&cmd); ajStrDel(&rnd1); ajStrDel(&rnd2); ajStrDel(&fmt); ajStrDel(&hmmfilename); ajStrDel(&strategy); ajStrDel(&weighting); embExit(); return 0; }