Exemplo n.º 1
0
/* ehmmbuild entry point.
 *
 * Reads every option through the ACD interface, checks that the input
 * alignment is in one of the formats listed in hmmerfmts below, assembles
 * the equivalent "hmmbuild" command line, prints it and hands it to
 * system().
 *
 * argc/argv are passed straight to embInitPV().  Returns 0.
 */
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPSeqset alignfile = NULL;
    AjPFile       prior = NULL;
    AjPFile        null = NULL;
    AjPFile         pam = NULL;
    float        pamwgt = 0.0;
    AjPStr         nhmm = NULL;
    AjPStr     strategy = NULL;
    ajint      pbswitch = 0;
    float       archpri = 0.0;
    AjBool      binary  = ajFalse;
    AjBool         fast = ajFalse;
    float        gapmax = 0.0;
    AjBool         hand = ajFalse;
    float       idlevel = 0.0;
    AjBool        noeff = ajFalse;
    float       swentry = 0.0;
    float        swexit = 0.0;
    AjBool    verbosity = ajFalse;
    AjPStr    weighting = NULL;
    AjPFile     hmmfile = NULL;
    AjPFile           o = NULL;
    AjPFile       cfile = NULL;

    /* Housekeeping variables */
    AjPStr          cmd = NULL;
    AjPStr          fmt = NULL;
    char         option;
    AjBool        fmtok = ajFalse;
    AjPStr  hmmfilename = NULL;
    ajint             i = 0;

    /* Alignment format names accepted for 'alignfile' (NULL terminated) */
    static const char *hmmerfmts[] =
    {
        "fasta", "genbank", "embl", "gcg", "pir",
        "stockholm", "selex", "msf", "clustal", "phylip",
        NULL
    };


    /* ACD file processing */
    embInitPV("ehmmbuild",argc,argv,"HMMERNEW",VERSION);

    alignfile = ajAcdGetSeqset("alignfile");
    prior     = ajAcdGetInfile("prior");
    null      = ajAcdGetInfile("null");
    pam       = ajAcdGetInfile("pam");
    pamwgt    = ajAcdGetFloat("pamwgt");
    nhmm      = ajAcdGetString("nhmm");
    strategy  = ajAcdGetListSingle("strategy");
    pbswitch  = ajAcdGetInt("pbswitch");
    archpri   = ajAcdGetFloat("archpri");
    binary    = ajAcdGetBoolean("binary");
    fast      = ajAcdGetBoolean("fast");
    gapmax    = ajAcdGetFloat("gapmax");
    hand      = ajAcdGetBoolean("hand");
    /* NOTE(review): every other qualifier is named after its variable;
       "sidlevel" may be a typo for "idlevel" - confirm against the ACD
       file before changing it. */
    idlevel   = ajAcdGetFloat("sidlevel");
    noeff     = ajAcdGetBoolean("noeff");
    swentry   = ajAcdGetFloat("swentry");
    swexit    = ajAcdGetFloat("swexit");
    verbosity = ajAcdGetBoolean("verbosity");
    weighting = ajAcdGetListSingle("weighting");
    hmmfile   = ajAcdGetOutfile("hmmfile");
    o         = ajAcdGetOutfile("o");
    cfile     = ajAcdGetOutfile("cfile");


    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd  = ajStrNew();
    fmt  = ajStrNew();
    hmmfilename = ajStrNew();

    /* Keep our own copy of the output file name */
    ajStrAssignC(&hmmfilename, ajFileGetNameC(hmmfile));


    /* 2. Ensure alignfile is in format HMMER can understand.  These include
       FASTA, GENBANK,EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL and PHYLIP.
       EMBOSS name definitions are taken from seqInFormatDef in ajseqread.c and
       seqOutFormat in ajseqwrite.c */
    ajStrAssignS(&fmt, ajSeqsetGetFormat(alignfile));
    for(i = 0; hmmerfmts[i]; ++i)
    {
        if(ajStrMatchC(fmt, hmmerfmts[i]))
        {
            fmtok = ajTrue;
            break;
        }
    }
    /* This could be replaced with code to reformat the file. */
    if(!fmtok)
        ajFatal("Input alignment ('alignfile' ACD option) is not in format "
                "HMMER understands. Please use a a file in FASTA, GENBANK, "
                "EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL or PHYLIP format.");


    /* 3. Build hmmbuild command line */
    /* Command line is built in this order:
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("hmmbuild"));
    if(prior)
        ajFmtPrintAppS(&cmd, " --prior %s ", ajFileGetNameC(prior));
    /* Must append here: ajFmtPrintS would replace the program path and any
       --prior option already written to cmd. */
    if(null)
        ajFmtPrintAppS(&cmd, " --null %s ", ajFileGetNameC(null));
    if(pam)
        ajFmtPrintAppS(&cmd, " --pam %s  --pamwgt %f ", ajFileGetNameC(pam), pamwgt);
    ajFmtPrintAppS(&cmd, " -n %S ", nhmm);

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(strategy);
    if(option == 'F')
        ajStrAppendC(&cmd, " -f ");
    else if(option == 'G')
        ajStrAppendC(&cmd, " -g ");
    else if(option == 'S')
        ajStrAppendC(&cmd, " -s ");
    /* else go with default ('D' option in ACD file) */
    ajFmtPrintAppS(&cmd, " --pbswitch %d ", pbswitch);
    ajFmtPrintAppS(&cmd, " --archpri %f ", archpri);
    if(binary)
        ajStrAppendC(&cmd, " --binary ");
    /* --gapmax is only passed together with --fast */
    if(fast)
        ajFmtPrintAppS(&cmd, " --fast --gapmax %f ", gapmax);
    if(hand)
        ajStrAppendC(&cmd, " --hand ");
    ajFmtPrintAppS(&cmd, " --idlevel %f ", idlevel);
    if(noeff)
        ajStrAppendC(&cmd, " --noeff ");
    ajFmtPrintAppS(&cmd, " --swentry %f ", swentry);
    ajFmtPrintAppS(&cmd, " --swexit %f ", swexit);
    if(verbosity)
        ajStrAppendC(&cmd, " --verbose ");

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(weighting);
    if(option == 'B')
        ajStrAppendC(&cmd, " --wblosum ");
    else if(option == 'G')
        ajStrAppendC(&cmd, " --wgsc ");
    else if(option == 'K')
        ajStrAppendC(&cmd, " --wme ");
    else if(option == 'W')
        ajStrAppendC(&cmd, " --wpb ");
    else if(option == 'V')
        ajStrAppendC(&cmd, " --wvoronoi ");
    else if(option == 'N')
        ajStrAppendC(&cmd, " --wnone ");
    if(o)
        ajFmtPrintAppS(&cmd, " -o %s ", ajFileGetNameC(o));
    if(cfile)
        ajFmtPrintAppS(&cmd, " --cfile %s ", ajFileGetNameC(cfile));
    /* -A (append) always set but file will be wiped by EMBOSS first unless
       append: "Y" is set for "hmmfile" in the ACD file. */
    ajStrAppendC(&cmd, " -A -F ");
    ajFmtPrintAppS(&cmd, " %S %S", hmmfilename, ajSeqsetGetFilename(alignfile));


    /* 4. Close ACD files */
    ajSeqsetDel(&alignfile);
    ajFileClose(&prior);
    ajFileClose(&null);
    ajFileClose(&pam);
    ajFileClose(&hmmfile);
    ajFileClose(&o);
    ajFileClose(&cfile);


    /* 5. Call hmmbuild */
    /* NOTE(review): file names are interpolated unquoted into a shell
       command, so names containing spaces or shell metacharacters will
       break or alter it; the exit status of system() is also ignored. */
    ajFmtPrint("\n%S\n", cmd);
    system(ajStrGetPtr(cmd));


    /* 6. Exit cleanly */
    ajStrDel(&nhmm);
    ajStrDel(&cmd);
    ajStrDel(&fmt);
    ajStrDel(&hmmfilename);
    ajStrDel(&strategy);
    ajStrDel(&weighting);

    embExit();

    return 0;
}
Exemplo n.º 2
0
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i=0;
    const AjPStr fmt=NULL;
    const char *p=NULL;
    char  c='\0';
    /*
    char *q=NULL;
    AjPSelexseq   sqdata=NULL;
    AjPSelexdata sdata=NULL;
    */
    ajint cnt=0;
    info->name = NULL;
    info->rf=NULL;
    info->cs=NULL;
    info->desc=NULL;
    info->acc=NULL;
    info->au=NULL;
    info->flags=0;

    AjPStr tmpstr = NULL;

    ajSeqsetFill(seqset);

    fmt = ajSeqsetGetFormat(seqset);
    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa =NULL;
    }

    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);
        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }
        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /*
        info->rf = ajCharNewS(seq);

    	len = ajStrGetLen(seq->Selexdata->name);
    	info->name = ajCharNewRes(len+1);
    	strcpy(info->name,ajStrGetPtr(seq->Selexdata->name));
    	len = ajStrGetLen(seq->Selexdata->de);
    	info->desc = ajCharNewRes(len+1);

    	sdata = seq->Selexdata;
    	strcpy(info->desc,ajStrGetPtr(sdata->de));
    	len = ajStrGetLen(sdata->ac);
    	info->acc = ajCharNewRes(len+1);
    	strcpy(info->acc,ajStrGetPtr(sdata->ac));
    	len = ajStrGetLen(sdata->au);
    	info->au = ajCharNewRes(len+1);
    	strcpy(info->au,ajStrGetPtr(sdata->au));
    	if(sdata->tc[0] || sdata->tc[1])
    	{
    	    info->flags |= AINFO_TC;
    	    info->tc1 = sdata->tc[0];
    	    info->tc2 = sdata->tc[1];
    	}
    	if(sdata->nc[0] || sdata->nc[1])
    	{
    	    info->flags |= AINFO_NC;
    	    info->nc1 = sdata->nc[0];
    	    info->nc2 = sdata->nc[1];
    	}
    	if(sdata->ga[0] || sdata->ga[1])
    	{
    	    info->flags |= AINFO_GA;
    	    info->ga1 = sdata->ga[0];
    	    info->ga2 = sdata->ga[1];
    	}

    	for(i=0;i<n;++i)
    	{
    	    seq = ajSeqsetGetseqSeq(seqset,i);
    	    sqdata = seq->Selexdata->sq;
    	    if((len=ajStrGetLen(sqdata->name)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name));
    		else
    		    strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63);
    		info->sqinfo[i].name[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_NAME;
    	    }
    / *
    	    if((len=ajStrGetLen(sqdata->id)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id));
    		else
    		    strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63);
    		info->sqinfo[i].id[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ID;
    	    }
    * /

    	    strcpy(info->sqinfo[i].id,info->sqinfo[i].name);
    	    info->sqinfo[i].flags |= SQINFO_ID;
    	    if((len=ajStrGetLen(sqdata->ac)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac));
    		else
    		    strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63);
    		info->sqinfo[i].acc[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ACC;
    	    }
    	    if((len=ajStrGetLen(sqdata->de)))
    	    {
    		if(len<127)
    		    strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de));
    		else
    		    strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127);
    		info->sqinfo[i].desc[127]='\0';
    		info->sqinfo[i].flags |= SQINFO_DESC;
    	    }
    	    if(sqdata->start || sqdata->stop || sqdata ->len)
    	    {
    		info->sqinfo[i].start = sqdata->start;
    		info->sqinfo[i].stop  = sqdata->stop;
    		info->sqinfo[i].olen  = sqdata->len;
    		info->sqinfo[i].flags |= SQINFO_START;
    		info->sqinfo[i].flags |= SQINFO_STOP;
    		info->sqinfo[i].flags |= SQINFO_OLEN;
    	    }

    	    if(ajStrGetLen(seq->Selexdata->ss))
    	    {

    		info->sqinfo[i].ss = ajCharNewRes(maxlen+1);
    		p = ajStrGetPtr(seq->Selexdata->ss);
    		q = info->sqinfo[i].ss;
    		while((c==*p))
    		{
    		    if(c=='.' || c==' ' || c=='_' || c=='-')
    			*q++ = c;
    		    ++p;
    		}
    		*q = '\0';
    		info->sqinfo[i].flags |= SQINFO_SS;
    	    }
    	}
        }
    / *
        }
    */


    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;
    ajStrDel(&tmpstr);

    return;
}