Ejemplo n.º 1
0
static void dbiblast_newname(AjPStr* nname, const AjPStr oname,
			     const char *suff)
{

    ajStrAssignS(nname, oname);
    if(ajStrGetCharFirst(oname)=='@')
	ajStrCutStart(nname, 1);
    ajStrAppendK(nname, '.');
    ajStrAppendC(nname, suff);

    return;
}
Ejemplo n.º 2
0
static void dbiblast_dbname(AjPStr* dbname,
			    const AjPStr oname, const char *suff)
{
    AjPStr suffix = NULL;

    ajFmtPrintS(&suffix, ".%s", suff);

    ajStrAssignS(dbname, oname);

    if(ajStrGetCharFirst(oname)=='@')
	ajStrCutStart(dbname, 1);

    if(!ajStrSuffixS(*dbname, suffix))
    {
	ajStrDel(&suffix);
	return;
    }

    ajStrCutEnd(dbname, ajStrGetLen(suffix));

    ajStrDel(&suffix);

    return;
}
Ejemplo n.º 3
0
int main(int argc, char **argv)
{

    AjPSeqall seqall = NULL;
    AjPFile dend_outfile = NULL;
    AjPStr tmp_dendfilename = NULL;
    AjPFile tmp_dendfile = NULL;

    AjPStr tmp_aln_outfile = NULL;
    AjPSeqset seqset = NULL;
    AjPSeqout seqout = NULL;
    AjPSeqin  seqin  = NULL;

    AjBool only_dend;
    AjBool are_prot = ajFalse;
    AjBool do_slow;
    AjBool use_dend;
    AjPFile dend_file = NULL;
    AjPStr dend_filename = NULL;

    ajint ktup;
    ajint gapw;
    ajint topdiags;
    ajint window;
    AjBool nopercent;

    AjPStr pw_matrix = NULL;
    AjPStr pw_dna_matrix  = NULL;
    AjPFile pairwise_matrix = NULL;
    float pw_gapc;
    float pw_gapv;

    AjPStr pwmstr = NULL;
    char   pwmc   = '\0';
    AjPStr pwdstr = NULL;
    char   pwdc   = '\0';

    AjPStr m1str = NULL;
    AjPStr m2str = NULL;
    char   m1c   = '\0';
    char   m2c   = '\0';

    AjPStr matrix = NULL;
    AjPStr dna_matrix = NULL;
    AjPFile ma_matrix = NULL;
    float gapc;
    float gapv;
    AjBool endgaps;
    AjBool norgap;
    AjBool nohgap;
    ajint gap_dist;
    ajint maxdiv;
    AjPStr hgapres = NULL;


    AjPSeqout fil_file = NULL;
    AjPSeq seq = NULL;

    AjPStr cmd = NULL;
    AjPStr tmp = NULL;
    AjPStr tmpFilename;
    AjPStr line = NULL;
    ajint nb = 0;


    /* get all the parameters */

    embInit("emma", argc, argv);

    pwmstr = ajStrNew();
    pwdstr = ajStrNew();
    m1str  = ajStrNew();
    m2str  = ajStrNew();


    seqall = ajAcdGetSeqall("sequence");
    seqout = ajAcdGetSeqoutset("outseq");

    dend_outfile = ajAcdGetOutfile("dendoutfile");

    only_dend = ajAcdGetToggle("onlydend");
    use_dend  = ajAcdGetToggle("dendreuse");
    dend_file = ajAcdGetInfile("dendfile");
    if (dend_file)
	ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file));
    ajFileClose(&dend_file);

    do_slow = ajAcdGetToggle("slowalign");

    ktup      = ajAcdGetInt("ktup");
    gapw      = ajAcdGetInt("gapw");
    topdiags  = ajAcdGetInt("topdiags");
    window    = ajAcdGetInt("window");
    nopercent = ajAcdGetBoolean("nopercent");

    pw_matrix = ajAcdGetListSingle("pwmatrix");
    pwmc = ajStrGetCharFirst(pw_matrix);

    if(pwmc=='b')
	ajStrAssignC(&pwmstr,"blosum");
    else if(pwmc=='p')
	ajStrAssignC(&pwmstr,"pam");
    else if(pwmc=='g')
	ajStrAssignC(&pwmstr,"gonnet");
    else if(pwmc=='i')
	ajStrAssignC(&pwmstr,"id");
    else if(pwmc=='o')
	ajStrAssignC(&pwmstr,"own");


    pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix");
    pwdc = ajStrGetCharFirst(pw_dna_matrix);

    if(pwdc=='i')
	ajStrAssignC(&pwdstr,"iub");
    else if(pwdc=='c')
	ajStrAssignC(&pwdstr,"clustalw");
    else if(pwdc=='o')
	ajStrAssignC(&pwdstr,"own");

    pairwise_matrix = ajAcdGetInfile("pairwisedatafile");

    pw_gapc = ajAcdGetFloat( "pwgapopen");
    pw_gapv = ajAcdGetFloat( "pwgapextend");

    matrix = ajAcdGetListSingle( "matrix");
    m1c = ajStrGetCharFirst(matrix);

    if(m1c=='b')
	ajStrAssignC(&m1str,"blosum");
    else if(m1c=='p')
	ajStrAssignC(&m1str,"pam");
    else if(m1c=='g')
	ajStrAssignC(&m1str,"gonnet");
    else if(m1c=='i')
	ajStrAssignC(&m1str,"id");
    else if(m1c=='o')
	ajStrAssignC(&m1str,"own");


    dna_matrix = ajAcdGetListSingle( "dnamatrix");
    m2c = ajStrGetCharFirst(dna_matrix);

    if(m2c=='i')
	ajStrAssignC(&m2str,"iub");
    else if(m2c=='c')
	ajStrAssignC(&m2str,"clustalw");
    else if(m2c=='o')
	ajStrAssignC(&m2str,"own");


    ma_matrix = ajAcdGetInfile("mamatrixfile");
    gapc      = ajAcdGetFloat("gapopen");
    gapv      = ajAcdGetFloat("gapextend");
    endgaps   = ajAcdGetBoolean("endgaps");
    norgap    = ajAcdGetBoolean("norgap");
    nohgap    = ajAcdGetBoolean("nohgap");
    gap_dist  = ajAcdGetInt("gapdist");
    hgapres   = ajAcdGetString("hgapres");
    maxdiv    = ajAcdGetInt("maxdiv");

    tmp = ajStrNewC("fasta");

    /*
    ** Start by writing sequences into a unique temporary file
    ** get file pointer to unique file
    */


    fil_file = ajSeqoutNew();
    tmpFilename = emma_getUniqueFileName();
    if(!ajSeqoutOpenFilename( fil_file, tmpFilename))
	embExitBad();

    /* Set output format to fasta */
    ajSeqoutSetFormatS( fil_file, tmp);

    while(ajSeqallNext(seqall, &seq))
    {
        /*
        **  Check sequences are all of the same type
        **  Still to be done
        **  Write out sequences
        */
	if (!nb)
	    are_prot  = ajSeqIsProt(seq);
        ajSeqoutWriteSeq(fil_file, seq);
	++nb;
    }
    ajSeqoutClose(fil_file);

    if(nb < 2)
	ajFatal("Multiple alignments need at least two sequences");

    /* Generate clustalw command line */
    cmd = ajStrNewS(ajAcdGetpathC("clustalw"));

    /* add tmp file containing sequences */
    ajStrAppendC(&cmd, " -infile=");
    ajStrAppendS(&cmd, tmpFilename);

    /* add out file name */
    tmp_aln_outfile = emma_getUniqueFileName();
    ajStrAppendC(&cmd, " -outfile=");
    ajStrAppendS(&cmd, tmp_aln_outfile);


    /* calculating just the nj tree or doing full alignment */
    if(only_dend)
        ajStrAppendC(&cmd, " -tree");
    else
        if(!use_dend)
	    ajStrAppendC(&cmd, " -align");

    /* Set sequence type from information from acd file */
    if(are_prot)
        ajStrAppendC(&cmd, " -type=protein");
    else
        ajStrAppendC(&cmd, " -type=dna");


    /*
    **  set output to MSF format - will read in this file later and output
    **  user requested format
    */
    ajStrAppendC(&cmd, " -output=");
    ajStrAppendC(&cmd, "gcg");

    /* If going to do pairwise alignment */
    if(!use_dend)
    {
        /* add fast pairwise alignments*/
        if(!do_slow)
        {
            ajStrAppendC(&cmd, " -quicktree");
            ajStrAppendC(&cmd, " -ktuple=");
            ajStrFromInt(&tmp, ktup);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -window=");
            ajStrFromInt(&tmp, window);
            ajStrAppendS(&cmd, tmp);
            if(nopercent)
                ajStrAppendC(&cmd, " -score=percent");
            else
                ajStrAppendC(&cmd, " -score=absolute");
            ajStrAppendC(&cmd, " -topdiags=");
            ajStrFromInt(&tmp, topdiags);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pairgap=");
            ajStrFromInt(&tmp, gapw);
            ajStrAppendS(&cmd, tmp);
        }
        else
        {
            if(pairwise_matrix)
            {
		if(are_prot)
		    ajStrAppendC(&cmd, " -pwmatrix=");
		else
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix));
            }
            else
            {
		if(are_prot)
		{
		    ajStrAppendC(&cmd, " -pwmatrix=");
		    ajStrAppendS(&cmd, pwmstr);
		}
		else
		{
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		    ajStrAppendS(&cmd, pwdstr);
		}
            }
            ajStrAppendC(&cmd, " -pwgapopen=");
            ajStrFromFloat(&tmp, pw_gapc, 3);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pwgapext=");
            ajStrFromFloat(&tmp, pw_gapv, 3);
            ajStrAppendS(&cmd, tmp);
        }
    }

    /* Multiple alignments */

    /* using existing tree or generating new tree? */
    if(use_dend)
    {
        ajStrAppendC(&cmd, " -usetree=");
        ajStrAppendS(&cmd, dend_filename);
    }
    else
    {
	/* use tmp file to hold dend file, will read back in later */
	tmp_dendfilename = emma_getUniqueFileName();
        ajStrAppendC(&cmd, " -newtree=");
        ajStrAppendS(&cmd, tmp_dendfilename);
    }

    if(ma_matrix)
    {
	if(are_prot)
	    ajStrAppendC(&cmd, " -matrix=");
	else
	    ajStrAppendC(&cmd, " -pwmatrix=");
	ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix));
    }
    else
    {
	if(are_prot)
	{
	    ajStrAppendC(&cmd, " -matrix=");
	    ajStrAppendS(&cmd, m1str);
	}
	else
	{
	    ajStrAppendC(&cmd, " -dnamatrix=");
	    ajStrAppendS(&cmd, m2str);
	}
    }

    ajStrAppendC(&cmd, " -gapopen=");
    ajStrFromFloat(&tmp, gapc, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapext=");
    ajStrFromFloat(&tmp, gapv, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapdist=");
    ajStrFromInt(&tmp, gap_dist);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -hgapresidues=");
    ajStrAppendS(&cmd, hgapres);

    if(!endgaps)
	ajStrAppendC(&cmd, " -endgaps");

    if(norgap)
	ajStrAppendC(&cmd, " -nopgap");

    if(nohgap)
	ajStrAppendC(&cmd, " -nohgap");

    ajStrAppendC(&cmd, " -maxdiv=");
    ajStrFromInt(&tmp, maxdiv);
    ajStrAppendS(&cmd, tmp);


    /*  run clustalw */

/*    ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */
    ajDebug("Executing '%S'\n", cmd);

    ajSysExecS(cmd);

    /* produce alignment file only if one was produced */
    if(!only_dend)
    {
	/* read in tmp alignment output file to output through EMBOSS output */

	seqin = ajSeqinNew();
	/*
	**  add the Usa format to the start of the filename to tell EMBOSS
	**  format of file
	*/
	ajStrInsertC(&tmp_aln_outfile, 0, "msf::");
	ajSeqinUsa(&seqin, tmp_aln_outfile);
	seqset = ajSeqsetNew();
	if(ajSeqsetRead(seqset, seqin))
	{
	    ajSeqoutWriteSet(seqout, seqset);


	    ajSeqoutClose(seqout);
	    ajSeqinDel(&seqin);

	    /* remove the Usa from the start of the string */
	    ajStrCutStart(&tmp_aln_outfile, 5);
	}
	else
	    ajFmtError("Problem writing out EMBOSS alignment file\n");
    }


    /* read in new tmp dend file (if produced) to output through EMBOSS */
    if(tmp_dendfilename!=NULL)
    {
	tmp_dendfile = ajFileNewInNameS( tmp_dendfilename);

	if(tmp_dendfile!=NULL){
	while(ajReadlineTrim(tmp_dendfile, &line))
	    ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line));

	ajFileClose(&tmp_dendfile);
	ajSysFileUnlinkS(tmp_dendfilename);
    }
    }


    ajSysFileUnlinkS(tmpFilename);

    if(!only_dend)
	ajSysFileUnlinkS(tmp_aln_outfile);

    ajStrDel(&pw_matrix);
    ajStrDel(&matrix);
    ajStrDel(&pw_dna_matrix);
    ajStrDel(&dna_matrix);
    ajStrDel(&tmp_dendfilename);
    ajStrDel(&dend_filename);
    ajStrDel(&tmp_aln_outfile);
    ajStrDel(&pwmstr);
    ajStrDel(&pwdstr);
    ajStrDel(&m1str);
    ajStrDel(&m2str);
    ajStrDel(&hgapres);
    ajStrDel(&cmd);
    ajStrDel(&tmp);
    ajStrDel(&tmpFilename);
    ajStrDel(&line);

    ajFileClose(&dend_outfile);
    ajFileClose(&tmp_dendfile);
    ajFileClose(&dend_file);
    ajFileClose(&pairwise_matrix);
    ajFileClose(&ma_matrix);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajSeqoutDel(&fil_file);
    ajSeqinDel(&seqin);

    embExit();

    return 0;
}
Ejemplo n.º 4
0
AjPPatlistRegex ajPatlistRegexRead (const AjPStr patspec,
				    const AjPStr patname,
				    const AjPStr fmt,
				    ajuint type, AjBool upper, AjBool lower)
{
    AjPPatlistRegex patlist = NULL;
    AjPStr line = NULL;
    AjPStr pat  = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPStr patstr = NULL;
    ajuint ifmt;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "regex");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistRegexNewType(type);

    ifmt = patternRegexFormat(fmt);

    if(ajStrGetCharFirst(patspec) ==  '@')
    {
	ajStrCutStart(&patstr, 1);
	infile = ajFilebuffNewNameS(patstr);

	if(!infile)
        {
	    ajErr("Unable to open regular expression file '%S'", patstr);
	    return NULL;
	}

	line = ajStrNew();
	pat  = ajStrNew();
	name = ajStrNew();

	if(!ifmt)
	{
	    ajBuffreadLineTrim(infile,&line);

	    if(ajStrPrefixC(line, ">"))
		ifmt = 2;
	    else
		ifmt = 1;
	    ajFilebuffReset(infile);
	}
	
	switch(ifmt)
	{
	case 1:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		npat++;
		ajStrAppendS (&pat,line);

		if(lower)
		    ajStrFmtLower(&pat);

		if(upper)
		    ajStrFmtUpper(&pat);

		ajFmtPrintS(&name, "%S%u", namestr, npat);
		ajPatternRegexNewList(patlist,name,pat);
		ajStrSetClear(&pat);
	    }
	    break;
	default:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		if (ajStrFindC(line,">")>-1)
		{
		    npat++;

		    if (ajStrGetLen(name))
		    {
			if(lower)
			    ajStrFmtLower(&pat);

			if(upper)
			    ajStrFmtUpper(&pat);

			ajPatternRegexNewList(patlist,name,pat);
			ajStrSetClear(&name);
			ajStrSetClear(&pat);
		    }
		    ajStrCutStart(&line,1);
		    ajStrAssignS (&name,line);

		    if(!ajStrGetLen(name))
			ajFmtPrintS(&name, "%S%u", namestr, npat);
		}
		else
		    ajStrAppendS (&pat,line);
	    }

	    ajStrAssignEmptyS(&name, patname);
	    ajPatternRegexNewList(patlist,name,pat);
	    ajStrSetClear(&pat);
	    break;
	}

	ajFilebuffDel(&infile);
    }
    else
    {
	ajStrAssignS(&pat, patspec);

	if(lower)
	    ajStrFmtLower(&pat);

	if(upper)
	    ajStrFmtUpper(&pat);

	ajStrAssignS(&name, namestr);
	ajPatternRegexNewList(patlist,name,pat);
    }

    ajStrDel(&name);
    ajStrDel(&namestr);
    ajStrDel(&patstr);
    ajStrDel(&line);
    ajStrDel(&pat);

    return patlist;
}
Ejemplo n.º 5
0
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec,
				const AjPStr patname,
				const AjPStr fmt,
				AjBool protein, ajuint mismatches)
{
    AjPPatlistSeq patlist = NULL;
    AjPStr line = NULL;
    AjPStr name = NULL;
    AjPFilebuff infile = NULL;
    AjPRegexp mismreg = NULL;
    AjPStr patstr = NULL;
    AjPStr pat = NULL;
    ajuint mismatch = 0;
    ajint ifmt = 0;
    ajuint npat = 0;
    AjPStr namestr = NULL;

    ajStrAssignS(&namestr, patname);
    ajStrAssignEmptyC(&namestr, "pattern");

    ajStrAssignS(&patstr, patspec);

    patlist = ajPatlistSeqNewType(protein);

    ifmt = patternSeqFormat(fmt);

    ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' "
	    "protein: %B mismatches: %d\n",
	    patspec, patname, protein, mismatches);

    if(ajStrGetCharFirst(patstr) == '@')
    {
	ajStrCutStart(&patstr, 1);
	infile = ajFilebuffNewNameS(patstr);

	if(!infile)
	{
	    ajErr("Unable to open pattern file '%S'", patstr);

	    return NULL;
	}

	line = ajStrNew();
	name = ajStrNew();

	if(!ifmt)
	{
	    ajBuffreadLineTrim(infile,&line);

	    if(ajStrPrefixC(line, ">"))
		ifmt = 2;
	    else
		ifmt = 1;
	    ajFilebuffReset(infile);
	}
	
	switch(ifmt)
	{
	case 1:
	    while (ajBuffreadLineTrim(infile,&line))
	    {
		npat++;
		ajStrAppendS (&pat,line);
		ajFmtPrintS(&name, "%S%u", namestr, npat);
		ajPatternSeqNewList(patlist,name,pat,mismatches);
		ajStrSetClear(&pat);
	    }
	    break;
	default:
	    mismreg = ajRegCompC("<mismatch=(\\d+)>");

	    while (ajBuffreadLineTrim(infile,&line))
	    {
		if (ajStrGetCharFirst(line) == '>')
		{
		    if (ajStrGetLen(name))
		    {
			ajPatternSeqNewList(patlist,name,pat,
					    mismatch);
			ajStrSetClear(&name);
			ajStrSetClear(&pat);
			mismatch=mismatches;
		    }

		    ajStrCutStart(&line,1);

		    if (ajRegExec(mismreg,line))
		    {
			ajRegSubI(mismreg,1,&name);
			ajStrToUint(name,&mismatch);
			ajStrTruncateLen(&line,ajRegOffset(mismreg));
			ajStrTrimWhiteEnd(&line);
		    }
		    ajStrAssignS (&name,line);
		    ajStrAssignEmptyS(&name, patname);
		}
		else
		    ajStrAppendS (&pat,line);
	    }

	    ajStrAssignEmptyS(&name, patname);
	    ajPatternSeqNewList(patlist,name,pat,mismatch);
	    ajRegFree(&mismreg);
	    break;
	}

	ajFilebuffDel(&infile);
    }
    else
    {
        ajStrAssignS(&name, namestr);
	ajPatternSeqNewList(patlist,name,patstr,mismatches);
    }

    ajStrDel(&name);
    ajStrDel(&line);
    ajStrDel(&pat);
    ajStrDel(&namestr);
    ajStrDel(&patstr);

    return patlist;
}
Ejemplo n.º 6
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Ejemplo n.º 7
0
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
Ejemplo n.º 8
0
static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    ajuint seqlen = 0;
    ajuint qlen = 0;
    AjPStr tmpfd  = NULL;
    AjPStr str = NULL;
    AjPStr de = NULL;
    AjBool ok;

    if(!dbxflat_wrdexp)
	dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");
    
    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
    {
        ajStrDel(&line);
        return ajFalse;
    }

    /* first line of entry */

    if(!ajStrPrefixC(line,"@"))
        return ajFalse;

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
	while(ajRegExec(dbxflat_wrdexp,de))
	{
	    ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
	    str = ajStrNew();
	    ajStrAssignS(&str,tmpfd);
	    ajListPush(desfield->data,(void *)str);
	    ajRegPost(dbxflat_wrdexp, &de);
	}
    }

/* now read sequence */
    ok = ajReadlineTrim(inf,&line);
    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
        return ajFalse;

    ok = ajReadlineTrim(inf,&line);
    while(ok)
    {
        qlen += MAJSTRGETLEN(line);
        if(qlen < seqlen)
            ok = ajReadlineTrim(inf,&line);
        else
            ok = ajFalse;
    }

    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);
    
    return ajTrue;
}