示例#1
0
/*
** Add the region start..end of a sequence to a feature string, padding
** with 'N' (nucleotide) or 'X' (otherwise) wherever the region extends
** beyond either end of the sequence.
**
** seq        sequence the region is taken from
** featstr    string that receives the (padded) region
** start      first position wanted, as a 0-based index; may be negative
** end        last position wanted; may lie past the end of the sequence
** sense      when false the region is passed through ajSeqstrReverse
**            before being added
** beginning  when true the region is inserted at the front of featstr,
**            otherwise it is appended
**
** Nothing is added (and nothing is allocated) when start > end.
*/
static void extractfeat_GetRegionPad(const AjPSeq seq, AjPStr *featstr,
				     ajint start, ajint end, AjBool sense,
				     AjBool beginning)
{
    ajint tmp;
    ajint pad;

    AjPStr result = NULL;

    ajDebug("In extractfeat_GetRegionPad start=%d, end=%d\n", start, end);

    /*
    ** Empty region: leave before the work string is allocated.
    ** (Previously the string was created first and leaked here.)
    */
    if(start > end)
    	return;

    result = ajStrNew();

    /* part of the region lies before the sequence start: pad on the left */
    if(start < 0)
    {
        pad = -start;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        start = 0;
    }

    /* clamp the end of the real subsequence to the last residue */
    if(end > (ajint) ajSeqGetLen(seq)-1)
    	tmp = ajSeqGetLen(seq)-1;
    else
    	tmp = end;

    if(start <= (ajint) ajSeqGetLen(seq) && tmp >= 0)
    {
        ajDebug("Get subsequence %d-%d\n", start, tmp);
        ajStrAppendSubS(&result, ajSeqGetSeqS(seq), start, tmp);
        ajDebug("result=%S\n", result);
    }

    /* part of the region lies after the sequence end: pad on the right */
    if(end > (ajint) ajSeqGetLen(seq)-1)
    {
        pad = end - ajSeqGetLen(seq)+1;
        if(ajSeqIsNuc(seq))
            ajStrAppendCountK(&result, 'N', pad);
        else
            ajStrAppendCountK(&result, 'X', pad);
        ajDebug("result=%S\n", result);
    }


    /* if feature was in reverse sense, then get reverse complement */
    if(!sense)
    {
	ajDebug("get reverse sense of subsequence\n");
    	ajSeqstrReverse(&result);
	ajDebug("result=%S\n", result);
    }

    if(beginning)
    {
	ajDebug("Prepend to featstr: %S\n", result);
        ajStrInsertS(featstr, 0, result);
    }
    else
    {
	ajDebug("Append to featstr: %S\n", result);
    	ajStrAppendS(featstr, result);
    }
    ajDebug("featstr=%S\n", *featstr);


    ajStrDel(&result);

    return;
}
/*
** Search a sequence with a compiled sequence pattern and add one feature
** to the feature table for every hit.
**
** ftable   feature table that receives the hits
** seq      sequence to search; an empty sequence is ignored
** pat      pattern (compiled form, name, text and allowed mismatches are
**          read from it)
** reverse  when true the searched string is passed through
**          ajSeqstrReverse and hits are stored as '-' strand features
**          with coordinates mapped back from the reversed string
**
** Each feature is given a score of (match length - mismatches), a
** "*pat" tag and, when there were mismatches, a "*mismatch" tag.
*/
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
			  const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = NULL;
    AjPStr seqstr  = NULL;
    AjPStr seqname = NULL;
    AjPStr tmp     = NULL;
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    /*
    ** Nothing to search: return before any work objects are created.
    ** (They used to be allocated in the declarations and leaked here.)
    */
    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    list    = ajListNew();
    seqstr  = ajStrNew();
    seqname = ajStrNew();
    tmp     = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    /* lazily fill in the file-level default feature types */
    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
			begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
			begin-1,adj-1);

    ajStrFmtUpper(&seqstr);

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
	    pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
	ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

 	if (reverse)
	    /* map the hit in the reversed string back onto the sequence */
	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
	else
        {
	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }

	if(isreversed)
	    ajFeatReverse(sf, seqlen);

	/* score is the number of matching positions */
	ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
示例#3
0
/*
** Scan one sequence with both oligos of a primer pair.
**
** Two upper-cased copies of the sequence are searched: one as read and
** one passed through ajSeqstrReverse.  With reverse false the forward
** oligo is tried on the first copy and the reverse oligo on the second;
** with reverse true the roles of the two oligos are exchanged.  The
** second search is only run when the first one found something, and the
** hits are handed to primersearch_store_hits only when both searches
** found something.
**
** primdata  primer pair (forward and reverse pattern data)
** seq       sequence to scan
** reverse   selects which oligo is tried against which copy
*/
static void primersearch_scan_seq(const Primer primdata,
			     const AjPSeq seq, AjBool reverse)
{
    AjPList top_hits    = NULL;
    AjPList bottom_hits = NULL;
    AjPStr  name        = NULL;
    AjPStr  top         = NULL;
    AjPStr  bottom      = NULL;
    ajuint  ntop        = 0;
    ajuint  nbottom     = 0;

    /* hit lists, one per searched copy */
    top_hits    = ajListNew();
    bottom_hits = ajListNew();

    ajStrAssignC(&name, ajSeqGetNameC(seq));

    /* copy searched as read */
    ajStrAssignS(&top, ajSeqGetSeqS(seq));
    ajStrFmtUpper(&top);

    /* copy searched after ajSeqstrReverse */
    ajStrAssignS(&bottom, ajSeqGetSeqS(seq));
    ajStrFmtUpper(&bottom);
    ajSeqstrReverse(&bottom);

    if(reverse)
    {
	/* OligoB against the first copy, then OligoA against the second */
	embPatFuzzSearch(primdata->reverse->type,
			 ajSeqGetBegin(seq),
			 primdata->reverse->patstr,
			 name,
			 top,
			 top_hits,
			 primdata->reverse->len,
			 primdata->reverse->mm,
			 primdata->reverse->amino,
			 primdata->reverse->carboxyl,
			 primdata->reverse->buf,
			 primdata->reverse->off,
			 primdata->reverse->sotable,
			 primdata->reverse->solimit,
			 primdata->reverse->re,
			 primdata->reverse->skipm,
			 &ntop,
			 primdata->reverse->real_len,
			 &(primdata->reverse->tidy));

	if(ntop)
	    embPatFuzzSearch(primdata->forward->type,
			     ajSeqGetBegin(seq),
			     primdata->forward->patstr,
			     name,
			     bottom,
			     bottom_hits,
			     primdata->forward->len,
			     primdata->forward->mm,
			     primdata->forward->amino,
			     primdata->forward->carboxyl,
			     primdata->forward->buf,
			     primdata->forward->off,
			     primdata->forward->sotable,
			     primdata->forward->solimit,
			     primdata->forward->re,
			     primdata->forward->skipm,
			     &nbottom,
			     primdata->forward->real_len,
			     &(primdata->forward->tidy));
    }
    else
    {
	/* OligoA against the first copy, then OligoB against the second */
	embPatFuzzSearch(primdata->forward->type,
			 ajSeqGetBegin(seq),
			 primdata->forward->patstr,
			 name,
			 top,
			 top_hits,
			 primdata->forward->len,
			 primdata->forward->mm,
			 primdata->forward->amino,
			 primdata->forward->carboxyl,
			 primdata->forward->buf,
			 primdata->forward->off,
			 primdata->forward->sotable,
			 primdata->forward->solimit,
			 primdata->forward->re,
			 primdata->forward->skipm,
			 &ntop,
			 primdata->forward->real_len,
			 &(primdata->forward->tidy));

	if(ntop)
	    embPatFuzzSearch(primdata->reverse->type,
			     ajSeqGetBegin(seq),
			     primdata->reverse->patstr,
			     name,
			     bottom,
			     bottom_hits,
			     primdata->reverse->len,
			     primdata->reverse->mm,
			     primdata->reverse->amino,
			     primdata->reverse->carboxyl,
			     primdata->reverse->buf,
			     primdata->reverse->off,
			     primdata->reverse->sotable,
			     primdata->reverse->solimit,
			     primdata->reverse->re,
			     primdata->reverse->skipm,
			     &nbottom,
			     primdata->reverse->real_len,
			     &(primdata->reverse->tidy));
    }

    /* both oligos matched: work out amplimer length(s) and record them */
    if(ntop && nbottom)
	primersearch_store_hits(primdata, top_hits, bottom_hits,
				seq, reverse);

    /* release everything created for this scan */
    primersearch_clean_hitlist(&top_hits);
    primersearch_clean_hitlist(&bottom_hits);

    ajStrDel(&top);
    ajStrDel(&bottom);
    ajStrDel(&name);

    return;
}
示例#4
0
/*
** Write one assembly read to outf as a single SAM alignment line: the
** eleven tab-separated mandatory fields, then one tab-separated entry
** per tag in r->Tags, then a newline.
**
** outf      output file
** r         read to write
** contigs   array of contigs, indexed by r->Reference and r->Rnext
** ncontigs  number of entries in contigs
**
** Returns ajFalse when the read has no CIGAR and its contig has no
** consensus to derive one from (the line is still written, with "*" as
** CIGAR); ajTrue otherwise.  Calls ajDie when r->Reference is not less
** than ncontigs.
*/
static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
					AjPAssemContig const * contigs,
					ajint ncontigs)
{
    AjPAssemTag    t = NULL;
    AjIList l = NULL;
    AjPStr qualstr = NULL;
    AjPStr tmp  = NULL;
    ajint  POS  = 0;
    AjPStr CIGAR = NULL;
    const char* RNEXT = NULL;
    AjPStr SEQ  = NULL;
    AjPStr QUAL = NULL;
    AjPStr SEQunpadded  = NULL;
    AjPStr QUALunpadded = NULL;
    AjPStr consensus = NULL;
    AjBool rc= ajFalse;
    AjBool ret = ajTrue;
    const char* refseq = NULL;
    const AjPAssemContig contig = NULL;

    ajuint k = 0;

    if(r->Reference>=ncontigs)
	ajDie("assemoutWriteSamAlignment: reference sequence number"
		" '%d' is larger than or equal to known number of reference"
		" sequences '%d'. Problem while processing read '%S'.",
		r->Reference,
		ncontigs,
		r->Name);

    /* a Reference of -1 means the read has no contig */
    contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);

    ajStrAssignRef(&SEQ, r->Seq);
    consensus = contig==NULL? NULL : contig->Consensus;

    /* RNEXT: "*" when unset, "=" when same as Reference, else contig name */
    /* NOTE(review): r->Rnext is not range-checked against ncontigs here */
    if (r->Rnext==-1)
	RNEXT= "*";
    else if(r->Rnext==r->Reference)
	RNEXT = "=";
    else
	RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);

    if (r->Flag & BAM_FREVERSE)
    {
	/* reverse-flagged read: work on a private copy of the qualities */
	rc = ajTrue;
	qualstr = ajStrNewS(r->SeqQ);

	/* stored data not yet reversed: reverse qualities and sequence */
	if(!r->Reversed)
	{
	    ajStrReverse(&qualstr);
	    ajSeqstrReverse(&SEQ);
	}

	QUAL = qualstr;
	POS = r->y1;
	/* tmp = part of SEQ used for CIGAR generation below */
	ajStrAssignSubS(&tmp, SEQ,
		ajStrGetLen(r->Seq) - r->y2,
		ajStrGetLen(r->Seq) - r->x2
	);

    }
    else
    {
	rc= ajFalse;
	POS = r->x1;
	/* QUAL aliases the read's own quality string in this branch */
	QUAL = r->SeqQ;
	ajStrAssignSubS(&tmp, SEQ,
		r->x2-1,
		r->y2-1
	);
    }

    if(r->Cigar==NULL && consensus)
    {
	/* no stored CIGAR: derive one from the consensus starting at POS-1 */
	refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);

	CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));

	SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
	QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));

	/* copy SEQ and QUAL, dropping every position where SEQ has '*' */
	for(k=0; k< ajStrGetLen(SEQ); k++)
	{
	    if (ajStrGetCharPos(SEQ, k) == '*')
		continue;

	    ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
	    ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
	}

	ajDebug("cigar: %S\n", CIGAR);

	ajStrAssignS(&tmp, CIGAR);

	/*
	** Add "<n>S" operations for read bases outside x2..y2
	** (presumably SAM soft clipping - confirm against the SAM spec).
	*/
	if(rc)
	{
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintS(&CIGAR, "%dS%S",
		            ajStrGetLen(SEQ) - r->y2, tmp);
	    if(r->x2 > 1)
		ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
	}
	else
	{
	    if(r->x2 > 1)
		ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintAppS(&CIGAR, "%dS",
		               ajStrGetLen(SEQ) - r->y2);
	}
	ajStrDel(&tmp);
    }
    else if(r->Cigar==NULL)
    {
	/* neither a CIGAR nor a consensus: report it and write "*" */
	ajErr("both CIGAR string and consensus sequence not available");
	ret = ajFalse;
	ajStrAssignK(&CIGAR, '*');
    }
    else if(!ajStrGetLen(r->Cigar))
	ajStrAssignK(&CIGAR, '*');
    else if(ajStrGetLen(r->Cigar))
    {
	/* stored CIGAR is used as is; empty SEQ/QUAL are written as "*" */
	/* NOTE(review): QUAL may alias r->SeqQ or qualstr here - confirm
	** that assigning through QUAL is safe for both */
	if(!ajStrGetLen(SEQ))
	    ajStrAssignK(&SEQ, '*');

	if(!ajStrGetLen(QUAL))
	    ajStrAssignK(&QUAL, '*');
    }

    ajStrDel(&tmp);

    /* mandatory fields: QNAME FLAG RNAME POS MAPQ CIGAR RNEXT PNEXT TLEN
    ** SEQ QUAL; the unpadded copies are used when CIGAR was derived */
    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
	    r->Name,
	    r->Flag,
	    (contig==NULL ? "*" : ajStrGetPtr(contig->Name)),
	    POS,
	    r->MapQ,
	    (CIGAR ? CIGAR : r->Cigar),
	    RNEXT,
	    r->Pnext,
	    r->Tlen,
	    (r->Cigar ? SEQ  : SEQunpadded),
	    (r->Cigar ? QUAL : QUALunpadded));

    /* optional fields: one "\tNAME:TYPE:value" entry per tag */
    l = ajListIterNewread(r->Tags);
    while (!ajListIterDone(l))
    {
	t = ajListIterGet(l);

	/* TODO: array type, 'B' */

	/* In SAM, all single integer types are mapped to int32_t [SAM spec] */
	ajFmtPrintF(outf, "\t%S:%c:",
		t->Name,
		(t->type == 'c' || t->type == 'C' ||
		 t->type == 's' || t->type == 'S'
				|| t->type == 'I') ? 'i' : t->type
	);

	/* tag coordinates are printed only when at least one is non-zero */
	if(t->x1 || t->y1)
	    ajFmtPrintF(outf, " %u %u", t->x1, t->y1);

	if(t->Comment && ajStrGetLen(t->Comment)>0)
	    ajFmtPrintF(outf, "%S", t->Comment);

    }
    ajListIterDel(&l);

    ajFmtPrintF(outf, "\n");

    /* qualstr is only set for reverse-flagged reads */
    if(qualstr)
	ajStrDel(&qualstr);

    ajStrDel(&SEQ);
    ajStrDel(&CIGAR);
    ajStrDel(&SEQunpadded);
    ajStrDel(&QUALunpadded);

    return ret;
}
/*
** Search a sequence with a compiled regular-expression pattern and add
** one feature to the feature table for every match.
**
** ftable   feature table that receives the matches
** seq      sequence to search; an empty sequence is ignored
** pat      regular-expression pattern (compiled form, name and text are
**          read from it)
** reverse  when true the searched string is passed through
**          ajSeqstrReverse and matches are stored as '-' strand
**          features; otherwise the sequence is searched as read
**
** After each match the search restarts one position past the start of
** that match, so overlapping matches are reported.
*/
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr    = NULL;
    AjPStr tmp       = NULL;
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq    = NULL;
    ajint seqlen;

    /*
    ** Nothing to search: return before the work string is created.
    ** (It used to be allocated in its declaration and leaked here.)
    */
    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    tmp = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    /* lazily fill in the file-level default feature types */
    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*
    ** Build the string to search.  The forward assignment must only run
    ** when not searching in reverse: it used to run unconditionally and
    ** overwrote the reversed string, so reverse searches scanned the
    ** forward strand while reporting '-' strand coordinates.
    */
    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    /* keep the match plus everything after it, and advance pos
	    ** to the start of the match */
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }

	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);

	    /* step one position past the match start and search again */
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    /* zero-length match at offset 0: just step forward */
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
示例#6
0
/*
** Entry point of the "silent" application (name passed to embInit).
**
** Reads a sequence, an enzyme selection and display options from ACD,
** runs silent_mismatch on the selected region and on the same region
** passed through ajSeqstrReverse, splits the results with
** silent_split_hits, formats them into a feature table and writes that
** table as a report.
**
** argc, argv  command line, handed to embInit
** Returns 0.
*/
int main(int argc, char **argv)
{
    AjPSeq seq    = NULL;
    AjPReport report = NULL;
    AjPFeattable feat=NULL;
    AjPStr sstr   = NULL;

    const AjPStr sname   = NULL;
    AjPStr revcomp = NULL;
    ajint RStotal;
    AjPStr enzymes = NULL;                /* string for RE selection */

    AjPList relist = NULL;
    ajint begin;
    ajint end;
    ajint radj;
    ajint start;
    AjBool sshow;
    AjBool tshow;
    AjBool allmut;

    AjPList results1 = NULL;              /* for forward strand */
    AjPList results2 = NULL;              /* for reverse strand */
    AjPList shits;
    AjPList nshits;
    AjPStr tailstr = NULL;



    embInit("silent", argc, argv);

    /* command-line / ACD values */
    seq     = ajAcdGetSeq("sequence");
    enzymes = ajAcdGetString("enzymes");
    sshow   = ajAcdGetBoolean("sshow");
    tshow   = ajAcdGetBoolean("tshow");
    allmut  = ajAcdGetBoolean("allmut");
    report = ajAcdGetReport ("outfile");

    /* lists filled by silent_split_hits (judging by the names and the
    ** report header: silent and non-silent hits) */
    shits  = ajListNew();
    nshits = ajListNew();

    /*calling function to read in RE info*/
    RStotal = silent_restr_read(&relist,enzymes);

    begin = ajSeqGetBegin(seq);             /* returns the seq start posn, or 1
                                            if no start has been set */
    end   = ajSeqGetEnd(seq);               /* returns the seq end posn, or seq
                                            length if no end has been set */
    radj=begin+end+1;                    /* posn adjustment for complementary
                                            strand */


    /* begin and end are decremented here and stay 0-based from now on */
    ajStrAssignSubC(&sstr,ajSeqGetSeqC(seq),--begin,--end);
    ajStrFmtUpper(&sstr);

    sname = ajSeqGetNameS(seq);
    ajStrAssignC(&revcomp,ajStrGetPtr(sstr));
    ajSeqstrReverse(&revcomp);
    start  = begin+1;

    feat = ajFeattableNewDna(ajSeqGetNameS(seq));

    if(sshow)
    {
        silent_fmt_sequence("SEQUENCE", sstr,&tailstr,start,ajTrue);
    }

    /* first pass: the region as read */
    results1 = silent_mismatch(sstr,relist,&tailstr,sname,RStotal,begin,radj,
			       ajFalse,end,tshow);

    silent_split_hits(&results1,&shits,&nshits,allmut);

    ajReportSetHeaderC(report,
		       "KEY:\n"
		       "EnzymeName: Enzyme name\n"
		       "RS-Pattern: Restriction enzyme recognition site "
		       "pattern\n"
		       "Base-Posn: Position of base to be mutated\n"
		       "AAs: Amino acid. Original sequence(.)After mutation\n"
		       "Silent: Yes for unchanged amino acid\n"
		       "Mutation: The base mutation to perform\n\n"
		       "Creating silent and non-silent mutations\n");

    silent_fmt_hits(shits,feat, ajTrue, ajFalse);
    if(allmut)
    {
	silent_fmt_hits(nshits,feat, ajFalse, ajFalse);
    }

    if(sshow)
    {
	silent_fmt_sequence("REVERSE SEQUENCE", revcomp,&tailstr,start,ajTrue);
    }

    /* second pass: the reversed region, same lists reused */
    results2 = silent_mismatch(revcomp,relist,&tailstr,
			       sname,RStotal,begin,radj,
			       ajTrue,end,tshow);

    silent_split_hits(&results2,&shits,&nshits,allmut);

    silent_fmt_hits(shits,feat, ajTrue, ajTrue);
    if(allmut)
    {
	silent_fmt_hits(nshits,feat, ajFalse, ajTrue);
    }

    /* write the report, then release everything */
    ajReportSetStatistics(report, 1, ajSeqGetLenTrimmed(seq));
    ajReportSetTailS(report, tailstr);
    (void) ajReportWrite (report,feat,seq);
    ajFeattableDel(&feat);

    ajStrDel(&revcomp);
    ajStrDel(&enzymes);

    ajListFree(&results1);
    ajListFree(&results2);
    ajListFree(&shits);
    ajListFree(&nshits);

    ajReportClose(report);
    ajReportDel(&report);
    ajSeqDel(&seq);
    ajStrDel(&sstr);

    silent_relistdel(&relist);
    ajStrDel(&tailstr);

    embExit();

    return 0;
}
/*
** Entry point of the "stssearch" application (name passed to embInit).
**
** Reads primer pairs from the input file, one per line as
** "name oligoA oligoB" separated by spaces or tabs, compiles both oligos
** as regular expressions, then runs stssearch_primTest over the whole
** primer list for every input sequence.  Lines starting with '#' or
** ending in ".." are skipped.
**
** The variables out, nprimers, ntests, seq, seqstr and revstr are not
** declared here; they are defined elsewhere in the file.
**
** argc, argv  command line, handed to embInit
** Returns 0.  Calls ajFatal when no primers were read.
*/
int main(int argc, char **argv)
{

    AjPSeqall seqall;
    AjPFile primfile;
    AjPStr rdline = NULL;

    Primer primdata;
    AjPStrTok handle = NULL;

    AjPList primList = NULL;

    embInit("stssearch", argc, argv);

    primfile = ajAcdGetInfile("infile");
    out      = ajAcdGetOutfile("outfile");
    seqall   = ajAcdGetSeqall("seqall");

    while(ajReadlineTrim(primfile, &rdline))
    {
	/* skip lines that carry no primer pair */
	if(ajStrGetCharFirst(rdline) == '#')
	    continue;
	if(ajStrSuffixC(rdline, ".."))
	    continue;

	AJNEW(primdata);
	primdata->Name   = NULL;
	primdata->Oligoa = NULL;
	primdata->Oligob = NULL;

	handle = ajStrTokenNewC(rdline, " \t");
	ajStrTokenNextParse(&handle, &primdata->Name);

	/* progress trace for every 1000th primer */
	if(!(nprimers % 1000))
	    ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name);

	ajStrTokenNextParse(&handle, &primdata->Oligoa);
	ajStrFmtUpper(&primdata->Oligoa);
	primdata->Prima = ajRegComp(primdata->Oligoa);

	ajStrTokenNextParse(&handle, &primdata->Oligob);
	ajStrFmtUpper(&primdata->Oligob);
	primdata->Primb = ajRegComp(primdata->Oligob);
	ajStrTokenDel(&handle);

	/* the list is created when the first primer arrives */
	if(!nprimers)
	    primList = ajListNew();

	ajListPushAppend(primList, primdata);
	nprimers++;
    }

    if(!nprimers)
	ajFatal("No primers read\n");

    ajDebug("%d primers read\n", nprimers);

    while(ajSeqallNext(seqall, &seq))
    {
	/* upper-cased sequence and its ajSeqstrReverse'd copy for the tests */
	ajSeqFmtUpper(seq);
	ajStrAssignS(&seqstr, ajSeqGetSeqS(seq));
	ajStrAssignS(&revstr, ajSeqGetSeqS(seq));
	ajSeqstrReverse(&revstr);
	ajDebug("Testing: %s\n", ajSeqGetNameC(seq));
	ntests = 0;
	ajListMap(primList, stssearch_primTest, NULL);
    }

    /* the output file is closed once (it was previously closed twice) */
    ajFileClose(&out);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajStrDel(&revstr);
    ajStrDel(&seqstr);
    ajFileClose(&primfile);
    ajListMap(primList, stssearch_primDel, NULL);
    ajListFree(&primList);
    ajStrDel(&rdline);


    embExit();

    return 0;
}
示例#8
0
/*
** Collect candidate primers on both sides of one product.
**
** Forward candidates all end at startpos-1 and grow leftwards from
** minprimerlen to maxprimerlen bases; reverse candidates all start at
** endpos+1 and grow rightwards over the same length range (their text
** is passed through ajSeqstrReverse).  A candidate is kept when its Tm
** lies within minprimerTm..maxprimerTm and its GC content within
** minpmGCcont..maxpmGCcont.  Kept forward primers are pushed on forlist
** and counted in *neric; kept reverse primers are pushed on revlist and
** counted in *nfred.
**
** When no forward primer is kept the reverse side is not examined.
** When no reverse primer is kept, forlist is emptied and *neric reset
** to 0.
**
** primerlen, minprodlen, maxprodlen and stepping_value are unused.
** prodTm and prodGC are copied into each forward primer only; begin is
** added to every stored start position.  Tm values come from
** ajMeltTempSave using the entropy/enthalpy/energy arrays defined
** elsewhere in the file.
*/
static void prima_testproduct(const AjPStr seqstr,
			      ajint startpos, ajint endpos,
			      ajint primerlen, ajint minprimerlen,
			      ajint maxprimerlen, float minpmGCcont,
			      float maxpmGCcont, ajint minprimerTm,
			      ajint maxprimerTm, ajint minprodlen,
			      ajint maxprodlen, float prodTm, float prodGC,
			      ajint seqlen, PPrimer *eric,
			      PPrimer *fred, AjPList forlist,
			      AjPList revlist, ajint *neric,
			      ajint *nfred, ajint stepping_value,
			      float saltconc, float dnaconc,
			      AjBool isDNA, ajint begin)
{
    AjPStr oligo    = NULL;
    PPrimer discard = NULL;
    const char *bases;
    ajint fwdfirst;
    ajint fwdlast;
    ajint revfirst;
    ajint revlast;
    ajint extra;
    ajint nlengths = 0;
    ajint plen;

    float tm = 0.0;
    float gc = 0.0;

    (void) primerlen;
    (void) minprodlen;
    (void) maxprodlen;
    (void) stepping_value;

    nlengths = maxprimerlen-minprimerlen+1;
    fwdlast  = startpos -1;
    revfirst = endpos +1;

    oligo = ajStrNew();
    bases = ajStrGetPtr(seqstr);

    /* FORWARD PRIMERS: fixed 3' end, start moves left one base per pass */

    fwdfirst = fwdlast-minprimerlen+1;
    extra    = 0;

    while(extra<nlengths && fwdfirst>=0)
    {
	ajStrAssignSubC(&oligo,bases,fwdfirst,fwdlast);
	plen = minprimerlen + extra;

	tm = ajMeltTempSave("",fwdfirst,plen, saltconc,
                            dnaconc, isDNA,
                            &entropy, &enthalpy, &energy);

	/* GC content is only worked out for an acceptable Tm */
	if(!(tm<minprimerTm || tm>maxprimerTm))
	{
	    gc = ajMeltGC(oligo, plen);

	    if(!(gc<minpmGCcont || gc>maxpmGCcont))
	    {
		/* acceptable on both counts: store it */
		AJNEW0(*eric);

		(*eric)->substr     = ajStrNewC(ajStrGetPtr(oligo));
		(*eric)->start      = fwdfirst+begin;
		(*eric)->primerlen  = plen;
		(*eric)->primerTm   = tm;
		(*eric)->primGCcont = gc;
		(*eric)->prodTm     = prodTm;
		(*eric)->prodGC     = prodGC;
		ajListPush(forlist, (void*)*eric);
		(*neric)++;
	    }
	}

	++extra;
	--fwdfirst;
    }

    /* no forward primer: the reverse side need not be looked at */
    if(!*neric)
    {
	ajStrDel(&oligo);
	return;
    }

    /* REVERSE PRIMERS: fixed start, end moves right one base per pass */

    revlast = revfirst + minprimerlen-1;
    extra   = 0;

    while(extra<nlengths && revlast<=seqlen)
    {
	ajStrAssignSubC(&oligo,bases,revfirst,revlast);
	ajSeqstrReverse(&oligo);

	plen = minprimerlen + extra;

	tm = ajMeltTempSave("",revfirst,plen, saltconc,
                            dnaconc, isDNA,
                            &entropy, &enthalpy, &energy);

	if(!(tm<minprimerTm || tm>maxprimerTm))
	{
	    gc = ajMeltGC(oligo, plen);

	    if(!(gc<minpmGCcont || gc>maxpmGCcont))
	    {
		/* acceptable on both counts: store it */
		AJNEW0(*fred);
		(*fred)->substr     = ajStrNewC(ajStrGetPtr(oligo));
		(*fred)->start      = revfirst+begin;
		(*fred)->primerlen  = plen;
		(*fred)->primerTm   = tm;
		(*fred)->primGCcont = gc;
		ajListPush(revlist, (void*)*fred);
		(*nfred)++;
	    }
	}

	++extra;
	++revlast;
    }

    ajStrDel(&oligo);

    /* no reverse partner: the forward primers just stored are useless */
    if(!*nfred)
    {
	*neric = 0;
	while(ajListPop(forlist,(void**)&discard))
	    prima_PrimerDel(&discard);
    }

    return;
}
示例#9
0
/*
** Entry point of the "prima" application (name passed to embInit).
**
** Reads a sequence and the primer/product constraints from ACD, writes
** an input summary to the output file, then either
**   - with targetrange set: checks the product GC content and calls
**     prima_testtarget for the given target, or
**   - otherwise: tries every product start/length allowed by the
**     product length limits, collects acceptable primers with
**     prima_testproduct, filters them (prima_reject_self,
**     prima_test_multi, prima_best_primer) and stores one pair per
**     surviving product; the pairs are then sorted, pruned and checked
**     for overlap.
** Finally every stored pair is printed, in a side-by-side table or as a
** list depending on maxprimerlen, seqlen and the "list" option.
**
** The arrays entropy, enthalpy and energy are not declared here; they
** are defined elsewhere in the file.
**
** argc, argv  command line, handed to embInit
** Returns 0.
*/
int main(int argc, char **argv)
{
    AjPFile outf = NULL;

    AjPSeq sequence = NULL;
    AjPStr substr   = NULL;
    AjPStr seqstr = NULL;
    AjPStr revstr = NULL;

    AjPStr p1;
    AjPStr p2;

    PPrimer eric = NULL;
    PPrimer fred = NULL;

    PPrimer f;
    PPrimer r;

    PPair pair;

    AjPList forlist  = NULL;
    AjPList revlist  = NULL;
    AjPList pairlist = NULL;

    AjBool targetrange;
    AjBool isDNA  = ajTrue;
    AjBool dolist = ajFalse;

    ajint primerlen    = 0;
    ajint minprimerlen = 0;
    ajint maxprimerlen = 0;
    ajint minprodlen   = 0;
    ajint maxprodlen   = 0;
    ajint prodlen      = 0;

    ajint seqlen = 0;
    ajint stepping_value = 1;

    ajint targetstart = 0;
    ajint targetend   = 0;

    ajint limit    = 0;
    ajint limit2   = 0;
    ajint lastpos  = 0;
    ajint startpos = 0;
    ajint endpos   = 0;

    ajint begin;
    ajint end;
    ajint v1;
    ajint v2;

    ajint overlap;

    float minpmGCcont   = 0.;
    float maxpmGCcont   = 0.;
    float minprodGCcont = 0.;
    float maxprodGCcont = 0.;
    float prodTm;
    float prodGC;

    ajint i;
    ajint j;

    ajint neric=0;
    ajint nfred=0;
    ajint npair=0;

    float minprimerTm = 0.0;
    float maxprimerTm = 0.0;

    float saltconc = 0.0;
    float dnaconc  = 0.0;

    embInit ("prima", argc, argv);

    substr = ajStrNew();

    forlist  = ajListNew();
    revlist  = ajListNew();
    pairlist = ajListNew();

    p1 = ajStrNew();
    p2 = ajStrNew();


    /* command-line / ACD values */
    sequence = ajAcdGetSeq("sequence");
    outf     = ajAcdGetOutfile("outfile");

    minprimerlen = ajAcdGetInt("minprimerlen");
    maxprimerlen = ajAcdGetInt("maxprimerlen");
    minpmGCcont  = ajAcdGetFloat("minpmGCcont");
    maxpmGCcont  = ajAcdGetFloat("maxpmGCcont");
    minprimerTm  = ajAcdGetFloat("mintmprimer");
    maxprimerTm  = ajAcdGetFloat("maxtmprimer");

    minprodlen    = ajAcdGetInt("minplen");
    maxprodlen    = ajAcdGetInt("maxplen");
    minprodGCcont = ajAcdGetFloat("minpgccont");
    maxprodGCcont = ajAcdGetFloat("maxpgccont");

    saltconc = ajAcdGetFloat("saltconc");
    dnaconc  = ajAcdGetFloat("dnaconc");

    targetrange = ajAcdGetToggle("targetrange");
    targetstart = ajAcdGetInt("targetstart");
    targetend   = ajAcdGetInt("targetend");

    overlap = ajAcdGetInt("overlap");
    dolist  = ajAcdGetBoolean("list");

    seqstr = ajSeqGetSeqCopyS(sequence);
    ajStrFmtUpper(&seqstr);

    begin  = ajSeqGetBegin(sequence);
    end    = ajSeqGetEnd(sequence);
    seqlen = end-begin+1;

    /* substr = selected region; revstr = that region after ajSeqstrReverse */
    ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1);
    revstr = ajStrNewC(ajStrGetPtr(substr));
    ajSeqstrReverse(&revstr);

    AJCNEW0(entropy, seqlen);
    AJCNEW0(enthalpy, seqlen);
    AJCNEW0(energy, seqlen);

    /* Initialise Tm calculation arrays */
    ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1,
	  &entropy, &enthalpy, &energy);


    /* ---- input summary ---- */
    ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n");
    ajFmtPrintF(outf, "*************\n\n");

    if(targetrange)
	ajFmtPrintF
	    (outf, "Prima of %s from positions %d to %d bps\n",
	     ajSeqGetNameC(sequence),targetstart, targetend);
    else
	ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence));

    ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n");
    ajFmtPrintF
	(outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR ");
    ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n");
    ajFmtPrintF(outf,
		"Primer size range is %d-%d\n",minprimerlen,maxprimerlen);
    ajFmtPrintF(outf,
		"Primer GC content range is %.2f-%.2f\n",minpmGCcont,
		maxpmGCcont);
    ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n",
		minprimerTm, maxprimerTm);

    ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n");

    ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n",
		minprodGCcont, maxprodGCcont);

    ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc);
    ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc);



    if(targetrange)
	ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n",
		    targetstart,targetend);
    else
    {
	ajFmtPrintF(outf,"Considering all suitable Primer pairs with ");
	ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen,
		    maxprodlen);
    }


    ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n");
    ajFmtPrintF(outf, "*****************************************\n\n");


    if(seqlen-minprimerlen < 0)
	ajFatal("Sequence too short");

    /* ---- targeted mode: one fixed product ---- */
    if(targetrange)
    {
	ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin);

	/* NOTE(review): GC and Tm are computed over the whole region
	** (substr, seqlen), not over the target just copied into p1 -
	** confirm this is intended */
	prodGC = ajMeltGC(substr,seqlen);
	prodTm = ajMeltTempProd(prodGC,saltconc,seqlen);

	if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
	{
	    ajFmtPrintF(outf,
			"Product GC content [%.2f] outside acceptable range\n",
			prodGC);
	    embExitBad();
	    return 0;
	}

	prima_testtarget(substr, revstr, targetstart-begin, targetend-begin,
			 minprimerlen, maxprimerlen,
			 seqlen, minprimerTm, maxprimerTm, minpmGCcont,
			 maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc,
			 dnaconc, pairlist, &npair);
    }



    /* ---- scanning mode: every allowed product start and length ---- */
    if(!targetrange)
    {

    limit   = seqlen-minprimerlen-minprodlen+1;
    lastpos = seqlen-minprodlen;
    limit2  = maxprodlen-minprodlen;

    /* Outer loop selects all possible product start points */
    for(i=minprimerlen; i<limit; ++i)
    {
	startpos = i;
	ajDebug("Position in sequence %d\n",startpos);
	endpos = i+minprodlen-1;
	/* Inner loop selects all possible product lengths  */
	for(j=0; j<limit2; ++j, ++endpos)
	{
	    if(endpos>lastpos)
		break;

	    v1 = endpos-startpos+1;
	    ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos);
	    prodGC = ajMeltGC(p1,v1);
	    prodTm = ajMeltTempProd(prodGC,saltconc,v1);

	    if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
		continue;

	    /* Only accept primers with acceptable Tm and GC */
	    neric = 0;
	    nfred = 0;
	    prima_testproduct(substr, startpos, endpos, primerlen,
			      minprimerlen, maxprimerlen,minpmGCcont,
			      maxpmGCcont, minprimerTm, maxprimerTm,
			      minprodlen, maxprodlen, prodTm, prodGC, seqlen,
			      &eric,&fred,forlist,revlist,&neric,&nfred,
			      stepping_value, saltconc,dnaconc, isDNA, begin);
	    if(!neric)
		continue;



	    /* Now reject those primers with self-complementarity */

	    prima_reject_self(forlist,revlist,&neric,&nfred);
	    if(!neric)
		continue;

	    /* Reject any primers that could bind elsewhere in the
               sequence */
	    prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr,
			     seqlen);



	    /* Now select the least complementary pair (if any) */
	    prima_best_primer(forlist, revlist, &neric, &nfred);
	    if(!neric)
		continue;

	    /* one forward and one reverse primer become a stored pair */
	    AJNEW(pair);
	    ajListPop(forlist,(void **)&f);
	    ajListPop(revlist,(void **)&r);
	    pair->f = f;
	    pair->r = r;
	    ++npair;
	    ajListPush(pairlist,(void *)pair);
	}
     }

  }


    if(!targetrange)
    {
	/* Get rid of primer pairs nearby the top scoring ones */
	prima_TwoSortscorepos(&pairlist);
	prima_prune_nearby(pairlist, &npair, maxprimerlen-1);
	ajListSort(pairlist,prima_PosCompare);
	prima_check_overlap(pairlist,&npair,overlap);
    }



    /* ---- output ---- */
    if(npair)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"%d pairs found\n\n",npair);
	else
	    ajFmtPrintF(outf,
			"Closest primer pair to specified product is:\n\n");

	/* column headings are only needed for the side-by-side layout */
	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	    ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n");
    }



    for(i=0;i<npair;++i)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"[%d]\n",i+1);

	ajListPop(pairlist,(void **)&pair);


	/* product = the bases between the two primers */
	prodlen = pair->r->start - (pair->f->start + pair->f->primerlen);

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	{
	    /* side-by-side layout */
	    /* NOTE(review): prima_testproduct stores start with begin
	    ** already added, and begin is added again when printing -
	    ** confirm the intended coordinate base */
	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;

	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1),
			v2+begin);


	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajFmtPrintF(outf,
			"%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin);


	    ajFmtPrintF(outf,"       Tm  %.2f C  (GC %.2f%%)\t\t       ",
			pair->f->primerTm,pair->f->primGCcont*100.);
	    ajFmtPrintF(outf,"Tm  %.2f C  (GC %.2f%%)\n",
			pair->r->primerTm,pair->r->primGCcont*100.);

	    ajFmtPrintF(outf,"             Length: %-32dLength: %d\n",
			pair->f->primerlen,pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:    %.2f C\t\t\t",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));
	    /* the product Tm is always taken from the forward primer */
	    ajFmtPrintF(outf,"     Tma:    %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));


	    ajFmtPrintF(outf,"       Product GC: %.2f%%\n",
			pair->f->prodGC * 100.0);
	    ajFmtPrintF(outf,"       Product Tm: %.2f C\n",
			pair->f->prodTm);
	    ajFmtPrintF(outf,"       Length:     %d\n\n\n",prodlen);
	}
	else
	{
	    /* list layout */
	    ajFmtPrintF(outf,"    Product from %d to %d\n",pair->f->start+
			pair->f->primerlen+begin,pair->r->start-1+begin);
	    ajFmtPrintF(outf,"                 Tm: %.2f C   GC: %.2f%%\n",
			pair->f->prodTm,pair->f->prodGC*(float)100.);
	    ajFmtPrintF(outf,"                 Length: %d\n\n\n",prodlen);


	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Forward: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->f->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->f->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->f->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));

	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    /* NOTE(review): p2 (reversed) is computed but p1 (not
	    ** reversed) is what gets printed, unlike the side-by-side
	    ** layout above which prints p2 - confirm which is intended */
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Reverse: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->r->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->r->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));
	}

	/* the pair and both its primers are finished with */
	prima_PrimerDel(&pair->f);
	prima_PrimerDel(&pair->r);
	AJFREE(pair);
    }



    /* ---- clean up ---- */
    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&substr);
    ajStrDel(&p1);
    ajStrDel(&p2);

    ajListFree(&forlist);
    ajListFree(&revlist);
    ajListFree(&pairlist);

    ajFileClose(&outf);
    ajSeqDel(&sequence);

    AJFREE(entropy);
    AJFREE(enthalpy);
    AJFREE(energy);

    embExit();

    return 0;
}
示例#10
0
static void prima_test_multi(AjPList forlist, AjPList revlist, ajint *neric,
			     ajint *nfred, const AjPStr seq,
			     const  AjPStr rseq, ajint len)
{
    PPrimer tmp;
    AjPStr st;

    ajint i;
    ajint j;
    ajint v;
    ajint pc;
    ajint count;
    ajint limit;

    const char *s;
    const char *r;
    const char *p;

    st = ajStrNew();

    s = ajStrGetPtr(seq);
    r = ajStrGetPtr(rseq);

    pc = *neric;

    for(i=0;i<*neric;++i)
    {
	ajListPop(forlist,(void **)&tmp);
	count = 0;
	v = tmp->primerlen;
	limit = len-v+1;
	p = ajStrGetPtr(tmp->substr);
	for(j=0;j<limit && count<2;++j)
	{
	    if(prima_seq_align(s+j,p,v)>SIMLIMIT2)
		++count;

	    if(prima_seq_align(r+j,p,v)>SIMLIMIT2)
		++count;
	}


	if(count>1)
	{
	    prima_PrimerDel(&tmp);
	    --pc;
	}
	else
	    ajListPushAppend(forlist,(void *)tmp);
    }

    *neric = pc;
    if(!*neric)
    {
	while(ajListPop(revlist,(void **)&tmp))
	    prima_PrimerDel(&tmp);
	*nfred = 0;
	ajStrDel(&st);
	return;
    }


    pc = *nfred;
    for(i=0;i<*nfred;++i)
    {
	ajListPop(revlist,(void **)&tmp);
	count = 0;
	v = tmp->primerlen;
	limit = len-v+1;
	ajStrAssignC(&st,ajStrGetPtr(tmp->substr));
	ajSeqstrReverse(&st);
	p = ajStrGetPtr(st);
	for(j=0;j<limit && count<2;++j)
	{
	    if(prima_seq_align(s+j,p,v)>SIMLIMIT2)
		++count;

	    if(prima_seq_align(r+j,p,v)>SIMLIMIT2)
		++count;
	}


	if(count>1)
	{
	    prima_PrimerDel(&tmp);
	    --pc;
	}
	else
	    ajListPushAppend(revlist,(void *)tmp);
    }

    *nfred = pc;
    if(!*nfred)
    {
	while(ajListPop(forlist,(void **)&tmp))
	    prima_PrimerDel(&tmp);
	*neric = 0;
    }

    ajStrDel(&st);

    return;
}
示例#11
0
/*
** prima_testtarget
**
** Look for a single forward/reverse primer pair flanking a target
** region and, if one is found, push it onto pairlist.
**
** seqstr          sequence from which candidate primers are cut
** revstr          second sequence string scanned for unwanted extra
**                 matches (presumably the reverse strand of seqstr -
**                 confirm against the caller)
** targetstart     forward candidates start at targetstart-minprimerlen
**                 and move towards position 0
** targetend       reverse candidates start at targetend+1 and move
**                 towards the end of the sequence
** minprimerlen,
** maxprimerlen    range of primer lengths tried at each start position
** seqlen          length of the sequence
** minprimerTm,
** maxprimerTm     accepted primer melting temperature range
** minpmGCcont,
** maxpmGCcont     accepted primer GC content range
** minprodGCcont,
** maxprodGCcont   unused
** saltconc,
** dnaconc         concentrations passed to the melting temperature
**                 calculations
** pairlist        receives the new pair when one is found
** npair           set to 1 when a pair was pushed, otherwise to 0
**
** A warning is issued when no forward primer, or no matching reverse
** primer, passes all tests.
*/
static void prima_testtarget(const AjPStr seqstr, const AjPStr revstr,
			     ajint targetstart,
			     ajint targetend, ajint minprimerlen,
			     ajint maxprimerlen, ajint seqlen,
			     float minprimerTm, float maxprimerTm,
			     float minpmGCcont, float maxpmGCcont,
			     float minprodGCcont, float maxprodGCcont,
			     float saltconc, float dnaconc,
			     AjPList pairlist, ajint *npair)
{
    AjPStr fstr;
    AjPStr rstr;

    AjPStr str1;
    AjPStr str2;
    PPrimer f;
    PPrimer r;

    PPair ppair;

    ajint i;
    ajint j;
    ajint forstart = 0;
    ajint forend;
    ajint revstart = 0;
    ajint revend;
    ajint Limit;
    ajint tnum;
    ajint thisplen;
    ajint cut;
    ajint prodlen;

    float primerTm = 0.0;
    float primGCcont = 0.0;
    float prodgc = 0.0;

    AjBool found = ajFalse;
    AjBool revfound = ajFalse;
    AjBool isDNA = ajTrue;

    ajint flen = 0;
    ajint rlen = 0;

    float ftm = 0.0;
    float rtm = 0.0;
    float fgc = 0.0;
    float rgc = 0.0;
    ajint fsc = 0;
    ajint rsc = 0;

    const char *s;
    const char *s2;
    const char *p;
    ajint  pv;
    ajint  plimit;
    ajint  pcount;
    ajint  k;

    (void) minprodGCcont;
    (void) maxprodGCcont;

    fstr = ajStrNew();
    rstr = ajStrNew();
    str1 = ajStrNew();
    str2 = ajStrNew();

    /* Number of different primer lengths to try at each position */
    tnum = maxprimerlen-minprimerlen+1;

    /******FORWARDS  *******/

    for(i=targetstart-minprimerlen; i>-1 && !found; --i)
    {
	forstart = i;
	forend = i+minprimerlen-1;

	for(j=0; j<tnum; ++j,++forend)
	{
	    /* The primer must end before the target begins */
	    if(forend==targetstart)
		break;

	    ajStrAssignSubC(&fstr, ajStrGetPtr(seqstr), forstart, forend);

	    thisplen = ajStrGetLen(fstr);

	    /*
	    ** NOTE(review): the empty strand argument presumably makes
	    ** ajMeltTempSave reuse the values cached in the file-level
	    ** entropy/enthalpy/energy arrays - confirm.
	    */
	    primerTm = ajMeltTempSave("",forstart,thisplen,
                                      saltconc, dnaconc, isDNA,
                                      &entropy, &enthalpy, &energy);

	    if(primerTm <minprimerTm || primerTm>maxprimerTm)
		continue;

	    primGCcont= ajMeltGC(fstr, thisplen);
	    if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont)
		continue;

	    /*
	    ** Instead of calling the self-reject function: score the
	    ** first half of the primer against the second half and
	    ** reject the candidate if the score is too high.
	    */
	    cut = (thisplen/2)-1;

	    ajStrAssignSubS(&str1, fstr, 0, cut);
	    ajStrAssignSubS(&str2, fstr, cut+1, thisplen-1);

	    if((fsc=prima_primalign(str1, str2)) > SIMLIMIT)
		continue;

	    /* Test for match with rest of sequence */
	    s  = ajStrGetPtr(seqstr);
	    s2 = ajStrGetPtr(revstr);
	    p  = ajStrGetPtr(fstr);
	    pv = thisplen;
	    pcount = 0;
	    plimit = seqlen-pv+1;
	    for(k=0;k<plimit && pcount<2;++k)
	    {
		if(prima_seq_align(s+k,p,pv)>SIMLIMIT2)
		    ++pcount;
		if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2)
		    ++pcount;
	    }

	    if(pcount<2)
	    {
		found = ajTrue;
		flen  = thisplen;
		ftm   = primerTm;
		fgc   = primGCcont;
		break;
	    }
	}
    }


    /******* REVERSES IN TARGETRANGE *****/

    Limit = seqlen-minprimerlen;

    if(found)
    {
	for(i=targetend+1; i<Limit && !revfound; ++i)
	{
	    revstart = i;
	    revend = i+minprimerlen-1;

	    for(j=0; j<tnum; ++j,++revend)
	    {
		/* The primer must not run off the end of the sequence */
		if(revend==seqlen)
		    break;

		ajStrAssignSubC(&rstr, ajStrGetPtr(seqstr), revstart, revend);
		ajSeqstrReverse(&rstr);

		thisplen = ajStrGetLen(rstr);
		primerTm = ajMeltTempSave("", revstart, thisplen,
                                          saltconc, dnaconc, isDNA,
                                          &entropy, &enthalpy, &energy);

		if(primerTm <minprimerTm || primerTm>maxprimerTm)
		    continue;

		primGCcont= ajMeltGC(rstr, thisplen);
		if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont)
		    continue;

		/* Same self-alignment test as for the forward primer */
		cut = (thisplen/2)-1;

		ajStrAssignSubS(&str1, rstr, 0, cut);
		ajStrAssignSubS(&str2, rstr, cut+1, thisplen-1);

		/*
		** Reject when the score is ABOVE the limit, as in the
		** forward search; testing "< SIMLIMIT" here would throw
		** away the acceptable candidates and keep the bad ones.
		*/
		if((rsc=prima_primalign(str1, str2)) > SIMLIMIT)
		    continue;

		/* Test for match with rest of sequence */
		s  = ajStrGetPtr(seqstr);
		s2 = ajStrGetPtr(revstr);
		p  = ajStrGetPtr(rstr);
		pv = thisplen;
		pcount = 0;
		plimit = seqlen-pv+1;
		for(k=0;k<plimit && pcount<2;++k)
		{
		    if(prima_seq_align(s+k,p,pv)>SIMLIMIT2)
			++pcount;

		    if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2)
			++pcount;
		}

		if(pcount<2)
		{
		    revfound = ajTrue;
		    rlen     = thisplen;
		    rtm      = primerTm;
		    rgc      = primGCcont;
		    break;
		}
	    }
	}
    }


    if(!found)
    {
	ajWarn("No forward primers found in targetrange");
	*npair = 0;
    }
    else if(!revfound)
    {
	ajWarn("No reverse primers found in targetrange");
	*npair = 0;
    }
    else
    {
	/* The product is the stretch between the two primers */
	prodlen = revstart-(forstart+flen);

	ajStrAssignSubC(&str1,ajStrGetPtr(seqstr),forstart+flen,revstart-1);
	prodgc = ajMeltGC(str1,prodlen);

	/* The primer records own copies of the strings freed below */
	AJNEW0(f);
	f->substr     = ajStrNewC(ajStrGetPtr(fstr));
	f->start      = forstart;
	f->primerlen  = flen;
	f->primerTm   = ftm;
	f->primGCcont = fgc;
	f->score      = fsc;
	f->prodGC     = prodgc;
	f->prodTm     = ajMeltTempProd(prodgc,saltconc,prodlen);

	AJNEW0(r);
	r->substr     = ajStrNewC(ajStrGetPtr(rstr));
	r->start      = revstart;
	r->primerlen  = rlen;
	r->primerTm   = rtm;
	r->primGCcont = rgc;
	r->score      = rsc;

	AJNEW0(ppair);
	ppair->f = f;
	ppair->r = r;
	ajListPush(pairlist,(void *)ppair);
	*npair = 1;
    }

    /* Release the working strings on every path */
    ajStrDel(&fstr);
    ajStrDel(&rstr);
    ajStrDel(&str1);
    ajStrDel(&str2);

    return;
}
示例#12
0
文件: silent.c 项目: WenchaoLin/JAMg
/*
** silent_restr_read
**
** Read restriction enzyme entries from the data file
** "REBASE/embossre.enz" into a newly created list.
**
** relist   receives a new list holding one PRinfo record per accepted
**          enzyme
** enzymes  comma-separated enzyme names to accept; a NULL string, or
**          "all" as the first name, accepts every entry in the file
**
** Returns the number of records pushed onto *relist.
** Calls ajFatal if the data file cannot be opened.
*/
static ajint silent_restr_read(AjPList *relist,const AjPStr enzymes)
{
    EmbPPatRestrict rptr = NULL;
    AjPFile fin = NULL;

    AjPStr refilename = NULL;
    ajint RStotal = 0;
    PRinfo rinfo = NULL;
    AjBool isall = ajFalse;
    ajint ne = 0;
    ajint i;
    AjPStr *ea = NULL;

    refilename = ajStrNewC("REBASE/embossre.enz");
    rptr       = embPatRestrictNew();
    *relist    = ajListNew();

    fin = ajDatafileNewInNameS(refilename);
    if(!fin)
	ajFatal("Aborting...restriction file '%S' not found", refilename);

    /* Parse the user-selected enzyme list */
    if(!enzymes)
	isall = ajTrue;
    else
    {
	ne = ajArrCommaList(enzymes,&ea);
	for(i=0;i<ne;++i)
	    ajStrRemoveWhite(&ea[i]);

	/*
	** Only look at the first name when there is one: the list may
	** contain no tokens at all (e.g. an empty string), in which
	** case ea[0] must not be read.
	*/
	if(ne > 0 && ajStrMatchCaseC(ea[0],"all"))
	    isall = ajTrue;
    }

    while(!ajFileIsEof(fin))
    {
	if(!embPatRestrictReadEntry(rptr,fin))
	    continue;

	if(!isall)
	{
	    /* Skip entries whose code is not one of the wanted names */
	    for(i=0;i<ne;++i)
	    {
		if(ajStrMatchCaseS(ea[i],rptr->cod))
		    break;
	    }

	    if(i==ne)
		continue;
	}

	/* Zeroed so that any field not assigned below starts out empty */
	AJNEW0(rinfo);

	/* reading in RE info into rinfo from EmbPPatRestrict structure */
	rinfo->code     = ajStrNewS(rptr->cod);
	rinfo->site     = ajStrNewS(rptr->pat);
	rinfo->revsite  = ajStrNewS(rptr->pat);
	ajSeqstrReverse(&rinfo->revsite);
	rinfo->ncuts    = rptr->ncuts;
	rinfo->cut1     = rptr->cut1;
	rinfo->cut2     = rptr->cut2;
	rinfo->cut3     = rptr->cut3;
	rinfo->cut4     = rptr->cut4;
	ajListPush(*relist,(void *)rinfo);
	RStotal++;
    }

    for(i=0;i<ne;++i)
	ajStrDel(&ea[i]);
    AJFREE(ea);

    embPatRestrictDel(&rptr);
    ajFileClose(&fin);
    ajStrDel(&refilename);

    return RStotal;
}