Esempio n. 1
0
/*
** Entry point of the "restover" application.
**
** Reads the ACD options, opens the enzyme data file (and, when "limit" is
** set, loads the equivalence table), then for every input sequence runs
** embPatRestrictMatch() and hands any hits to restover_printHits().
**
** @param [r] argc [int] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] 0 on normal completion
*/
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq     = NULL;
    AjPStr seqcmp  = NULL;
    AjPStr enzymes = NULL;
    AjPFile outf   = NULL;
    ajint begin;
    ajint end;
    ajint min;
    ajint max;
    ajint sitelen;
    AjBool alpha;
    AjBool single;
    AjBool blunt;
    AjBool ambiguity;
    AjBool sticky;
    AjBool plasmid;
    AjBool threeprime;
    AjBool commercial;
    AjBool html;
    AjBool limit;
    AjBool frags;
    AjBool methyl;
    AjPFile dfile;

    AjPFile enzfile  = NULL;
    AjPFile equfile  = NULL;
    AjPFile methfile = NULL;

    AjPStr name = NULL;

    AjPTable table = NULL;

    ajint hits;

    AjPList l = NULL;

    embInit("restover", argc, argv);

    seqall    = ajAcdGetSeqall("sequence");
    seqcmp    = ajAcdGetString("seqcomp");
    ajStrFmtUpper(&seqcmp);
    outf      = ajAcdGetOutfile("outfile");

    /*
    ** Some of these are not needed but I left them in case someone wants to
    ** use them some time ...
    */
    enzymes   = ajStrNewC("all");

    min        = ajAcdGetInt("min");
    max        = ajAcdGetInt("max");
    sitelen    = 2;
    threeprime = ajAcdGetBoolean("threeprime");
    blunt      = ajAcdGetBoolean("blunt");
    sticky     = ajAcdGetBoolean("sticky");
    single     = ajAcdGetBoolean("single");
    html       = ajAcdGetBoolean("html");
    alpha      = ajAcdGetBoolean("alphabetic");
    ambiguity  = ajAcdGetBoolean("ambiguity");
    plasmid    = ajAcdGetBoolean("plasmid");
    commercial = ajAcdGetBoolean("commercial");
    limit      = ajAcdGetBoolean("limit");
    frags      = ajAcdGetBoolean("fragments");
    methyl     = ajAcdGetBoolean("methylation");
    dfile      = ajAcdGetDatafile("datafile");
    methfile   = ajAcdGetDatafile("mfile");

    /* "single" overrides the min/max cut counts */
    if(single)
        max = min = 1;

    table = ajTablestrNew(EQUGUESS);

    /* with "threeprime" the comparison string is matched reversed */
    if(threeprime)
        ajStrReverse(&seqcmp);

    /* read the local file of enzymes names */
    restover_read_file_of_enzyme_names(&enzymes);

    /* use the user-supplied data file if there is one, else the default */
    if(!dfile)
    {
        enzfile = ajDatafileNewInNameC(ENZDATA);
        if(!enzfile)
            ajFatal("Cannot locate enzyme file. Run REBASEEXTRACT");
    }
    else
    {
        enzfile = dfile;
    }

    /* the equivalence table is optional: without it "limit" is dropped */
    if(limit)
    {
        equfile = ajDatafileNewInNameC(EQUDATA);
        if(!equfile)
            limit=ajFalse;
        else
        {
            restover_read_equiv(equfile,table);
            ajFileClose(&equfile);
        }
    }

    while(ajSeqallNext(seqall, &seq))
    {
        /*
        ** A fresh hit list is needed for every sequence: it is freed
        ** (and the pointer reset to NULL) at the end of each iteration.
        */
        l = ajListNew();

        begin = ajSeqallGetseqBegin(seqall);
        end   = ajSeqallGetseqEnd(seqall);
        ajFileSeek(enzfile,0L,0);   /* rewind the enzyme file */
        ajSeqFmtUpper(seq);

        hits = embPatRestrictMatch(seq,begin,end,enzfile,methfile,enzymes,
                                   sitelen,plasmid,ambiguity,min,max,blunt,
                                   sticky,commercial,methyl,l);
        ajDebug("hits:%d listlen:%u\n", hits, ajListGetLength(l));
        if(hits)
        {
            name = ajStrNewC(ajSeqGetNameC(seq));
            restover_printHits(seq, seqcmp, outf,l,name,hits,begin,end,
                               min,max,plasmid,
                               sitelen,limit,table,alpha,frags,
                               html);
            ajStrDel(&name);
        }

        ajListFree(&l);
    }

    /*
    ** When a data file was supplied, enzfile and dfile refer to the same
    ** file object; drop one reference so that it is closed only once.
    */
    if(enzfile == dfile)
        dfile = NULL;

    ajListFree(&l);
    ajSeqDel(&seq);
    ajFileClose(&outf);
    ajFileClose(&dfile);
    ajFileClose(&enzfile);
    ajFileClose(&equfile);
    ajFileClose(&methfile);

    ajSeqallDel(&seqall);
    ajStrDel(&seqcmp);
    ajStrDel(&enzymes);
    ajStrDel(&name);

    ajTablestrFree(&table);

    embExit();

    return 0;
}
Esempio n. 2
0
/*
** Writes one read as a single SAM alignment line, followed by its tags.
**
** The CIGAR string stored in the read is used when present. Otherwise,
** if the read's reference contig has a consensus, a CIGAR string is built
** with assemoutMakeCigar() from the aligned part of the read, soft-clip
** ("S") operations are added for the unaligned ends, and the sequence and
** quality strings are written with '*' padding characters removed.
**
** @param [u] outf [AjPFile] Output file
** @param [r] r [const AjPAssemRead] Read to write
** @param [r] contigs [AjPAssemContig const *] Contigs, indexed by the
**                                     read's Reference and Rnext values
** @param [r] ncontigs [ajint] Number of entries in contigs
** @return [AjBool] ajFalse when the read has neither a CIGAR string nor a
**                  consensus to derive one from (the line is still
**                  written, with CIGAR "*"); ajTrue otherwise
*/
static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
					AjPAssemContig const * contigs,
					ajint ncontigs)
{
    AjPAssemTag    t = NULL;
    AjIList l = NULL;
    AjPStr qualstr = NULL;
    AjPStr tmp  = NULL;
    ajint  POS  = 0;
    AjPStr CIGAR = NULL;
    const char* RNEXT = NULL;
    AjPStr SEQ  = NULL;
    AjPStr QUAL = NULL;
    AjPStr SEQunpadded  = NULL;
    AjPStr QUALunpadded = NULL;
    AjPStr consensus = NULL;
    AjBool rc= ajFalse;
    AjBool ret = ajTrue;
    const char* refseq = NULL;
    const AjPAssemContig contig = NULL;

    ajuint k = 0;

    if(r->Reference>=ncontigs)
        ajDie("assemoutWriteSamAlignment: reference sequence number"
              " '%d' is larger than or equal to known number of reference"
              " sequences '%d'. Problem while processing read '%S'.",
              r->Reference,
              ncontigs,
              r->Name);

    /* a Reference of -1 means the read has no reference contig */
    contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);

    ajStrAssignRef(&SEQ, r->Seq);
    consensus = contig==NULL? NULL : contig->Consensus;

    if (r->Rnext==-1)
        RNEXT= "*";
    else if(r->Rnext==r->Reference)
        RNEXT = "=";
    else
    {
        /* Rnext indexes contigs[] just like Reference: validate it too */
        if(r->Rnext < 0 || r->Rnext >= ncontigs)
            ajDie("assemoutWriteSamAlignment: reference sequence number"
                  " of the next segment is outside the range of the '%d'"
                  " known reference sequences."
                  " Problem while processing read '%S'.",
                  ncontigs,
                  r->Name);

        RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);
    }

    if (r->Flag & BAM_FREVERSE)
    {
        rc = ajTrue;
        qualstr = ajStrNewS(r->SeqQ);   /* private copy, freed below */

        /* bring sequence and qualities into reversed orientation */
        if(!r->Reversed)
        {
            ajStrReverse(&qualstr);
            ajSeqstrReverse(&SEQ);
        }

        QUAL = qualstr;
        POS = r->y1;
        /* aligned part of the read, in reversed coordinates */
        ajStrAssignSubS(&tmp, SEQ,
                        ajStrGetLen(r->Seq) - r->y2,
                        ajStrGetLen(r->Seq) - r->x2
        );

    }
    else
    {
        rc= ajFalse;
        POS = r->x1;
        QUAL = r->SeqQ;
        /* aligned part of the read (x2..y2, converted to 0-based) */
        ajStrAssignSubS(&tmp, SEQ,
                        r->x2-1,
                        r->y2-1
        );
    }

    if(r->Cigar==NULL && consensus)
    {
        /* no stored CIGAR: derive one against the consensus */
        refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);

        CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));

        SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
        QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));

        /* drop '*' positions from both the sequence and the qualities */
        for(k=0; k< ajStrGetLen(SEQ); k++)
        {
            if (ajStrGetCharPos(SEQ, k) == '*')
                continue;

            ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
            ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
        }

        ajDebug("cigar: %S\n", CIGAR);

        ajStrAssignS(&tmp, CIGAR);

        /*
        ** Add soft clips for the read ends outside x2..y2; for reversed
        ** reads the leading clip is the part after y2. The lengths are
        ** cast to ajint so that they match the "%d" conversions.
        */
        if(rc)
        {
            if(r->y2 < (ajint)ajStrGetLen(SEQ))
                ajFmtPrintS(&CIGAR, "%dS%S",
                            (ajint)ajStrGetLen(SEQ) - r->y2, tmp);
            if(r->x2 > 1)
                ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
        }
        else
        {
            if(r->x2 > 1)
                ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);
            if(r->y2 < (ajint)ajStrGetLen(SEQ))
                ajFmtPrintAppS(&CIGAR, "%dS",
                               (ajint)ajStrGetLen(SEQ) - r->y2);
        }
        ajStrDel(&tmp);
    }
    else if(r->Cigar==NULL)
    {
        ajErr("both CIGAR string and consensus sequence not available");
        ret = ajFalse;
        ajStrAssignK(&CIGAR, '*');
    }
    else if(!ajStrGetLen(r->Cigar))
        ajStrAssignK(&CIGAR, '*');
    else
    {
        /* stored CIGAR present: write "*" for empty sequence/qualities */
        if(!ajStrGetLen(SEQ))
            ajStrAssignK(&SEQ, '*');

        if(!ajStrGetLen(QUAL))
            ajStrAssignK(&QUAL, '*');
    }

    ajStrDel(&tmp);

    /* the eleven mandatory SAM columns */
    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
                r->Name,
                r->Flag,
                (contig==NULL ? "*" : ajStrGetPtr(contig->Name)),
                POS,
                r->MapQ,
                (CIGAR ? CIGAR : r->Cigar),
                RNEXT,
                r->Pnext,
                r->Tlen,
                (r->Cigar ? SEQ  : SEQunpadded),
                (r->Cigar ? QUAL : QUALunpadded));

    /* optional tag columns */
    l = ajListIterNewread(r->Tags);
    while (!ajListIterDone(l))
    {
        t = ajListIterGet(l);

        /* TODO: array type, 'B' */

        /* In SAM, all single integer types are mapped to int32_t [SAM spec] */
        ajFmtPrintF(outf, "\t%S:%c:",
                    t->Name,
                    (t->type == 'c' || t->type == 'C' ||
                     t->type == 's' || t->type == 'S'
                     || t->type == 'I') ? 'i' : t->type
        );

        if(t->x1 || t->y1)
            ajFmtPrintF(outf, " %u %u", t->x1, t->y1);

        if(t->Comment && ajStrGetLen(t->Comment)>0)
            ajFmtPrintF(outf, "%S", t->Comment);

    }
    ajListIterDel(&l);

    ajFmtPrintF(outf, "\n");

    /* QUAL is not freed itself: it aliases qualstr or r->SeqQ */
    ajStrDel(&qualstr);

    ajStrDel(&SEQ);
    ajStrDel(&CIGAR);
    ajStrDel(&SEQunpadded);
    ajStrDel(&QUALunpadded);

    return ret;
}
Esempio n. 3
0
/*
** Prints the restriction hits of one sequence whose overhang matches
** seqcmp, optionally followed by a list of fragment lengths.
**
** Every match popped from the list is deleted here.
**
** @param [r] seq [const AjPSeq] Sequence the hits were found in
** @param [r] seqcmp [const AjPStr] Overhang to compare against (case
**                                  sensitive comparison)
** @param [u] outf [AjPFile] Output file
** @param [u] l [AjPList] List of EmbPMatMatch hits; filtered with
**                        embPatRestrictRestrict() and then popped
** @param [r] name [const AjPStr] Sequence name for the report header
** @param [r] hits [ajint] Number of hits in the list
** @param [r] begin [ajint] Start position, used in the header and for
**                          fragment lengths
** @param [r] end [ajint] End position, used in the header and for
**                        fragment lengths
** @param [r] mincut [ajint] Minimum cuts per enzyme (reported only)
** @param [r] maxcut [ajint] Maximum cuts per enzyme (reported only)
** @param [r] plasmid [AjBool] If true, hits with a cut more than 100
**                             past the site start are kept and the two end
**                             fragments are joined into one
** @param [r] sitelen [ajint] Minimum recognition site length (reported only)
** @param [r] limit [AjBool] If true, enzyme names are replaced by their
**                           entry in table when one exists
** @param [r] table [const AjPTable] Enzyme name equivalence table
** @param [r] alpha [AjBool] Passed on to embPatRestrictRestrict()
** @param [r] frags [AjBool] If true, print fragment lengths
** @param [r] html [AjBool] If true, write HTML markup
** @return [void]
*/
static void restover_printHits(const AjPSeq seq, const AjPStr seqcmp,
			       AjPFile outf,
			       AjPList l, const AjPStr name, ajint hits,
			       ajint begin, ajint end,
			       ajint mincut, ajint maxcut, AjBool plasmid,
			       ajint sitelen,
			       AjBool limit, const AjPTable table,
			       AjBool alpha, AjBool frags,
			       AjBool html)
{
    EmbPMatMatch m = NULL;
    ajint *fa = NULL;           /* cut positions */
    ajint *fx = NULL;           /* fragment lengths */
    ajint fc = 0;               /* number of entries in fx */
    ajint fn = 0;               /* number of entries in fa */
    ajint fb = 0;
    ajint last = 0;
    AjPStr overhead = NULL;

    const AjPStr value = NULL;

    ajint i;
    ajint c = 0;

    ajint hang1;
    ajint hang2;


    if(html)
        ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Restrict of %S from %d to %d\n",name,begin,end);

    if(html)
        ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"#\n");

    if(html)
        ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum cuts per enzyme: %d\n",mincut);

    if(html)
        ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Maximum cuts per enzyme: %d\n",maxcut);

    if(html)
        ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum length of recognition site: %d\n",
                sitelen);
    if(html)
        ajFmtPrintF(outf,"<BR>");

    hits = embPatRestrictRestrict(l,hits,!limit,alpha);

    if(frags)
    {
        /*
        ** Each hit contributes at most two cut positions to fa. fx then
        ** receives fn-1 gap lengths plus up to two end fragments, i.e.
        ** up to 2*hits+1 values, so it needs one slot more than 2*hits.
        ** The extra slot also keeps both requests non-zero when no hits
        ** are left.
        */
        fa = AJALLOC((hits*2+1)*sizeof(ajint));
        fx = AJALLOC((hits*2+1)*sizeof(ajint));
    }


    ajFmtPrintF(outf,"# Number of hits with any overlap: %d\n",hits);

    if(html)
        ajFmtPrintF(outf,"<BR>");

    if(html)
        ajFmtPrintF(outf,"</p><table  border cellpadding=4 "
                    "bgcolor=\"#FFFFF0\">\n");
    if(html)
        ajFmtPrintF(outf,
                    "<th>Base Number</th><th>Enzyme</th><th>Site</th>"
                    "<th>5'</th><th>3'</th><th>[5'</th><th>3']</th>\n");
    else
        ajFmtPrintF(outf,"# Base Number\tEnzyme\t\tSite\t\t5'\t3'\t"
                    "[5'\t3']\n");

    for(i=0;i<hits;++i)
    {
        ajListPop(l,(void **)&m);
        ajDebug("hit %d start:%d cut1:%d cut2:%d\n",
                i, m->start, m->cut1, m->cut2);

        hang1 = (ajint)m->cut1 - (ajint)m->start;
        hang2 = (ajint)m->cut2 - (ajint)m->start;

        /* on linear sequences skip hits cutting far beyond the site */
        if(!plasmid && (hang1>100 || hang2>100))
        {
            embMatMatchDel(&m);
            continue;
        }

        if(limit)
        {
            value=ajTableFetchS(table,m->cod);
            if(value)
                ajStrAssignS(&m->cod,value);
        }

        /* overhang between the first pair of cuts */
        if(m->cut2 >= m->cut1)
            ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut1, m->cut2-1);
        else
        {
            ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut2, m->cut1-1);
            ajStrReverse(&overhead);
        }

        ajDebug("overhead:%S seqcmp:%S\n", overhead, seqcmp);

        /* Print out only those who have the same overhang. */
        if(ajStrMatchCaseS(overhead, seqcmp))
        {
            if(html)
            {
                ajFmtPrintF(outf,
                            "<tr><td>%-d</td><td>%-16s</td><td>%-16s"
                            "</td><td>%d</td><td>%d</td></tr>\n",
                            m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
                            m->cut1,m->cut2);
            }
            else
                ajFmtPrintF(outf,"\t%-d\t%-16s%-16s%d\t%d\t\n",
                            m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
                            m->cut1,m->cut2);
        }

        if(frags)
            fa[fn++] = m->cut1;

        if(m->cut3 || m->cut4)
        {
            /* overhang between the second pair of cuts */
            if(m->cut4 >= m->cut3)
                ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
                                m->cut3, m->cut4-1);
            else
            {
                ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
                                m->cut4, m->cut3-1);
                ajStrReverse(&overhead);
            }

            /*
            ** This row fills the "[5'" and "3']" columns, so it reports
            ** the second cut pair (cut3/cut4), not cut1/cut2.
            */
            if(ajStrMatchCaseS(overhead, seqcmp))
            {
                if(html)
                    ajFmtPrintF(outf,
                                "<tr><td>%-d</td><td>%-16s</td><td>%-16s"
                                "</td><td></td><td></td><td>%d</td><td>%d"
                                "</td></tr>\n",
                                m->start,ajStrGetPtr(m->cod),
                                ajStrGetPtr(m->pat),
                                m->cut3,m->cut4);
                else
                    ajFmtPrintF(outf,"\t%-d\t%-16s%-16s\t\t%d\t%d\t\n",
                                m->start,ajStrGetPtr(m->cod),
                                ajStrGetPtr(m->pat),
                                m->cut3,m->cut4);
            }

            if(frags)
                fa[fn++] = m->cut3;
        }

        ajStrDel(&overhead);

        embMatMatchDel(&m);
    }



    if(frags)
    {
        ajSortIntInc(fa,fn);
        ajFmtPrintF(outf,"\n\nFragment lengths:\n");
        if(!fn || (fn==1 && plasmid))
            ajFmtPrintF(outf,"    %d\n",end-begin+1);
        else
        {
            /* remove duplicate cut positions from the sorted array */
            last = -1;
            fb = 0;
            for(i=0;i<fn;++i)
            {
                if((c=fa[i])!=last)
                    fa[fb++]=c;
                last = c;
            }
            fn = fb;

            /* Calc lengths */
            for(i=0;i<fn-1;++i)
                fx[fc++] = fa[i+1]-fa[i];
            if(!plasmid)
            {
                /* linear: the two end fragments are separate */
                fx[fc++] = fa[0]-begin+1;
                fx[fc++] = end-fa[fn-1];
            }
            else
                /* circular: the two ends form one fragment */
                fx[fc++] = (fa[0]-begin+1)+(end-fa[fn-1]);

            ajSortIntDec(fx,fc);
            for(i=0;i<fc;++i)
                ajFmtPrintF(outf,"    %d\n",fx[i]);
        }
        AJFREE(fa);
        AJFREE(fx);
    }


    if(html)
        ajFmtPrintF(outf,"</table>\n");

    return;
}