Ejemplo n.º 1
0
/* @funcstatic domainreso_StrBinSearchDomain ********************************
**
** Performs a binary search for a DOMAIN domain code over an array of AjPStr
** (which of course must first have been sorted, e.g. by domainreso_StrComp). 
** This is a case-insensitive search.
**
** @param [r] id  [AjPStr]      Search term
** @param [r] arr [AjPStr*]     Array of AjPStr objects
** @param [r] siz [ajint]       Size of array
**
** @return [ajint] Index of first AjPStr object found with an PDB code
** matching id, or -1 if id is not found.
** @@
******************************************************************************/
static ajint domainreso_StrBinSearchDomain(AjPStr id, 
					   AjPStr *arr,
					   ajint siz)	
{
    int l;
    int m;
    int h;
    int c;


    l = 0;
    h = siz-1;
    while(l<=h)
    {
        m = (l+h)>>1;

        if((c=ajStrCmpCaseS(id, arr[m])) < 0)
	    h = m-1;
        else if(c>0) 
	    l = m+1;
        else 
	    return m;
    }

    return -1;
}
Ejemplo n.º 2
0
static int cacheensembl_stringcompare(const void* P1, const void* P2)
{
    const AjPStr string1 = NULL;
    const AjPStr string2 = NULL;

    string1 = *(AjPStr const*) P1;
    string2 = *(AjPStr const*) P2;

#if AJFALSE
    if(ajDebugTest("cacheensembl_stringcompare"))
        ajDebug("cacheensembl_stringcompare\n"
                "  string1 %u\n"
                "  string2 %u\n",
                string1,
                string2);
#endif

    return ajStrCmpCaseS(string1, string2);
}
Ejemplo n.º 3
0
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable,
			    AjBool html, const AjPStr enzymes, AjBool blunt,
			    AjBool sticky, ajuint sitelen, AjBool commercial,
			    AjBool ambiguity, AjBool limit,
			    const AjPTable retable)
{

    /* for iterating over hittable */
    PValue value;
    void **keyarray = NULL;			/* array for table */
    void **valarray = NULL;			/* array for table */
    ajint i;

    /* list of enzymes that cut */
    AjPList cutlist;
    AjIList citer;			/* iterator for cutlist */
    AjPStr cutname = NULL;
    AjBool found;

    /* for parsing value->iso string */
    AjPStrTok tok;
    char tokens[] = " ,";
    AjPStr code = NULL;
    const char *p;

    /* for reading in enzymes names */
    AjPFile enzfile = NULL;
    AjPStr *ea;
    ajint ne;				/* number of enzymes */
    AjBool isall = ajTrue;

    /* list of enzymes that don't cut */
    AjPList nocutlist;
    AjIList niter;			/* iterator for nocutlist */
    AjPStr nocutname = NULL;

    /* count of rejected enzymes not matching criteria */
    ajint rejected_count = 0;

    EmbPPatRestrict enz;

    /* for renaming preferred isoschizomers */
    AjPList newlist;

     /*
     **
     ** Make a list of enzymes('cutlist') that hit
     ** including the isoschizomer names
     **
     */
    ajDebug("Make a list of all enzymes that cut\n");


    cutlist   = ajListstrNew();
    nocutlist = ajListstrNew();


    ajTableToarrayKeysValues(hittable, &keyarray, &valarray);
    for(i = 0; keyarray[i]; i++)
    {
        value = (PValue) valarray[i];
        cutname = ajStrNew();
        ajStrAssignRef(&cutname, keyarray[i]);
        ajListstrPushAppend(cutlist, cutname);

        /* Add to cutlist all isoschizomers of enzymes that cut */
        ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n");

        /* start token to parse isoschizomers names */
        tok = ajStrTokenNewC(value->iso,  tokens);
        while(ajStrTokenNextParseC(&tok, tokens, &code))
        {
            cutname = ajStrNew();
            ajStrAssignS(&cutname, code);
            ajListstrPushAppend(cutlist, cutname);
        }
        ajStrTokenDel(&tok);
    }
    ajStrDel(&code);
    AJFREE(keyarray);
    AJFREE(valarray);



     /*
     ** Read in list of enzymes ('nocutlist') - either all or
     ** the input enzyme list.
     ** Exclude those that don't match the selection criteria - count these.
     */

    ajDebug("Read in a list of all input enzyme names\n");

    ne = 0;
    if(!enzymes)
	isall = ajTrue;
    else
    {
	/* get input list of enzymes into ea[] */
	ne = ajArrCommaList(enzymes, &ea);
	if(ajStrMatchCaseC(ea[0], "all"))
	    isall = ajTrue;
	else
	{
	    isall = ajFalse;
	    for(i=0; i<ne; ++i)
		ajStrRemoveWhite(&ea[i]);
	}
    }

    enzfile = ajDatafileNewInNameC(ENZDATA);

    /* push all enzyme names without the required criteria onto nocutlist */

    enz = embPatRestrictNew();
    while(!ajFileIsEof(enzfile))
    {
        if(!embPatRestrictReadEntry(enz, enzfile))
	    continue;

         /* 
	 ** If user entered explicit enzyme list, then check to see if
	 ** this is one of that explicit list 
	 */
        if(!isall)
	{
            found = AJFALSE;
            for(i=0; i<ne; ++i)
                if(ajStrMatchCaseS(ea[i], enz->cod))
		{
		    found = AJTRUE;
                    break;
                }

	    if(!found)			/* not in the explicit list */
		continue;

	    ajDebug("RE %S is in the input explicit list of REs\n", enz->cod);
	}

	/* ignore ncuts==0 as they are unknown */
	if(!enz->ncuts)
	{
	    /* number of cut positions */
            ajDebug("RE %S has an unknown number of cut positions\n",
		    enz->cod);
	    continue;
	}
        ajDebug("RE %S has a known number of cut sites\n", enz->cod);

	if(enz->len < sitelen)
	{
	    /* recognition site length */
            ajDebug("RE %S does not have a long enough recognition site\n", 
		    enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!blunt && enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is blunt\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!sticky && !enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is sticky\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	/* commercially available enzymes have uppercase patterns */
	p = ajStrGetPtr(enz->pat);

         /* 
	 ** The -commercial qualifier is only used if we are searching
	 ** through 'all' of the REBASE database - if we have specified an
	 ** explicit list of enzymes then they are searched for whether or
	 ** not they are commercially available
	 */
	if((*p >= 'a' && *p <= 'z') && commercial && isall)
	{
            ajDebug("RE %S is not commercial\n", enz->cod);
	    rejected_count++;
	    continue;
        }

	if(!ambiguity && remap_Ambiguous(enz->pat)) {
	    ajDebug("RE %S is ambiguous\n", enz->cod);
	    rejected_count++;
	    continue;	
	}

        ajDebug("RE %S matches all required criteria\n", enz->cod);

        code = ajStrNew();
	ajStrAssignS(&code, enz->cod);
	ajListstrPushAppend(nocutlist, code);
    }
    embPatRestrictDel(&enz);
    ajFileClose(&enzfile);


    for(i=0; i<ne; ++i)
	if(ea[i])
	    ajStrDel(&ea[i]);

    if(ne)
	AJFREE(ea);

    /*
     ** Change names of enzymes in the non-cutter list
     ** to that of preferred (prototype) 
     ** enzyme name so that the isoschizomers of cutters
     ** will be removed from the 
     ** non-cutter list in the next bit.
     ** Remove duplicate prototype names.
     */
    if(limit)
    {
        newlist = ajListstrNew();
        remap_RenamePreferred(nocutlist, retable, newlist);
        ajListstrFreeData(&nocutlist);
        nocutlist = newlist;
        ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel);
    }


     /*
     ** Iterate through the list of input enzymes removing those that are in
     ** the cutlist.
     */

    ajDebug("Remove from the nocutlist all enzymes and isoschizomers "
	    "that cut\n");

     /*
     **  This steps down both lists at the same time, comparing names and
     **  iterating to the next name in whichever list whose name compares
     **  alphabetically before the other.  Where a match is found, the
     **  nocutlist item is deleted.
     */

    ajListSort(nocutlist, remap_cmpcase);
    ajListSort(cutlist, remap_cmpcase);

    citer = ajListIterNewread(cutlist);
    niter = ajListIterNew(nocutlist);

    /*
       while((cutname = (AjPStr)ajListIterGet(citer)) != NULL)
       ajDebug("dbg cutname = %S\n", cutname);
       */

    nocutname = (AjPStr)ajListIterGet(niter);
    cutname   = (AjPStr)ajListIterGet(citer);

    ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname);

    while(nocutname != NULL && cutname != NULL)
    {
	i = ajStrCmpCaseS(cutname, nocutname);
	ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname);
	ajDebug("ajStrCmpCase=%d\n", i);
	if(i == 0)
	{
	    /* match - so remove from nocutlist */
	    ajDebug("ajListstrRemove %S\n", nocutname);
	    ajListstrIterRemove(niter);
	    nocutname = (AjPStr)ajListIterGet(niter);
	     /* 
	     ** Don't increment the cutname list pointer here
	     ** - there may be more than one entry in the nocutname
	     ** list with the same name because we have converted 
	     ** isoschizomers to their preferred name
	     */
	    /* cutname = (AjPStr)ajListIterGet(citer); */
	}
	else if(i == -1)
	    /* cutlist name sorts before nocutlist name */
	    cutname = (AjPStr)ajListIterGet(citer);
	else if(i == 1)
	    /* nocutlist name sorts before cutlist name */
	    nocutname = (AjPStr)ajListIterGet(niter);
    }

    ajListIterDel(&citer);
    ajListIterDel(&niter);
    ajListstrFreeData(&cutlist);


     /* Print the resulting list of those that do not cut*/

    ajDebug("Print out the list\n");

    /* print the title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(html)
	ajFmtPrintF(outfile, "<PRE>");

    /*  ajListSort(nocutlist, ajStrVcmp);*/
    niter = ajListIterNewread(nocutlist);
    i = 0;
    while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL)
    {
	ajFmtPrintF(outfile, "%-10S", nocutname);
	/* new line after every 7 names printed */
	if(i++ == 7)
	{
	    ajFmtPrintF(outfile, "\n");
	    i = 0;
	}
    }
    ajListIterDel(&niter);


    /* end the output */
    ajFmtPrintF(outfile, "\n");
    if(html) {ajFmtPrintF(outfile, "</PRE>\n");}



     /*
     ** Print the count of rejected enzymes
     ** N.B. This is the count of ALL rejected enzymes including all
     ** isoschizomers
     */

    if(html)
        ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile,
		"\n\n# No. of cutting enzymes which do not match the\n"
		"# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n");
    if(html)
	ajFmtPrintF(outfile, "</H2>\n");
    ajFmtPrintF(outfile, "%d\n", rejected_count);

    ajDebug("Tidy up\n");
    ajListstrFreeData(&nocutlist);
    ajListstrFreeData(&cutlist);

    return;
}
Ejemplo n.º 4
0
static void remap_RestrictPreferred(const AjPList l, const AjPTable t)
{
    AjIList iter   = NULL;
    EmbPMatMatch m = NULL;
    const AjPStr value   = NULL;
    AjPStr newiso  = NULL;
    AjBool found;		/* name found in isoschizomer list */
        	    
    /* for parsing value->iso string */
    AjPStrTok tok = NULL;
    char tokens[] = " ,";
    AjPStr code = NULL;

    iter = ajListIterNewread(l);
    
    while((m = (EmbPMatMatch)ajListIterGet(iter)))
    {
	found = ajFalse;
	
    	/* get prototype name */
    	value = ajTableFetchS(t, m->cod);
    	if(value) 
    	{
    	    ajStrAssignC(&newiso, "");

	    /* parse isoschizomer names from m->iso */
	    ajStrTokenDel(&tok);
            tok = ajStrTokenNewC(m->iso,  tokens);
            while(ajStrTokenNextParseC(&tok, tokens, &code))
            {
	        if(ajStrGetLen(newiso) > 0)
	            ajStrAppendC(&newiso, ",");

		/* found the prototype name? */
	        if(!ajStrCmpCaseS(code, value)) 
	        {
	            ajStrAppendS(&newiso, m->cod);
	            found = ajTrue;
	        }
		else
	            ajStrAppendS(&newiso, code);
            }
            ajStrTokenDel(&tok);

	    /* if the name was not replaced, then add it in now */
	    if(!found)
	    {
	        if(ajStrGetLen(newiso) > 0)
	            ajStrAppendC(&newiso, ",");

	        ajStrAppendS(&newiso, m->cod);	
	    }
	    
	    ajDebug("RE: %S -> %S iso=%S newiso=%S\n", m->cod, value,
		    m->iso, newiso);

	    /* replace the old iso string with the new one */
	    ajStrAssignS(&m->iso, newiso);

	    /* rename the enzyme to the prototype name */
    	    ajStrAssignS(&m->cod, value);
    	}
    }
    
    ajListIterDel(&iter);     
    ajStrDel(&newiso);
    ajStrDel(&code);
    ajStrTokenDel(&tok);

    return; 
}
Ejemplo n.º 5
0
int main(int argc, char **argv)
{
    AjPSeqset seq1;
    AjPSeqset seq2;
    AjPFile list;
    ajint n1;
    ajint n2;
    ajint *lengths1;
    ajint *lengths2;
    ajuint *order1;
    ajuint *order2;
    ajint *hits1;
    ajint *hits2;
    ajint curr1;
    ajint curr2;
    ajint tmp1;
    ajint tmp2 = 0;
    ajint i;
    AjPStr operator;
    ajint OperatorCode=0;


    embInit("listor", argc, argv);

    seq1     = ajAcdGetSeqset("firstsequences");
    seq2     = ajAcdGetSeqset("secondsequences");
    list     = ajAcdGetOutfile("outfile");
    operator = ajAcdGetListSingle("operator");

    /* get the operator value */
    switch(ajStrGetCharFirst(operator))
    {
    case 'O':
	OperatorCode = L_OR;
	break;
    case 'A':
	OperatorCode = L_AND;
	break;
    case 'X':
	OperatorCode = L_XOR;
	break;
    case 'N':
	OperatorCode = L_NOT;
	break;
    default:
	ajFatal("Invalid operator type: %S", operator);
	embExitBad();
    }


    /* get the order of seqset 1 by length */
    n1 = ajSeqsetGetSize(seq1);

    /* lengths of seq1 entries */
    lengths1 = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries which match seq2 */
    hits1    = AJCALLOC0(n1, sizeof(ajint));

    /* seq1 entries in length order */
    order1   = AJCALLOC0(n1, sizeof(ajint));
    for(i=0; i<n1; i++)
    {
	lengths1[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq1, i));
	order1[i]   = i;
	hits1[i]    = -1;
    }
    ajSortIntIncI(lengths1, order1, n1);

    /* get the order of seqset 2 by length */
    n2 = ajSeqsetGetSize(seq2);
    lengths2 = AJCALLOC0(n2, sizeof(ajint));
    hits2    = AJCALLOC0(n2, sizeof(ajint));
    order2   = AJCALLOC0(n2, sizeof(ajint));

    for(i=0; i<n2; i++)
    {
	lengths2[i] = ajSeqGetLen(ajSeqsetGetseqSeq(seq2, i));
	order2[i]   = i;
	hits2[i]    = -1;
    }
    ajSortIntIncI(lengths2, order2, n2);

    /*
    ** go down the two sequence sets, by size order, looking for identical
    **lengths
    */
    curr1 = 0;
    curr2 = 0;
    while(curr1 < n1 &&  curr2 < n2)
    {
	if(lengths1[order1[curr1]] < lengths2[order2[curr2]])
	    /* seq1 is shorter - increment curr1 index */
	    curr1++;
	else if(lengths1[order1[curr1]] > lengths2[order2[curr2]])
	    /* seq2 is shorter - increment curr2 index */
	    curr2++;
	else
	{
	    /* identical lengths - check all seq1/seq2 entries of this len */
	    for(tmp1=curr1; tmp1<n1
		 && lengths1[order1[tmp1]] == lengths2[order2[curr2]]; tmp1++)
		for(tmp2=curr2; tmp2<n2 && lengths2[order2[tmp2]] ==
		    lengths2[order2[curr2]]; tmp2++)
		    /* check to see if the sequences are identical */
		    if(!ajStrCmpCaseS(ajSeqGetSeqS(ajSeqsetGetseqSeq(seq1,
							     order1[tmp1])),
				      ajSeqGetSeqS(ajSeqsetGetseqSeq(seq2,
				      order2[tmp2]))))
		    {
			hits1[order1[tmp1]] = order2[tmp2];
			hits2[order2[tmp2]] = order1[tmp1];
		    }

	    curr1 = tmp1;
	    curr2 = tmp2;
	}
    }

    /* output the required entries to the list file */
    listor_Output(list, OperatorCode, seq1, seq2, hits1, hits2, n1, n2);


    AJFREE(lengths1);
    AJFREE(lengths2);
    AJFREE(order1);
    AJFREE(order2);
    AJFREE(hits1);
    AJFREE(hits2);
    ajFileClose(&list);
    ajStrDel(&operator);

    ajSeqsetDel(&seq1);
    ajSeqsetDel(&seq2);

    embExit();

    return 0;
}