Esempio n. 1
0
static void messTableDelete(AjPTable* table) 
{
    void **keyarray = NULL;
    void **valarray = NULL;
    ajint i;

    if(!table)
	return;
    if(!*table)
	return;

    ajTableToarrayKeysValues(*table, &keyarray, &valarray);

    for(i = 0; keyarray[i]; i++)
    {
	AJFREE(keyarray[i]);
	AJFREE(valarray[i]);
    }

    AJFREE(keyarray);
    AJFREE(valarray);

    ajTableFree(table);
    *table = NULL;

    return;
}
Esempio n. 2
0
static void remap_DelTable(AjPTable * table)
{

    void **keyarray = NULL;		/* array for table */
    void **valarray = NULL;		/* array for table */
    ajint i;
    PValue value;

    if(ajTableGetLength(*table))
    {
      ajTableToarrayKeysValues(*table, &keyarray, &valarray);
      for(i = 0; keyarray[i]; i++)
      {
          value = (PValue) valarray[i];
          ajStrDel(&(value->iso));
          AJFREE(valarray[i]);	/* free the ajint* value */
	  ajStrDel((AjPStr*)&keyarray[i]);
      }
      AJFREE(keyarray);
      AJFREE(valarray);
    }
    ajTableFree(table);

    return;
}
Esempio n. 3
0
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable,
			    AjBool html, const AjPStr enzymes, AjBool blunt,
			    AjBool sticky, ajuint sitelen, AjBool commercial,
			    AjBool ambiguity, AjBool limit,
			    const AjPTable retable)
{

    /* for iterating over hittable */
    PValue value;
    void **keyarray = NULL;			/* array for table */
    void **valarray = NULL;			/* array for table */
    ajint i;

    /* list of enzymes that cut */
    AjPList cutlist;
    AjIList citer;			/* iterator for cutlist */
    AjPStr cutname = NULL;
    AjBool found;

    /* for parsing value->iso string */
    AjPStrTok tok;
    char tokens[] = " ,";
    AjPStr code = NULL;
    const char *p;

    /* for reading in enzymes names */
    AjPFile enzfile = NULL;
    AjPStr *ea;
    ajint ne;				/* number of enzymes */
    AjBool isall = ajTrue;

    /* list of enzymes that don't cut */
    AjPList nocutlist;
    AjIList niter;			/* iterator for nocutlist */
    AjPStr nocutname = NULL;

    /* count of rejected enzymes not matching criteria */
    ajint rejected_count = 0;

    EmbPPatRestrict enz;

    /* for renaming preferred isoschizomers */
    AjPList newlist;

     /*
     **
     ** Make a list of enzymes('cutlist') that hit
     ** including the isoschizomer names
     **
     */
    ajDebug("Make a list of all enzymes that cut\n");


    cutlist   = ajListstrNew();
    nocutlist = ajListstrNew();


    ajTableToarrayKeysValues(hittable, &keyarray, &valarray);
    for(i = 0; keyarray[i]; i++)
    {
        value = (PValue) valarray[i];
        cutname = ajStrNew();
        ajStrAssignRef(&cutname, keyarray[i]);
        ajListstrPushAppend(cutlist, cutname);

        /* Add to cutlist all isoschizomers of enzymes that cut */
        ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n");

        /* start token to parse isoschizomers names */
        tok = ajStrTokenNewC(value->iso,  tokens);
        while(ajStrTokenNextParseC(&tok, tokens, &code))
        {
            cutname = ajStrNew();
            ajStrAssignS(&cutname, code);
            ajListstrPushAppend(cutlist, cutname);
        }
        ajStrTokenDel(&tok);
    }
    ajStrDel(&code);
    AJFREE(keyarray);
    AJFREE(valarray);



     /*
     ** Read in list of enzymes ('nocutlist') - either all or
     ** the input enzyme list.
     ** Exclude those that don't match the selection criteria - count these.
     */

    ajDebug("Read in a list of all input enzyme names\n");

    ne = 0;
    if(!enzymes)
	isall = ajTrue;
    else
    {
	/* get input list of enzymes into ea[] */
	ne = ajArrCommaList(enzymes, &ea);
	if(ajStrMatchCaseC(ea[0], "all"))
	    isall = ajTrue;
	else
	{
	    isall = ajFalse;
	    for(i=0; i<ne; ++i)
		ajStrRemoveWhite(&ea[i]);
	}
    }

    enzfile = ajDatafileNewInNameC(ENZDATA);

    /* push all enzyme names without the required criteria onto nocutlist */

    enz = embPatRestrictNew();
    while(!ajFileIsEof(enzfile))
    {
        if(!embPatRestrictReadEntry(enz, enzfile))
	    continue;

         /* 
	 ** If user entered explicit enzyme list, then check to see if
	 ** this is one of that explicit list 
	 */
        if(!isall)
	{
            found = AJFALSE;
            for(i=0; i<ne; ++i)
                if(ajStrMatchCaseS(ea[i], enz->cod))
		{
		    found = AJTRUE;
                    break;
                }

	    if(!found)			/* not in the explicit list */
		continue;

	    ajDebug("RE %S is in the input explicit list of REs\n", enz->cod);
	}

	/* ignore ncuts==0 as they are unknown */
	if(!enz->ncuts)
	{
	    /* number of cut positions */
            ajDebug("RE %S has an unknown number of cut positions\n",
		    enz->cod);
	    continue;
	}
        ajDebug("RE %S has a known number of cut sites\n", enz->cod);

	if(enz->len < sitelen)
	{
	    /* recognition site length */
            ajDebug("RE %S does not have a long enough recognition site\n", 
		    enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!blunt && enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is blunt\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!sticky && !enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is sticky\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	/* commercially available enzymes have uppercase patterns */
	p = ajStrGetPtr(enz->pat);

         /* 
	 ** The -commercial qualifier is only used if we are searching
	 ** through 'all' of the REBASE database - if we have specified an
	 ** explicit list of enzymes then they are searched for whether or
	 ** not they are commercially available
	 */
	if((*p >= 'a' && *p <= 'z') && commercial && isall)
	{
            ajDebug("RE %S is not commercial\n", enz->cod);
	    rejected_count++;
	    continue;
        }

	if(!ambiguity && remap_Ambiguous(enz->pat)) {
	    ajDebug("RE %S is ambiguous\n", enz->cod);
	    rejected_count++;
	    continue;	
	}

        ajDebug("RE %S matches all required criteria\n", enz->cod);

        code = ajStrNew();
	ajStrAssignS(&code, enz->cod);
	ajListstrPushAppend(nocutlist, code);
    }
    embPatRestrictDel(&enz);
    ajFileClose(&enzfile);


    for(i=0; i<ne; ++i)
	if(ea[i])
	    ajStrDel(&ea[i]);

    if(ne)
	AJFREE(ea);

    /*
     ** Change names of enzymes in the non-cutter list
     ** to that of preferred (prototype) 
     ** enzyme name so that the isoschizomers of cutters
     ** will be removed from the 
     ** non-cutter list in the next bit.
     ** Remove duplicate prototype names.
     */
    if(limit)
    {
        newlist = ajListstrNew();
        remap_RenamePreferred(nocutlist, retable, newlist);
        ajListstrFreeData(&nocutlist);
        nocutlist = newlist;
        ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel);
    }


     /*
     ** Iterate through the list of input enzymes removing those that are in
     ** the cutlist.
     */

    ajDebug("Remove from the nocutlist all enzymes and isoschizomers "
	    "that cut\n");

     /*
     **  This steps down both lists at the same time, comparing names and
     **  iterating to the next name in whichever list whose name compares
     **  alphabetically before the other.  Where a match is found, the
     **  nocutlist item is deleted.
     */

    ajListSort(nocutlist, remap_cmpcase);
    ajListSort(cutlist, remap_cmpcase);

    citer = ajListIterNewread(cutlist);
    niter = ajListIterNew(nocutlist);

    /*
       while((cutname = (AjPStr)ajListIterGet(citer)) != NULL)
       ajDebug("dbg cutname = %S\n", cutname);
       */

    nocutname = (AjPStr)ajListIterGet(niter);
    cutname   = (AjPStr)ajListIterGet(citer);

    ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname);

    while(nocutname != NULL && cutname != NULL)
    {
	i = ajStrCmpCaseS(cutname, nocutname);
	ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname);
	ajDebug("ajStrCmpCase=%d\n", i);
	if(i == 0)
	{
	    /* match - so remove from nocutlist */
	    ajDebug("ajListstrRemove %S\n", nocutname);
	    ajListstrIterRemove(niter);
	    nocutname = (AjPStr)ajListIterGet(niter);
	     /* 
	     ** Don't increment the cutname list pointer here
	     ** - there may be more than one entry in the nocutname
	     ** list with the same name because we have converted 
	     ** isoschizomers to their preferred name
	     */
	    /* cutname = (AjPStr)ajListIterGet(citer); */
	}
	else if(i == -1)
	    /* cutlist name sorts before nocutlist name */
	    cutname = (AjPStr)ajListIterGet(citer);
	else if(i == 1)
	    /* nocutlist name sorts before cutlist name */
	    nocutname = (AjPStr)ajListIterGet(niter);
    }

    ajListIterDel(&citer);
    ajListIterDel(&niter);
    ajListstrFreeData(&cutlist);


     /* Print the resulting list of those that do not cut*/

    ajDebug("Print out the list\n");

    /* print the title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(html)
	ajFmtPrintF(outfile, "<PRE>");

    /*  ajListSort(nocutlist, ajStrVcmp);*/
    niter = ajListIterNewread(nocutlist);
    i = 0;
    while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL)
    {
	ajFmtPrintF(outfile, "%-10S", nocutname);
	/* new line after every 7 names printed */
	if(i++ == 7)
	{
	    ajFmtPrintF(outfile, "\n");
	    i = 0;
	}
    }
    ajListIterDel(&niter);


    /* end the output */
    ajFmtPrintF(outfile, "\n");
    if(html) {ajFmtPrintF(outfile, "</PRE>\n");}



     /*
     ** Print the count of rejected enzymes
     ** N.B. This is the count of ALL rejected enzymes including all
     ** isoschizomers
     */

    if(html)
        ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile,
		"\n\n# No. of cutting enzymes which do not match the\n"
		"# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n");
    if(html)
	ajFmtPrintF(outfile, "</H2>\n");
    ajFmtPrintF(outfile, "%d\n", rejected_count);

    ajDebug("Tidy up\n");
    ajListstrFreeData(&nocutlist);
    ajListstrFreeData(&cutlist);

    return;
}
Esempio n. 4
0
int main(int argc, char **argv)
{
    const char *codons[]=
    {
	"TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */
	"TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */
	"GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */
	"AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */
	"ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */
	"CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */
	"TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */
	"ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC"	 /* 56-63 */
    };

    const char *aa=
	"***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY";

    AjPFile inf     = NULL;
    AjPFile outf    = NULL;
    char *entryname = NULL;
    AjPStr fname    = NULL;
    AjPStr key      = NULL;
    AjPStr tmpkey   = NULL;
    AjBool allrecords = AJFALSE;

    AjPTable table  = NULL;
    ajint i = 0;
    ajint j = 0;
    ajint k = 0;
    ajint x = 0;
    ajint savecount[3];

    AjPStr *keyarray = NULL;
    CutgPValues *valarray = NULL;
    AjPCod codon  = NULL;
    ajint sum = 0;
    char c;

    AjPList flist = NULL;
    AjPFile logf = NULL;
    AjPStr  entry = NULL;
    AjPStr  baseentry = NULL;
    AjPStr  wild  = NULL;
    AjPStr division = NULL;
    AjPStr release = NULL;
    AjPStr wildspecies = NULL;
    CutgPValues value = NULL;
    AjPStr docstr = NULL;
    AjPStr species = NULL;
    AjPStr filename = NULL;
    ajint nstops;

    embInit("cutgextract",argc,argv);

    tmpkey = ajStrNew();
    fname  = ajStrNew();


    table = ajTablestrNewLen(TABLE_ESTIMATE);


    flist = ajAcdGetDirlist("directory");
    wild  = ajAcdGetString("wildspec");
    release  = ajAcdGetString("release");
    logf = ajAcdGetOutfile("outfile");
    wildspecies = ajAcdGetString("species");
    filename = ajAcdGetString("filename");
    allrecords = ajAcdGetBoolean("allrecords");

    ajStrInsertC(&release, 0, "CUTG");
    ajStrRemoveWhite(&release);

    while(ajListPop(flist,(void **)&entry))
    {
	ajStrAssignS(&baseentry, entry);
	ajFilenameTrimPath(&baseentry);
	ajDebug("Testing file '%S'\n", entry);
	if(!ajStrMatchWildS(baseentry,wild))
	{
	    ajStrDel(&entry);
	    continue;
	}

	ajDebug("... matched wildcard '%S'\n", wild);
	inf = ajFileNewInNameS(entry);
	if(!inf)
	    ajFatal("cannot open file %S",entry);

	ajFmtPrintS(&division, "%F", inf);
	ajFilenameTrimAll(&division);

	while((entryname = cutgextract_next(inf, wildspecies,
					    &species, &docstr)))
	{
	    if(ajStrGetLen(filename))
		ajStrAssignS(&tmpkey,filename);
	    else
		ajStrAssignC(&tmpkey,entryname);

	    /* See if organism is already in the table */
	    value = ajTableFetch(table,tmpkey);
	    if(!value)			/* Initialise */
	    {
		key = ajStrNewS(tmpkey);
		AJNEW0(value);
		ajStrAssignS(&value->Species,species);
		ajStrAssignS(&value->Division, division);
		ajTablePut(table,(void *)key,(void *)value);
	    }
	    for(k=0;k<3;k++)
		savecount[k] = value->Count[k];
	    nstops = cutgextract_readcodons(inf,allrecords, value->Count);
	    if(nstops < 1)
	    {
		value->Skip++;
		continue;
	    }
	    value->CdsCount++;
	    if(nstops>1)
	    {
		value->CdsCount += (nstops - 1);
		value->Warn++;
		ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'",
		       nstops,
		       value->Count[0] - savecount[0],
		       value->Count[1] - savecount[1],
		       value->Count[2] - savecount[2],
		       cutgextractSavepid);
	    }
	}
	ajStrDel(&entry);
	ajFileClose(&inf);
    }

    ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray);

    i = 0;
    while(keyarray[i])
    {
	key   = keyarray[i];
	value = (CutgPValues) valarray[i++];
	codon = ajCodNew();
	sum   = 0;
	for(j=0;j<CODONS;++j)
	{
	    sum += value->Count[j];
	    x = ajCodIndexC(codons[j]);
	    codon->num[x] = value->Count[j];

	    c = aa[j];
	    if(c=='*')
		codon->aa[x] = 27;
	    else
		codon->aa[x] = c-'A';
	}
	ajCodCalcUsage(codon,sum);

	ajStrAppendC(&key, ".cut");
	if(allrecords)
	{
	    if(value->Warn)
		ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n",
			    key, value->CdsCount, value->Warn);
	    else
		ajFmtPrintF(logf, "Writing %S CDS: %d\n",
			    key, value->CdsCount);
	}
	else
	{
	    if(value->Skip)
		ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n",
			    key, value->CdsCount, value->Skip);
	    else
		ajFmtPrintF(logf, "Writing %S CDS: %d\n",
			    key, value->CdsCount);
	}

	ajFmtPrintS(&fname,"CODONS/%S",key);
	outf = ajDatafileNewOutNameS(fname);
	if(!outf)
	    ajFatal("Cannot open output file %S",fname);

	ajCodSetNameS(codon, key);
	ajCodSetSpeciesS(codon, value->Species);
	ajCodSetDivisionS(codon, value->Division);
	ajCodSetReleaseS(codon, release);
	ajCodSetNumcds(codon, value->CdsCount);
	ajCodSetNumcodons(codon, sum);

	ajCodWrite(codon, outf);
	ajFileClose(&outf);


	ajStrDel(&key);
	ajStrDel(&value->Division);
	ajStrDel(&value->Doc);
	ajStrDel(&value->Species);
	AJFREE(value);
	ajCodDel(&codon);
    }

    AJFREE(keyarray);
    AJFREE(valarray);

    ajTableFree(&table);
    ajListFree(&flist);
    ajStrDel(&wild);
    ajStrDel(&release);
    ajStrDel(&wildspecies);
    ajStrDel(&filename);
    ajFileClose(&logf);

    ajStrDel(&cutgextractSavepid);
    ajStrDel(&cutgextractLine);
    ajStrDel(&cutgextractOrg);

    ajStrDel(&fname);
    ajStrDel(&tmpkey);
    ajStrDel(&species);
    ajStrDel(&docstr);
    ajStrDel(&division);
    ajStrDel(&baseentry);

    embExit();

    return 0;
}