示例#1
0
AjPMatrix ajMatrixNew(const AjPPStr codes, ajint n, const AjPStr filename)
{
    ajint     i   = 0;
    AjPMatrix ret = NULL;
    ajint nsize;

    if((!n) || (!codes) || (!filename))
	return NULL;

    nsize = n + 1;

    AJNEW0(ret);

    ajStrAssignS(&ret->Name, filename);

    AJCNEW0(ret->Codes, n);

    for(i=0; i<n; i++)
	ret->Codes[i] = ajStrNew();
    
    for(i=0; i<n; i++)
	ajStrAssignS(&ret->Codes[i], codes[i]);

    ret->Size = nsize;

    AJCNEW0(ret->Matrix, nsize);

    for(i=0; i<nsize; i++)
	AJCNEW0(ret->Matrix[i], nsize);

    ret->Cvt = ajSeqcvtNewStr(codes, n);

    return ret;
}
示例#2
0
AjPMatrixf ajMatrixfNewAsym(const AjPPStr codes, ajint n, 
			    const AjPPStr rcodes, ajint rn, 
			    const AjPStr filename)
{
    ajint i = 0;
    AjPMatrixf ret = 0;
    ajint nsize;
    ajint rnsize;
 
    if((!n) || (!codes) || (!filename))
	return NULL;

    nsize = n + 1;
    rnsize = rn + 1;

    AJNEW0(ret);

    ajStrAssignS(&ret->Name, filename);

    AJCNEW0(ret->Codes, n);

    for(i=0; i<n; i++)
	ret->Codes[i] = ajStrNew();

    for(i=0; i<n; i++)
	ajStrAssignS(&ret->Codes[i], codes[i]);

    ret->Size = nsize;


    AJCNEW0(ret->CodesRow, rn);

    for(i=0; i<rn; i++)
	ret->CodesRow[i] = ajStrNew();

    for(i=0; i<rn; i++)
	ajStrAssignS(&ret->CodesRow[i], rcodes[i]);

    ret->SizeRow = rnsize;


    AJCNEW0(ret->Matrixf, rnsize);

    for(i=0; i<rnsize; i++)
	AJCNEW0(ret->Matrixf[i], nsize);

    ret->Cvt = ajSeqcvtNewStrAsym(codes, n, rcodes, rn);

    return ret;
}
示例#3
0
文件: ajreg.c 项目: ICO2S/emboss
AjPRegexp ajRegCompC(const char* rexp)
{
    AjPRegexp ret;
    int options = 0;
    int errpos  = 0;
    const char *errptr            = NULL;
    const unsigned char *tableptr = NULL;

    AJNEW0(ret);
    AJCNEW0(ret->ovector, AJREG_OVECSIZE);
    ret->ovecsize = AJREG_OVECSIZE/3;
    ret->pcre = pcre_compile(rexp, options, &errptr, &errpos, tableptr);

    if(!ret->pcre)
    {
	ajErr("Failed to compile regular expression '%s' at position %d: %s",
	      rexp, errpos, errptr);
	AJFREE(ret);
	return NULL;
    }

    regAlloc += sizeof(ret);
    regCount ++;
    regTotal ++;
    /*ajDebug("ajRegCompC %x size %d regexp '%s'\n",
      ret, (int) sizeof(ret), rexp);*/

    return ret;
}
示例#4
0
static ajuint jaspscan_readmatrix(const AjPStr mfname, float ***matrix)
{

    AjPFile inf  = NULL;
    AjPStr line  = NULL;

    ajuint i = 0;
    ajuint cols = 0;

    AJCNEW0(*matrix,4);

    line = ajStrNew();

    inf = ajFileNewInNameS(mfname);
    if(!inf)
	ajFatal("Cannot open matrix file %S",mfname);

    i = 0;
    while(ajReadlineTrim(inf,&line))
    {
	if(!i)
	    cols = ajStrParseCountC(line," \n");

	(*matrix)[i++] = ajArrFloatLine(line," \n",1,cols);
    }
    


    ajStrDel(&line);
    ajFileClose(&inf);
    
    return cols;;
}
示例#5
0
文件: ajhist.c 项目: WenchaoLin/JAMg
AjPHist ajHistNew(ajuint numofsets, ajuint numofpoints)
{
    static AjPHist hist = NULL;
    ajuint i;

    AJNEW0(hist);

    hist->numofsets = 0;
    hist->numofsetsmax    = numofsets;
    hist->numofdatapoints = numofpoints;
    hist->xmin = 0;
    hist->xmax = 0;
    hist->displaytype = HIST_SIDEBYSIDE; /* default draw
					    multiple histograms
					    side by side */
    hist->bins = 0;
    hist->BaW  = AJFALSE;
    ajStrAssignEmptyC(&hist->title,"");
    ajStrAssignEmptyC(&hist->xaxis,"");
    ajStrAssignEmptyC(&hist->yaxisleft,"");
    ajStrAssignEmptyC(&hist->yaxisright,"");

    AJCNEW0(hist->hists,numofsets);

    for(i=0;i<numofsets; i++)
    {
	AJNEW0((hist->hists[i]));
	(hist->hists)[i]->data       = NULL;
	(hist->hists)[i]->deletedata = AJFALSE;
	(hist->hists)[i]->colour  = i+2;
	(hist->hists)[i]->pattern = 0;
	(hist->hists)[i]->title = NULL;
	(hist->hists)[i]->xaxis = NULL;
	(hist->hists)[i]->yaxis = NULL;
    }

    return hist;
}
示例#6
0
文件: embprop.c 项目: WenchaoLin/JAMg
EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    AjPStr  line  = NULL;
    AjPStr  token = NULL;
    
    AjBool firstline;
    
    const char *p;
    ajuint i;
    ajint  n;
    
    EmbPPropAmino *ret;

    line  = ajStrNew();
    token = ajStrNew();
    
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE);

    for(i=0; i < EMBPROPSIZE; ++i)
	AJNEW0(ret[i]);
    
    while(ajReadline(mfptr, &line))
    {
	ajStrRemoveWhiteExcess(&line);
	p = ajStrGetPtr(line);

	if(*p=='#' || *p=='!' || !*p)
	    continue;

	if(firstline)
	{
	    if(!ajStrPrefixC(line,"aa"))
		ajFatal("Incorrect (old?) format amino data file");

	    firstline = ajFalse;
	    continue;
	}

	ajFmtScanS(line,"%S",&token);
	ajStrFmtUpper(&token);

	if(ajStrGetLen(token) != 1)
	    ajFatal("Amino file line doesn't begin with a single character");

	i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

	if(i == 27)
	    ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
		    line);

	n = ajFmtScanS(line,"%*s%d%d%d%d%d%d%f%d%d%d",
		       &ret[i]->tiny,
		       &ret[i]->sm_all,
		       &ret[i]->aliphatic,
		       &ret[i]->aromatic,
		       &ret[i]->nonpolar,
		       &ret[i]->polar,
		       &ret[i]->charge,
		       &ret[i]->pve,
		       &ret[i]->nve,
		       &ret[i]->extcoeff);
	if(n!= 10)
	    ajFatal("Only %d columns in amino file - expected %d",n+1,11);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
示例#7
0
static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPSeqBamHeader header = NULL;
    AjPAssemContig c = NULL;
    AjPSeqBam bam;
    AjPAssemRead   r = NULL;
    AjPAssemContig* contigs = NULL;
    AjPAssemTag    t = NULL;
    AjIList j = NULL;
    AjPSeqBamBgzf gzfile = NULL;
    AjPStr headertext=NULL;
    const AjPStr rgheadertext=NULL;
    AjBool ret = ajTrue;
    ajint i=0;
    ajulong ncontigs=0UL;

    if(!outf) return ajFalse;
    if(!assem) return ajFalse;

    if(!assem->Hasdata)
    {
        if(ajListGetLength(assem->ContigsOrder))
            ncontigs = ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ncontigs = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));
        header = ajSeqBamHeaderNewN((ajuint) ncontigs);

        gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
        outfile->OutData = gzfile;

        while (contigs[i])   /* contigs */
        {
            c = contigs[i];

            if(ajStrMatchC(c->Name, "*"))
            {
                i++;
                continue;
            }

            header->target_name[i] = strdup(ajStrGetPtr(c->Name));
            header->target_len[i++] = c->Length;

            ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
                           c->Name, c->Length);

            if(c->URI)
                ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);

            if(c->MD5)
                ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);

            if(c->Species)
                ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);

            ajFmtPrintAppS(&headertext, "\n");


            j = ajListIterNewread(c->Tags);
            while (!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintAppS(&headertext,
                               "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
                               t->Comment);
            }
            ajListIterDel(&j);
        }

        rgheadertext = assemSAMGetReadgroupHeaderlines(assem);
        if(rgheadertext)
            ajStrAppendS(&headertext, rgheadertext);

        ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
        ajSeqBamHeaderWrite(gzfile, header);

        ajSeqBamHeaderDel(&header);
        ajStrDel(&headertext);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */

    gzfile = outfile->OutData;

    AJNEW0(bam);
    bam->m_data=10;
    AJCNEW0(bam->data, bam->m_data);

    j = ajListIterNewread(assem->Reads);

    while (!ajListIterDone(j))  /* reads */
    {
	r = ajListIterGet(j);
	assemoutWriteBamAlignment(gzfile, r, bam);
    }

    ajListIterDel(&j);

    AJFREE(bam->data);
    AJFREE(bam);

    /* ajSeqBamBgzfClose(gzfile);*/

    return ret;
}
示例#8
0
AjPMatrixf ajMatrixfNewFile(const AjPStr filename)
{
    AjPMatrixf ret = NULL;
    AjPStr *orderstring = NULL;
    AjPStr buffer       = NULL;
    AjPStr firststring  = NULL;
    AjPStr reststring   = NULL;
    const AjPStr tok    = NULL;

    ajint len  = 0;
    ajint i    = 0;
    ajint l    = 0;
    ajint k    = 0;
    ajint cols = 0;
    ajint rows   = 0;
    
    const char *ptr = NULL;

    AjPFile file    = NULL;
    AjBool  first   = ajTrue;

    float **matrix  = NULL;
    float *templine = NULL;
    float minval    = -1.0;

    AjPList rlabel_list = NULL;
    AjPStr  *rlabel_arr  = NULL;
#ifndef WIN32
    static const char *delimstr = " :\t\n";
#else
    static const char *delimstr = " :\t\n\r";
#endif



    rlabel_list = ajListNew();
    

    
    firststring = ajStrNew();
    reststring  = ajStrNew();

    file = ajDatafileNewInNameS(filename);
    
    if(!file)
    {
	ajStrDel(&firststring);
	ajStrDel(&reststring);

	return NULL;
    }
    

    /* Read row labels */
    while(ajReadline(file,&buffer))
    {
	ptr = ajStrGetPtr(buffer);
#ifndef WIN32
	if(*ptr != '#' && *ptr != '\n')
#else
	if(*ptr != '#' && *ptr != '\n' && *ptr != '\r')
#endif
	{	
	    if(first)
		first = ajFalse;
	    else
	    {
		ajFmtScanC(ptr, "%S", &firststring);
		ajListPushAppend(rlabel_list, firststring);
		firststring = ajStrNew();
	    }
	}
    }
    first = ajTrue;
    ajStrDel(&firststring);
    rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr);
    ajFileSeek(file, 0, 0);


    while(ajReadline(file,&buffer))
    {
	ajStrRemoveWhiteExcess(&buffer);
	ptr = ajStrGetPtr(buffer);

	if(*ptr && *ptr != '#')
	{				
	    if(first)
	    {
		cols = ajStrParseCountC(buffer,delimstr);
		AJCNEW0(orderstring, cols);

		for(i=0; i<cols; i++)   
		    orderstring[i] = ajStrNew();

		tok = ajStrParseC(buffer, " :\t\n");
		ajStrAssignS(&orderstring[l++], tok);

		while((tok = ajStrParseC(NULL, " :\t\n")))
		    ajStrAssignS(&orderstring[l++], tok);

		first = ajFalse;

		ret = ajMatrixfNewAsym(orderstring, cols, 
						 rlabel_arr, rows, 
						 filename);
		matrix = ret->Matrixf;
	    }
	    else
	    {
		ajFmtScanC(ptr, "%S", &firststring);
		/* JISON 19/7/4 
		   k = ajSeqcvtGetCodeK(ret->Cvt,
                   ajStrGetCharFirst(firststring)); */
		k = ajSeqcvtGetCodeS(ret->Cvt, firststring); 

		len = MAJSTRGETLEN(firststring);
		ajStrAssignSubC(&reststring, ptr, len, -1);

		/* 
		** Must discard the first string (label) and use 
		** reststring otherwise ajArrFloatLine would fail (it 
		** cannot convert a string to a float)
		**   
		** Use cols,1,cols in below because although 2nd and 
		** subsequent lines have one more string in them (the
		** residue label in the 1st column) we've discarded that
		** from the string that's passed
		*/
		templine = ajArrFloatLine(reststring,delimstr,1,cols);
		
		for(i=0; i<cols; i++)  
		{
		    if(templine[i] < minval) 
			minval = templine[i];

		    /* JISON 19/7/4
		    matrix[k][ajSeqcvtGetCodeK(ret->Cvt,
					ajStrGetCharFirst(orderstring[i]))] 
					    = templine[i]; */

		    matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt,
					       orderstring[i])] 
						   = templine[i];
		}
		AJFREE(templine);
	    }
	}
    }
    ajDebug("fill rest with minimum value %d\n", minval);
    

    ajFileClose(&file);
    ajStrDel(&buffer);

    for(i=0; i<cols; i++)   
	ajStrDel(&orderstring[i]);

    AJFREE(orderstring);


    ajDebug("read matrix file %S\n", filename);
    
    ajStrDel(&firststring);
    ajStrDel(&reststring);

   for(i=0; i<rows; i++)   
	ajStrDel(&rlabel_arr[i]);

   AJFREE(rlabel_arr);
   ajListFree(&rlabel_list);

   return ret;
}
示例#9
0
int main(int argc, char **argv)
{

    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajint blastv = 0;
    char dbtype  = '\0';

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPStr version = NULL;
    AjPStr seqtype = NULL;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjBool usesrc = AJTRUE;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPStr idformat = NULL;

    EmbPEntry entry;

    PBlastDb db = NULL;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
	0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile = NULL;

    AjPStr* divfiles   = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    idformat = ajStrNewC("NCBI");

    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    version    = ajAcdGetListSingle("blastversion");
    seqtype    = ajAcdGetListSingle("seqtype");
    usesrc     = ajAcdGetBoolean("sourcefile");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])		/* array ends with a NULL */
	nfields++;

    if(nfields)
    {
	AJCNEW(maxFieldLen, nfields);
	AJCNEW0(countField, nfields);
	AJCNEW0(fieldTot, nfields);
	for(ifield=0; ifield < nfields; ifield++)
	    maxFieldLen[ifield] = (ajint) maxindex * -1;

	if(systemsort)
	    AJCNEW(alistfile, nfields);
	else
	{
	    AJCNEW(fieldList, nfields);
	    for(ifield=0; ifield < nfields; ifield++)
		fieldList[ifield] = ajListNew();
	}
    }
    
    if(ajStrMatchC(datestr, "00/00/00"))
	ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);		/* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();
    
    if(ajUtilGetBigendian())
	readReverse = ajFalse;
    else
	readReverse = ajTrue;
    
    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);
    
    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);
    
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);
    
    embDbiLogHeader(logfile, dbname, release, datestr,
		     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
		    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);
    
    /*
    ** process each input file, one at a time
    */
    
    jfile = 0;
    for(ifile=0; ifile < nfiles; ifile++)
    {
	curfilename = (AjPStr) testFiles[ifile];
	if(!dbiblast_blastopenlib(curfilename,
				  usesrc, blastv, dbtype, &db))
	    continue;	 /* could be the wrong file type with "*.*" */

	ajDebug("processing filename '%S' ...\n", curfilename);
	ajDebug("processing file '%S' ...\n", db->TFile->Name);


	ajStrAssignS(&divfiles[jfile], db->TFile->Name);
	ajFilenameTrimPath(&divfiles[jfile]);
	if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
	    maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

	if(systemsort)	 /* elistfile for entries, alist for fields */
	    elistfile = embDbiSortOpen(alistfile, jfile,
				       dbname, fields, nfields);

	idCountFile = 0;
	for(i=0;i<nfields;i++)
	    countField[i] = 0;
	while((entry=dbiblast_nextblastentry(db, jfile,
					     idformat, systemsort,
					     fields,
					     maxFieldLen,
					     &maxidlen, countField,
					     elistfile, alistfile)))
	{
	    idCountFile++;
	    if(!systemsort)	    /* save the entry data in lists */
	    {
		embDbiMemEntry(idlist, fieldList, nfields, entry, jfile);
	    }
	}
	idCount += idCountFile;
	if(systemsort)
	{
	    embDbiSortClose(&elistfile, alistfile, nfields);
	    /* lost the entry, so can't free it :-) */
	}

	embDbiLogFile(logfile, curfilename, idCountFile, fields,
		      countField, nfields);
	dbiblast_dbfree(&db);
	jfile++;
    }
    nfiles = jfile;
    
    /*
    ** write the division.lkp file
    */
    
    embDbiWriteDivision(indexdir, dbname, release, date,
			maxfilelen, nfiles, divfiles, NULL);
    
    /*
    ** Write the entryname.idx index
    */
    
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);
    
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);
    
    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
				      dbname, nfiles, cleanup, sortopt);
    else			  /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
				     idlist, &entryIds);
	if(idDone != idCount)
	    ajFatal("Duplicates not allowed for in-memory processing");
    }
    
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);
    
    /*
    ** Write the fields index files
    */
    
    for(ifield=0; ifield < nfields; ifield++)
    {

        if(maxindex)
	    maxlen = maxindex;
	else
	{
	    if(maxFieldLen[ifield] >= 0)
		maxlen = maxFieldLen[ifield];
	    else
		maxlen = - maxFieldLen[ifield];
	}

        if(systemsort)
	    fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
						     date, indexdir,
						     fields[ifield], maxlen,
						     nfiles, idCount,
						     cleanup, sortopt);
	else
	    fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
						    date, indexdir,
						    fields[ifield], maxlen,
						    fieldList[ifield],
						    entryIds);
    }
    
    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
		   nfields, nfiles, idDone, idCount);

    if(systemsort)
	embDbiRmEntryFile(dbname, cleanup);
    
    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
	if(systemsort)
	{
	    ajFileClose(&alistfile[i]);
	}
	else
	{
	    ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
	    ajListFree(&fieldList[i]);
	}
    }
    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);
    for(i=0;i<nfiles;i++)
    {
	ajStrDel(&divfiles[i]);
    }
    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);

    AJFREE(maxFieldLen);

    ajFileClose(&logfile);

    ajListstrFreeData(&listTestFiles);

    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);
    ajRegFree(&wrdexp);

    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);
        AJFREE(fdl);
    }

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }
    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
示例#10
0
AjPMatrix ajMatrixNewFile(const AjPStr filename)
{
    AjPMatrix ret = NULL;
    AjPStr buffer = NULL;
    const AjPStr tok    = NULL;

    AjPStr firststring  = NULL;
    AjPStr *orderstring = NULL;

    AjPFile file    = NULL;
    AjBool first    = ajTrue;
    const char *ptr = NULL;
    ajint **matrix  = NULL;

    ajint minval = -1;
    ajint i      = 0;
    ajint l      = 0;
    ajint k      = 0;
    ajint cols   = 0;
    ajint rows   = 0;

    ajint *templine = NULL;

    AjPList rlabel_list = NULL;
    AjPStr  *rlabel_arr  = NULL;

#ifndef WIN32
    static const char *delimstr = " :\t\n";
#else
    static const char *delimstr = " :\t\n\r";
#endif

    rlabel_list = ajListNew();
    

    firststring = ajStrNew();
    
    file = ajDatafileNewInNameS(filename);
    
    if(!file)
    {
	ajStrDel(&firststring);
	ajListFree(&rlabel_list);

	return NULL;
    }
    
    /* Read row labels */
    while(ajReadline(file,&buffer))
    {
	ptr = ajStrGetPtr(buffer);
#ifndef WIN32
	if(*ptr != '#' && *ptr != '\n')
#else
	if(*ptr != '#' && *ptr != '\n' && *ptr != '\r')
#endif
	{
	    if(first)
		first = ajFalse;
	    else
	    {	
		ajFmtScanC(ptr, "%S", &firststring);
		ajListPushAppend(rlabel_list, firststring);
		firststring = ajStrNew();	
	    }
	}
    }

    first = ajTrue;
    ajStrDel(&firststring);
    rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr);
    ajFileSeek(file, 0, 0);


    while(ajReadline(file,&buffer))
    {
	ajStrRemoveWhiteExcess(&buffer);
	ptr = ajStrGetPtr(buffer);

	if(*ptr && *ptr != '#')
	{				
	    if(first)
	    {
		cols = ajStrParseCountC(buffer,delimstr);
		AJCNEW0(orderstring, cols);

		for(i=0; i<cols; i++)   
		    orderstring[i] = ajStrNew();
		
		tok = ajStrParseC(buffer, " :\t\n");
		ajStrAssignS(&orderstring[l++], tok);

		while((tok = ajStrParseC(NULL, " :\t\n")))
		    ajStrAssignS(&orderstring[l++], tok);

		first = ajFalse;

		ret = ajMatrixNewAsym(orderstring, cols, 
						rlabel_arr, rows, 
						filename);
		matrix = ret->Matrix;
	    }
	    else
	    {
		ajFmtScanC(ptr, "%S", &firststring);
		
		/* JISON 19/7/4
		   k = ajSeqcvtGetCodeK(ret->Cvt, ajStrGetCharFirst(firststring)); */
		k = ajSeqcvtGetCodeS(ret->Cvt, firststring);

		/* 
		 ** cols+1 is used below because 2nd and subsequent lines have 
		 ** one more string in them (the residue label) 
		 */
		templine = ajArrIntLine(buffer,delimstr,2,cols+1);
		
		for(i=0; i<cols; i++)   
		{
		    if(templine[i] < minval) 
			minval = templine[i];

		    /* JISON 19/7/4
		    matrix[k][ajSeqcvtGetCodeK(ret->Cvt,
					ajStrGetCharFirst(orderstring[i]))] 
					    = templine[i]; */
		    matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt,
					       orderstring[i])] 
						   = templine[i];
		}

		AJFREE(templine);
	    }
	}
    }

    ajDebug("fill rest with minimum value %d\n", minval);
    

    ajFileClose(&file);
    ajStrDel(&buffer);

    for(i=0; i<cols; i++)   
	ajStrDel(&orderstring[i]);

    AJFREE(orderstring);
        
    
    ajDebug("read matrix file %S\n", filename);
    
    ajStrDel(&firststring);    

    for(i=0; i<rows; i++)   
	ajStrDel(&rlabel_arr[i]);

    AJFREE(rlabel_arr);
    ajListFree(&rlabel_list);

    return ret;
}
示例#11
0
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i=0;
    const AjPStr fmt=NULL;
    const char *p=NULL;
    char  c='\0';
    /*
    char *q=NULL;
    AjPSelexseq   sqdata=NULL;
    AjPSelexdata sdata=NULL;
    */
    ajint cnt=0;
    info->name = NULL;
    info->rf=NULL;
    info->cs=NULL;
    info->desc=NULL;
    info->acc=NULL;
    info->au=NULL;
    info->flags=0;

    AjPStr tmpstr = NULL;

    ajSeqsetFill(seqset);

    fmt = ajSeqsetGetFormat(seqset);
    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa =NULL;
    }

    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);
        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }
        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /*
        info->rf = ajCharNewS(seq);

    	len = ajStrGetLen(seq->Selexdata->name);
    	info->name = ajCharNewRes(len+1);
    	strcpy(info->name,ajStrGetPtr(seq->Selexdata->name));
    	len = ajStrGetLen(seq->Selexdata->de);
    	info->desc = ajCharNewRes(len+1);

    	sdata = seq->Selexdata;
    	strcpy(info->desc,ajStrGetPtr(sdata->de));
    	len = ajStrGetLen(sdata->ac);
    	info->acc = ajCharNewRes(len+1);
    	strcpy(info->acc,ajStrGetPtr(sdata->ac));
    	len = ajStrGetLen(sdata->au);
    	info->au = ajCharNewRes(len+1);
    	strcpy(info->au,ajStrGetPtr(sdata->au));
    	if(sdata->tc[0] || sdata->tc[1])
    	{
    	    info->flags |= AINFO_TC;
    	    info->tc1 = sdata->tc[0];
    	    info->tc2 = sdata->tc[1];
    	}
    	if(sdata->nc[0] || sdata->nc[1])
    	{
    	    info->flags |= AINFO_NC;
    	    info->nc1 = sdata->nc[0];
    	    info->nc2 = sdata->nc[1];
    	}
    	if(sdata->ga[0] || sdata->ga[1])
    	{
    	    info->flags |= AINFO_GA;
    	    info->ga1 = sdata->ga[0];
    	    info->ga2 = sdata->ga[1];
    	}

    	for(i=0;i<n;++i)
    	{
    	    seq = ajSeqsetGetseqSeq(seqset,i);
    	    sqdata = seq->Selexdata->sq;
    	    if((len=ajStrGetLen(sqdata->name)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name));
    		else
    		    strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63);
    		info->sqinfo[i].name[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_NAME;
    	    }
    / *
    	    if((len=ajStrGetLen(sqdata->id)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id));
    		else
    		    strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63);
    		info->sqinfo[i].id[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ID;
    	    }
    * /

    	    strcpy(info->sqinfo[i].id,info->sqinfo[i].name);
    	    info->sqinfo[i].flags |= SQINFO_ID;
    	    if((len=ajStrGetLen(sqdata->ac)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac));
    		else
    		    strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63);
    		info->sqinfo[i].acc[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ACC;
    	    }
    	    if((len=ajStrGetLen(sqdata->de)))
    	    {
    		if(len<127)
    		    strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de));
    		else
    		    strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127);
    		info->sqinfo[i].desc[127]='\0';
    		info->sqinfo[i].flags |= SQINFO_DESC;
    	    }
    	    if(sqdata->start || sqdata->stop || sqdata ->len)
    	    {
    		info->sqinfo[i].start = sqdata->start;
    		info->sqinfo[i].stop  = sqdata->stop;
    		info->sqinfo[i].olen  = sqdata->len;
    		info->sqinfo[i].flags |= SQINFO_START;
    		info->sqinfo[i].flags |= SQINFO_STOP;
    		info->sqinfo[i].flags |= SQINFO_OLEN;
    	    }

    	    if(ajStrGetLen(seq->Selexdata->ss))
    	    {

    		info->sqinfo[i].ss = ajCharNewRes(maxlen+1);
    		p = ajStrGetPtr(seq->Selexdata->ss);
    		q = info->sqinfo[i].ss;
    		while((c==*p))
    		{
    		    if(c=='.' || c==' ' || c=='_' || c=='-')
    			*q++ = c;
    		    ++p;
    		}
    		*q = '\0';
    		info->sqinfo[i].flags |= SQINFO_SS;
    	    }
    	}
        }
    / *
        }
    */


    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;
    ajStrDel(&tmpstr);

    return;
}
示例#12
0
文件: embprop.c 项目: WenchaoLin/JAMg
EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr  line  = NULL;
    AjPStr  token = NULL;
    
    AjBool firstline;
    
    const char *p;
    ajuint i;
    ajint  n;
    
    EmbPPropMolwt *ret;

    line  = ajStrNew();
    token = ajStrNew();
    
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
	AJNEW0(ret[i]);
    
    while(ajReadline(mfptr, &line))
    {
	ajStrRemoveWhiteExcess(&line);
	p = ajStrGetPtr(line);

	if(*p=='#' || *p=='!' || !*p)
	    continue;

	if(firstline)
	{
	    if(!ajStrPrefixC(line,"Mol"))
		ajFatal("Incorrect format molwt file: '%S'", line);

	    firstline = ajFalse;
	    continue;
	}

	ajFmtScanS(line,"%S",&token);
	ajStrFmtUpper(&token);

	if(ajStrGetLen(token) != 1)
	{
	    if(ajStrPrefixC(token,"HYDROGEN"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPHINDEX]->average,
			      &ret[EMBPROPHINDEX]->mono) != 2)
		    ajFatal("Bad format hydrogen data line");
	    }
	    else if(ajStrPrefixC(token,"OXYGEN"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPOINDEX]->average,
			      &ret[EMBPROPOINDEX]->mono) != 2)
		    ajFatal("Bad format oxygen data line");
	    }
	    else if(ajStrPrefixC(token,"WATER"))
	    {
		if(ajFmtScanS(line,"%*s%lf%lf",
			      &ret[EMBPROPWINDEX]->average,
			      &ret[EMBPROPWINDEX]->mono) != 2)
		    ajFatal("Bad format water data line");
	    }
	    else
		ajFatal("Unknown molwt token %S",token);

	    continue;
	}


	i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

	if(i == 27)
	    ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
		    line);

	n = ajFmtScanS(line,"%*s%lf%lf",
		       &ret[i]->average,
		       &ret[i]->mono);
	if(n != 2)
	    ajFatal("Only %d columns in amino file - expected %d",n,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}
示例#13
0
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char   *queryseqc;
    const char   *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend   = 0;
    ajint targetend   = 0;
    ajint width  = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* width for banded Smith-Waterman */
    minscore  = ajAcdGetFloat("minscore");

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    for(k=0;k<ntargetseqs;k++)
    {
	targetseq = ajSeqsetGetseqSeq(targetseqs, k);
	embWordGetTable(&kmers, targetseq);
	ajDebug("Number of distinct kmers found so far: %d\n",
		ajTableGetLength(kmers));
    }
    AJCNEW0(lastlocation, ntargetseqs);

    if(ajTableGetLength(kmers)<1)
	ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
	ajSeqTrim(queryseq);

	queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

	ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

	for(k=0;k<ntargetseqs;k++)
	{
	    lastlocation[k]=0;
	    matchlist[k] = ajListstrNew();
	}

	embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
		(const EmbPWordRK*)wordsw, wordlen, nkmers,
		matchlist, lastlocation, ajFalse);


	for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
	{
	    targetseq      = ajSeqsetGetseqSeq(targetseqs, k);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

	    if(ajListGetLength(matchlist[k])==0)
	    {
		ajFmtPrintF(errorf,
		            "No wordmatch start points for "
		            "%s vs %s. No alignment\n",
		            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
		embWordMatchListDelete(&matchlist[k]);
		continue;
	    }


	    /* only the maximum match is used as seed
	     * (if there is more than one location with the maximum match
	     * only the first one is used)
	     * TODO: we should add a new option to make above limit optional
	     */
	    maxmatch = embWordMatchFirstMax(matchlist[k]);

	    supermatcher_findendpoints(maxmatch,targetseq, queryseq,
		    &targetstart, &querystart,
		    &targetend, &queryend);

	    targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
	    queryseqc = ajSeqGetSeqC(queryseq);
	    targetseqc = ajSeqGetSeqC(targetseq);

	    ajStrAssignC(&queryaln,"");
	    ajStrAssignC(&targetaln,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
		    targetstart, querystart, targetend, queryend);

	    newmax = (targetend-targetstart+2)*width;

	    if(newmax > oldmax)
	    {
		AJCRESIZE0(path,oldmax,newmax);
		AJCRESIZE0(compass,oldmax,newmax);
		oldmax=newmax;
		ajDebug("++ memory re/allocation for path/compass arrays"
			" to size: %d\n", newmax);
	    }
	    else
	    {
		AJCSET0(path,newmax);
		AJCSET0(compass,newmax);
	    }

	    ajDebug("Calling embAlignPathCalcSWFast "
		    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
		    querystart, queryend, (queryend - querystart + 1),
		    ajSeqGetLen(queryseq),
		    targetstart, targetend, (targetend - targetstart + 1),
		    ajSeqGetLen(targetseq),
		    width);

	    score = embAlignPathCalcSWFast(&targetseqc[targetstart],
	                                   &queryseqc[querystart],
	                                   targetend-targetstart+1,
	                                   queryend-querystart+1,
	                                   0,width,
	                                   gapopen,gapextend,
	                                   path,sub,cvt,
	                                   compass,show);
	    if(score>minscore)
	    {
		embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
		                         targetseq,queryseq,
		                         &targetaln,&queryaln,
		                         targetend-targetstart+1,
		                         queryend-querystart+1,
		                         0,width,
		                         &targetstart,&querystart);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
		                    ajStrGetPtr(queryaln),
		                    ajSeqGetNameC(targetseq),
		                    ajSeqGetNameC(queryseq));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    ajDebug(" queryaln:%S \ntargetaln:%S\n",
		            queryaln,targetaln);
		    embAlignReportLocal(align,
			    queryseq, targetseq,
			    queryaln, targetaln,
			    querystart, targetstart,
			    gapopen, gapextend,
			    score, matrix,
			    1 + ajSeqGetOffset(queryseq),
			    1 + ajSeqGetOffset(targetseq)
		    );
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    }
	    ajStrDel(&targetaln);

	    embWordMatchListDelete(&matchlist[k]);
	}

	ajStrDel(&queryaln);
    }


    for(k=0;k<nkmers;k++)
    {
	AJFREE(wordsw[k]->seqindxs);
	AJFREE(wordsw[k]->nSeqMatches);

	for(j=0;j<wordsw[k]->nseqs;j++)
	    AJFREE(wordsw[k]->locs[j]);

	AJFREE(wordsw[k]->nnseqlocs);
	AJFREE(wordsw[k]->locs);
	AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    if(!ajAlignFormatShowsSequences(align))
	ajMatrixfDel(&matrix);
    
    AJFREE(path);
    AJFREE(compass);
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
示例#14
0
int main(int argc, char **argv)
{
    AjPSeq seq;
    AjPGraph graph = 0;
    AjPFile   outf = NULL;
    AjPFile file   = NULL;
    AjPStr buffer  = NULL;
    float twist[4][4][4];
    float roll[4][4][4];
    float tilt[4][4][4];
    float rbend;
    float rcurve;
    float bendscale;
    float curvescale;
    float twistsum = (float) 0.0;
    float pi       = (float) 3.14159;
    float pifac    = (pi/(float) 180.0);
    float pi2      = pi/(float) 2.0;
    ajint *iseq    = NULL;
    float *x;
    float *y;
    float *xave;
    float *yave;
    float *curve;
    float *bend;
    const char *ptr;
    ajint i;
    ajint ii;
    ajint k;
    ajint j;
    char residue[2];
    float maxbend;
    float minbend;
    float bendfactor;
    float maxcurve;
    float mincurve;
    float curvefactor;

    float fxp;
    float fyp;
    float yincr;
    float yy1;
    ajint ixlen;
    ajint iylen;
    ajint ixoff;
    ajint iyoff;
    float ystart;
    float defheight;
    float currentheight;
    ajint count;
    ajint portrait = 0;
    ajint title    = 0;
    ajint numres;
    ajint ibeg;
    ajint iend;
    ajint ilen;
    AjPStr sstr = NULL;
    ajint ipos;
    float dx;
    float dy;
    float rxsum;
    float rysum;
    float yp1;
    float yp2;
    double td;
    
    embInit("banana", argc, argv);
    seq    = ajAcdGetSeq("sequence");
    file   = ajAcdGetDatafile("anglesfile");
    outf   = ajAcdGetOutfile("outfile");
    graph  = ajAcdGetGraph("graph");
    numres = ajAcdGetInt("residuesperline");

    ibeg = ajSeqGetBegin(seq);
    iend = ajSeqGetEnd(seq);

    ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1);
    ilen = ajStrGetLen(sstr);

    AJCNEW0(iseq,ilen+1);
    AJCNEW0(x,ilen+1);
    AJCNEW0(y,ilen+1);
    AJCNEW0(xave,ilen+1);
    AJCNEW0(yave,ilen+1);
    AJCNEW0(curve,ilen+1);
    AJCNEW0(bend,ilen+1);

    ptr= ajStrGetPtr(sstr);

    for(ii=0;ii<ilen;ii++)
    {
	if(*ptr=='A' || *ptr=='a')
	    iseq[ii+1] = 0;
	else if(*ptr=='T' || *ptr=='t')
	    iseq[ii+1] = 1;
	else if(*ptr=='G' || *ptr=='g')
	    iseq[ii+1] = 2;
	else if(*ptr=='C' || *ptr=='c')
	    iseq[ii+1] = 3;
	else
	    ajErr("%c is not an ATCG hence not valid",*ptr);

	ptr++;
    }


    if(!file)
	ajErr("Banana failed to open angle file");

    ajReadline(file,&buffer);		/* 3 junk lines */
    ajReadline(file,&buffer);
    ajReadline(file,&buffer);

    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	{
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",
		       &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k],
		       &twist[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");

	    for(j=0;j<4;j++)
		twist[ii][j][k] *= pifac;
	}


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k],
		       &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k],
		       &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]);
	    else
		ajErr("Error reading angle file");


    if(ajReadline(file,&buffer))
	sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve,
	       &bendscale,&curvescale);
    else
	ajErr("Error reading angle file");

    ajFileClose(&file);
    ajStrDel(&buffer);


    for(ii=1;ii<ilen-1;ii++)
    {
	twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]];
	dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) +
	    (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2));
	dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) +
	    tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2);

	x[ii+1] = x[ii]+dx;
	y[ii+1] = y[ii]+dy;

    }

    for(ii=6;ii<ilen-6;ii++)
    {
	rxsum = 0.0;
	rysum = 0.0;
	for(k=-4;k<=4;k++)
	{
	    rxsum+=x[ii+k];
	    rysum+=y[ii+k];
	}
	rxsum+=(x[ii+5]*(float)0.5);
	rysum+=(y[ii+5]*(float)0.5);
	rxsum+=(x[ii-5]*(float)0.5);
	rysum+=(y[ii-5]*(float)0.5);

	xave[ii] = rxsum*(float)0.1;
	yave[ii] = rysum*(float)0.1;
    }

    for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++)
    {
	td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])*
		   (x[i+(ajint)rbend]-x[i-(ajint)rbend])) +
		  ((y[i+(ajint)rbend]-y[i-(ajint)rbend])*
		   (y[i+(ajint)rbend]-y[i-(ajint)rbend])));
	bend[i] = (float) td;
	bend[i]*=bendscale;
    }

    for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++)
    {
	td = sqrt(((xave[i+(ajint)rcurve]-
		    xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]-
					    xave[i-(ajint)rcurve]))+
		  ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])*
		   (yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])));
	curve[i] = (float) td;
    }
    

    if(outf)
    {
	ajFmtPrintF(outf,"Base   Bend      Curve\n");
	ptr = ajStrGetPtr(sstr);
	for(ii=1;ii<=ilen;ii++)
	{
	    ajFmtPrintF(outf,"%c    %6.1f   %6.1f\n",
			*ptr, bend[ii], curve[ii]);
	    ptr++;
	}
	ajFileClose(&outf);
    }

    if(graph)
    {
	maxbend  = minbend  = 0.0;
	maxcurve = mincurve = 0.0;
	for(ii=1;ii<=ilen;ii++)
	{
	    if(bend[ii] > maxbend)
		maxbend = bend[ii];
	    if(bend[ii] < minbend)
		minbend = bend[ii];
	    if(curve[ii] > maxcurve)
		maxcurve = curve[ii];
	    if(curve[ii] < mincurve)
		mincurve = curve[ii];
	}

	ystart = 75.0;

	ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq));

        ajGraphicsSetPagesize(960, 768);

	ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0,
		       (float)0.0, ystart+(float)5.0);

	ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff);

	if(portrait)
        {
            ixlen = 768;
            iylen = 960;
        }
        else
        {
            ixlen = 960;
            iylen = 768;
        }

	ajGraphicsGetCharsize(&defheight,&currentheight);
	if(!currentheight)
	{
	    defheight = currentheight = (float) 4.440072;
	    currentheight = defheight *
		((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0)))
		    /currentheight;
	}
	ajGraphicsSetCharscale(((float)ixlen/((float)(numres)*
					  (currentheight+(float)1.0)))/
			    currentheight);
	ajGraphicsGetCharsize(&defheight,&currentheight);

	yincr = (currentheight + (float)3.0)*(float)0.3;

	if(!title)
	    yy1 = ystart;
	else
	    yy1 = ystart-(float)5.0;

	count = 1;

	residue[1]='\0';

	bendfactor = (3*yincr)/maxbend;
	curvefactor = (3*yincr)/maxcurve;

	ptr = ajStrGetPtr(sstr);

	yy1 = yy1-(yincr*((float)5.0));
	for(ii=1;ii<=ilen;ii++)
	{
	    if(count > numres)
	    {
		yy1 = yy1-(yincr*((float)5.0));
		if(yy1<1.0)
		{
		    if(!title)
			yy1=ystart;
		    else
			yy1 = ystart-(float)5.0;

		    yy1 = yy1-(yincr*((float)5.0));
		    ajGraphNewpage(graph,AJFALSE);
		}
		count = 1;
	    }
	    residue[0] = *ptr;

	    ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue);

	    if(ii>1 && ii < ilen)
	    {
		yp1 = yy1+yincr + (bend[ii]*bendfactor);
		yp2 = yy1+yincr + (bend[ii+1]*bendfactor);
		ajGraphicsDrawposLine((float)count+(float)1.5,yp1,
			    (float)(count)+(float)2.5,yp2);
	    }

	    ipos = ilen-(ajint)rcurve-7;
	    if(ipos < 0)
		ipos = 0;

	    if(ii>rcurve+5 && ii<ipos)
	    {
		yp1 = yy1+yincr + (curve[ii]*curvefactor);
		yp2 = yy1+yincr + (curve[ii+1]*curvefactor);
		ajGraphicsDrawposLine((float)count+(float)1.7,yp1,
			    (float)(count)+(float)2.3,yp2);
	    }

	    ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr,
			(float)(count)+(float)2.5,yy1+yincr);

	    count++;
	    ptr++;
	}

	ajGraphicsClose();
    }

    AJFREE(iseq);
    AJFREE(x);
    AJFREE(y);
    AJFREE(xave);
    AJFREE(yave);
    AJFREE(curve);
    AJFREE(bend);

    ajStrDel(&sstr);

    ajSeqDel(&seq);
    ajFileClose(&file);
    ajFileClose(&outf);
    ajGraphxyDel(&graph);

    embExit();

    return 0;
}
示例#15
0
int main(int argc, char **argv)
{
    /* Global details */
    AjBool explain_flag;
    AjBool file_flag;
    AjPStr* task;
    AjBool do_primer;
    AjBool do_hybrid;
    ajint num_return;
    ajint first_base_index;

    /* "Sequence" Input Tags */
    AjPSeqall sequence;
    AjPRange included_region;
    AjPRange target;
    AjPRange excluded_region;
    AjPStr left_input;
    AjPStr right_input;

    /* Primer details */
    AjBool pick_anyway;
    AjPFile mispriming_library;
    float max_mispriming;
    float pair_max_mispriming;
    ajint gc_clamp;
    ajint opt_size;
    ajint min_size;
    ajint max_size;
    float opt_tm;
    float min_tm;
    float max_tm;
    float max_diff_tm;
    float opt_gc_percent;
    float min_gc;
    float max_gc;
    float salt_conc;
    float dna_conc;
    ajint num_ns_accepted;
    float self_any;
    float self_end;
    ajint max_poly_x;

    /* Sequence Quality. These are not (yet) implemented */
    /*
       AjPFile sequence_quality;
       ajint	min_quality;
       ajint	min_end_quality;
       ajint	quality_range_min;
       ajint	quality_range_max;
       */

    /* Product details */
    ajint product_opt_size;
    AjPRange product_size_range;
    float product_opt_tm;
    float product_min_tm;
    float product_max_tm;

    /* Objective Function Penalty Weights for Primers */
    float max_end_stability;

    /* these are not (yet) implemented */
    /*
       float		inside_penalty;
       float		outside_penalty;
    */

    /* Primer penalties */
    /* these are not (yet) implemented */

    /* Internal Oligo "Sequence" Input Tags */
    AjPRange internal_oligo_excluded_region;

    /* Internal Oligo "Global" Input Tags */
    AjPStr internal_oligo_input;
    ajint internal_oligo_opt_size;
    ajint internal_oligo_min_size;
    ajint internal_oligo_max_size;
    float internal_oligo_opt_tm;
    float internal_oligo_min_tm;
    float internal_oligo_max_tm;
    float internal_oligo_opt_gc_percent;
    float internal_oligo_min_gc;
    float internal_oligo_max_gc;
    float internal_oligo_salt_conc;
    float internal_oligo_dna_conc;
    float internal_oligo_self_any;
    float internal_oligo_self_end;
    ajint internal_oligo_max_poly_x;
    AjPFile internal_oligo_mishyb_library;
    float internal_oligo_max_mishyb;

    /*
       ajint		internal_oligo_min_quality;
    */

    /* Internal Oligo penalties */
    /* these are not (yet) implemented */

    /* EMBOSS-wrapper-specific stuff */
    AjPFile	outfile;

    /* other variables */
    AjPStr result = NULL;
    AjPStr strand = NULL;
    AjPStr substr = NULL;
    AjPSeq seq    = NULL;
    ajint begin   = 0;
    ajint end;
    FILE* stream;
    AjPStr taskstr  = NULL;
    const AjPStr program = NULL;

    /* pipe variables */

    int *pipeto;	  /* pipe to feed the exec'ed program input */
    int *pipefrom;	  /* pipe to get the exec'ed program output */

    embInit("eprimer3", argc, argv);

    /* Global details */
    explain_flag     = ajAcdGetBoolean("explainflag");
    file_flag        = ajAcdGetBoolean("fileflag");
    task             = ajAcdGetList("task");
    do_primer        = ajAcdGetToggle("primer");
    do_hybrid        = ajAcdGetToggle("hybridprobe");
    num_return       = ajAcdGetInt("numreturn");
    first_base_index = ajAcdGetInt("firstbaseindex");

    /* "Sequence" Input Tags */
    sequence        = ajAcdGetSeqall("sequence");
    included_region = ajAcdGetRange("includedregion");
    target          = ajAcdGetRange("targetregion");
    excluded_region = ajAcdGetRange("excludedregion");
    left_input      = ajAcdGetString("forwardinput");
    right_input     = ajAcdGetString("reverseinput");

    /* Primer details */
    pick_anyway         = ajAcdGetBoolean("pickanyway");
    mispriming_library  = ajAcdGetInfile("mispriminglibraryfile");
    max_mispriming      = ajAcdGetFloat("maxmispriming");
    pair_max_mispriming = ajAcdGetFloat("pairmaxmispriming");
    gc_clamp            = ajAcdGetInt("gcclamp");
    opt_size            = ajAcdGetInt("osize");
    min_size            = ajAcdGetInt("minsize");
    max_size            = ajAcdGetInt("maxsize");
    opt_tm              = ajAcdGetFloat("otm");
    min_tm              = ajAcdGetFloat("mintm");
    max_tm              = ajAcdGetFloat("maxtm");
    max_diff_tm         = ajAcdGetFloat("maxdifftm");
    opt_gc_percent      = ajAcdGetFloat("ogcpercent");
    min_gc              = ajAcdGetFloat("mingc");
    max_gc              = ajAcdGetFloat("maxgc");
    salt_conc           = ajAcdGetFloat("saltconc");
    dna_conc            = ajAcdGetFloat("dnaconc");
    num_ns_accepted     = ajAcdGetInt("numnsaccepted");
    self_any            = ajAcdGetFloat("selfany");
    self_end            = ajAcdGetFloat("selfend");
    max_poly_x          = ajAcdGetInt("maxpolyx");

    AJCNEW0(pipeto,2);
    AJCNEW0(pipefrom,2);

    /* Sequence Quality */
    /* these are not (yet) implemented */
    /*
       sequence_quality  = ajAcdGetInfile("sequencequality");
       min_quality       = ajAcdGetInt("minquality");
       min_end_quality   = ajAcdGetInt("minendquality");
       quality_range_min = ajAcdGetInt("qualityrangemin");
       quality_range_max = ajAcdGetInt("qualityrangemax");
       */

    /* Product details */
    product_opt_size    = ajAcdGetInt("psizeopt");
    product_size_range  = ajAcdGetRange("prange");
    product_opt_tm      = ajAcdGetFloat("ptmopt");
    product_min_tm      = ajAcdGetFloat("ptmmin");
    product_max_tm      = ajAcdGetFloat("ptmmax");

    /* Objective Function Penalty Weights for Primers */
    max_end_stability   = ajAcdGetFloat("maxendstability");
    /* these are not (yet) implemented */
    /*
       inside_penalty      = ajAcdGetFloat("insidepenalty");
       outside_penalty     = ajAcdGetFloat("outsidepenalty");
    */

    /* Primer penalties */
    /* these are not (yet) implemented */

    /* Internal Oligo "Sequence" Input Tags */
    internal_oligo_excluded_region = ajAcdGetRange("oexcludedregion");
    internal_oligo_input           = ajAcdGetString("oligoinput");

    /* Internal Oligo "Global" Input Tags */
    internal_oligo_opt_size       = ajAcdGetInt("osizeopt");
    internal_oligo_min_size       = ajAcdGetInt("ominsize");
    internal_oligo_max_size       = ajAcdGetInt("omaxsize");
    internal_oligo_opt_tm         = ajAcdGetFloat("otmopt");
    internal_oligo_min_tm         = ajAcdGetFloat("otmmin");
    internal_oligo_max_tm         = ajAcdGetFloat("otmmax");
    internal_oligo_opt_gc_percent = ajAcdGetFloat("ogcopt");
    internal_oligo_min_gc         = ajAcdGetFloat("ogcmin");
    internal_oligo_max_gc         = ajAcdGetFloat("ogcmax");
    internal_oligo_salt_conc      = ajAcdGetFloat("osaltconc");
    internal_oligo_dna_conc       = ajAcdGetFloat("odnaconc");
    internal_oligo_self_any       = ajAcdGetFloat("oanyself");
    internal_oligo_self_end       = ajAcdGetFloat("oendself");
    internal_oligo_max_poly_x     = ajAcdGetInt("opolyxmax");
    internal_oligo_mishyb_library = ajAcdGetInfile("mishyblibraryfile");
    internal_oligo_max_mishyb     = ajAcdGetFloat("omishybmax");
    /*
       internal_oligo_min_quality    = ajAcdGetInt("oligominquality");
    */

    /* Internal Oligo penalties */
    /* these are not (yet) implemented */


    /* EMBOSS-wrapper-specific stuff */
    outfile = ajAcdGetOutfile("outfile");


    ajStrRemoveWhite(&left_input);
    ajStrRemoveWhite(&right_input);

    /*
    ** OK - we will now try to do a separate fork-exec for each sequence.
    */

    result = ajStrNew();

    while(ajSeqallNext(sequence, &seq))
    {
        program = ajAcdGetpathC("primer3_core");

        if(!ajSysExecRedirectC(ajStrGetPtr(program),&pipeto,&pipefrom))
            ajFatal("eprimer3: Could not exec primer3_core");

        stream = eprimer3_start_write(pipeto[1]);

        /* send primer3 Primer "Global" parameters */
        eprimer3_send_bool(stream, "PRIMER_EXPLAIN_FLAG", explain_flag);
        eprimer3_send_bool(stream, "PRIMER_FILE_FLAG", file_flag);

        if(do_hybrid)
        {
            if(!ajStrCmpC(task[0], "1"))
                ajStrAssignC(&taskstr, "pick_pcr_primers_and_hyb_probe");
            else if(!ajStrCmpC(task[0], "2"))
                ajStrAssignC(&taskstr, "pick_left_only");
            else if(!ajStrCmpC(task[0], "3"))
                ajStrAssignC(&taskstr, "pick_right_only");
            else if(!ajStrCmpC(task[0], "4"))
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");

            if (!do_primer)
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");
        }
        else
        {
            if(!ajStrCmpC(task[0], "1"))
                ajStrAssignC(&taskstr, "pick_pcr_primers");
            else if(!ajStrCmpC(task[0], "2"))
                ajStrAssignC(&taskstr, "pick_left_only");
            else if(!ajStrCmpC(task[0], "3"))
                ajStrAssignC(&taskstr, "pick_right_only");
            else if(!ajStrCmpC(task[0], "4"))
                ajStrAssignC(&taskstr, "pick_hyb_probe_only");
        }

        eprimer3_send_string(stream, "PRIMER_TASK", taskstr);
        eprimer3_send_int(stream, "PRIMER_NUM_RETURN", num_return);
        eprimer3_send_int(stream, "PRIMER_FIRST_BASE_INDEX",
                          first_base_index);
        eprimer3_send_bool(stream, "PRIMER_PICK_ANYWAY", pick_anyway);

        /* mispriming library may not have been specified */
        if(mispriming_library)
            eprimer3_send_stringC(stream, "PRIMER_MISPRIMING_LIBRARY",
                                  ajFileGetPrintnameC(mispriming_library));

        eprimer3_send_float(stream, "PRIMER_MAX_MISPRIMING",
                            max_mispriming);
        eprimer3_send_float(stream, "PRIMER_PAIR_MAX_MISPRIMING",
                            pair_max_mispriming);
        eprimer3_send_int(stream, "PRIMER_GC_CLAMP", gc_clamp);
        eprimer3_send_int(stream, "PRIMER_OPT_SIZE", opt_size);
        eprimer3_send_int(stream, "PRIMER_MIN_SIZE", min_size);
        eprimer3_send_int(stream, "PRIMER_MAX_SIZE", max_size);
        eprimer3_send_float(stream, "PRIMER_OPT_TM", opt_tm);
        eprimer3_send_float(stream, "PRIMER_MIN_TM", min_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_TM", max_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_DIFF_TM", max_diff_tm);
        eprimer3_send_float(stream, "PRIMER_OPT_GC_PERCENT",
                            opt_gc_percent);
        eprimer3_send_float(stream, "PRIMER_MIN_GC", min_gc);
        eprimer3_send_float(stream, "PRIMER_MAX_GC", max_gc);
        eprimer3_send_float(stream, "PRIMER_SALT_CONC", salt_conc);
        eprimer3_send_float(stream, "PRIMER_DNA_CONC", dna_conc);
        eprimer3_send_int(stream, "PRIMER_NUM_NS_ACCEPTED",
                          num_ns_accepted);
        eprimer3_send_float(stream, "PRIMER_SELF_ANY", self_any);
        eprimer3_send_float(stream, "PRIMER_SELF_END", self_end);
        eprimer3_send_int(stream, "PRIMER_MAX_POLY_X", max_poly_x);
        eprimer3_send_int(stream, "PRIMER_PRODUCT_OPT_SIZE",
                          product_opt_size);
        eprimer3_send_range2(stream, "PRIMER_PRODUCT_SIZE_RANGE",
                             product_size_range);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_OPT_TM",
                            product_opt_tm);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_MIN_TM",
                            product_min_tm);
        eprimer3_send_float(stream, "PRIMER_PRODUCT_MAX_TM",
                            product_max_tm);
        eprimer3_send_float(stream, "PRIMER_MAX_END_STABILITY",
                            max_end_stability);

        /* send primer3 Internal Oligo "Global" parameters */
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_OPT_SIZE",
                          internal_oligo_opt_size);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MIN_SIZE",
                          internal_oligo_min_size);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_SIZE",
                          internal_oligo_max_size);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_TM",
                            internal_oligo_opt_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_TM",
                            internal_oligo_min_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_TM",
                            internal_oligo_max_tm);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_OPT_GC_PERCENT",
                            internal_oligo_opt_gc_percent);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MIN_GC",
                            internal_oligo_min_gc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_GC",
                            internal_oligo_max_gc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SALT_CONC",
                            internal_oligo_salt_conc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_DNA_CONC",
                            internal_oligo_dna_conc);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_ANY",
                            internal_oligo_self_any);
        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_SELF_END",
                            internal_oligo_self_end);
        eprimer3_send_int(stream, "PRIMER_INTERNAL_OLIGO_MAX_POLY_X",
                          internal_oligo_max_poly_x);

        /*
        ** internal oligo mishybridising library may not have been
        ** specified
        */
        if(internal_oligo_mishyb_library)
            eprimer3_send_stringC(stream,
                                  "PRIMER_INTERNAL_OLIGO_MISHYB_LIBRARY",
                                  ajFileGetPrintnameC(internal_oligo_mishyb_library));

        eprimer3_send_float(stream, "PRIMER_INTERNAL_OLIGO_MAX_MISHYB",
                            internal_oligo_max_mishyb);


        /*
        ** Start sequence-specific stuff
        */

        begin = ajSeqallGetseqBegin(sequence) - 1;
        end   = ajSeqallGetseqEnd(sequence) - 1;

        strand = ajSeqGetSeqCopyS(seq);

        ajStrFmtUpper(&strand);
        ajStrAssignSubC(&substr,ajStrGetPtr(strand), begin, end);

        /* send flags to turn on using optimal product size */
        eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_GT",
                            (float)0.05);
        eprimer3_send_float(stream, "PRIMER_PAIR_WT_PRODUCT_SIZE_LT",
                            (float)0.05);

        /* send primer3 Primer "Sequence" parameters */
        eprimer3_send_string(stream, "SEQUENCE", substr);

        /* if no ID name, use the USA */
        if(ajStrMatchC(ajSeqGetNameS(seq),""))
            eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID",
                                 ajSeqGetUsaS(seq));
        else
            eprimer3_send_string(stream, "PRIMER_SEQUENCE_ID",
                                 ajSeqGetNameS(seq));

        eprimer3_send_range(stream, "INCLUDED_REGION", included_region,
                            begin);
        eprimer3_send_range(stream, "TARGET", target, begin);
        eprimer3_send_range(stream, "EXCLUDED_REGION", excluded_region,
                            begin);
        eprimer3_send_string(stream, "PRIMER_LEFT_INPUT", left_input);
        eprimer3_send_string(stream, "PRIMER_RIGHT_INPUT", right_input);

        /* send primer3 Internal Oligo "Sequence" parameters */
        eprimer3_send_range(stream,
                            "PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION",
                            internal_oligo_excluded_region, begin);
        eprimer3_send_string(stream, "PRIMER_INTERNAL_OLIGO_INPUT",
                             internal_oligo_input);


        /* end the primer3 input sequence record with a '=' */
        eprimer3_send_end(stream);
        /* and close the ouput pipe stream */
        eprimer3_end_write(stream);

        /* read the primer3 output */
        eprimer3_read(pipefrom[0], &result);

        eprimer3_report(outfile, result, num_return, begin);

        ajStrSetClear(&result);

#ifndef WIN32
        close(pipeto[1]);
        close(pipefrom[0]);
#endif
    }	/* end of sequence loop */


    ajStrDel(&result);
    ajSeqDel(&seq);
    ajStrDel(&strand);
    ajStrDel(&substr);
    ajFileClose(&outfile);
    ajStrDel(&taskstr);
    ajStrDelarray(&task);

    ajSeqallDel(&sequence);
    ajSeqDel(&seq);

    ajRangeDel(&included_region);
    ajRangeDel(&target);
    ajRangeDel(&excluded_region);
    ajRangeDel(&product_size_range);
    ajRangeDel(&internal_oligo_excluded_region);

    ajStrDel(&left_input);
    ajStrDel(&right_input);
    ajStrDel(&internal_oligo_input);

    AJFREE(pipeto);
    AJFREE(pipefrom);

    ajFileClose(&mispriming_library);

    embExit();

    return 0;
}
示例#16
0
EmbPMatPrints embMatProtReadInt(AjPFile fp)
{
    EmbPMatPrints ret;
    AjPStr line;

    ajint i;
    ajuint j;
    ajuint m;
    const char *p;

    line = ajStrNewC("#");

    p = ajStrGetPtr(line);

    while(!*p || *p=='#' || *p=='!' || *p=='\n')
    {
	if(!ajReadlineTrim(fp,&line))
	{
	    ajStrDel(&line);
	    return NULL;
	}

	p = ajStrGetPtr(line);
    }

    ajDebug("embMatProtReadint starting\n");
    ajDebug ("Line: %S\n", line);

    AJNEW0 (ret);

    ret->cod = ajStrNew();
    ajStrAssignS(&ret->cod,line);

    ajReadlineTrim(fp,&line);
    ret->acc = ajStrNew();
    ajStrAssignS(&ret->acc,line);
    ajReadlineTrim(fp,&line);
    ajStrToUint(line,&ret->n);
    ajReadlineTrim(fp,&line);
    ret->tit = ajStrNew();
    ajStrAssignS(&ret->tit,line);

    ajDebug ("Lineb: %S\n", line);
    AJCNEW(ret->len, ret->n);
    AJCNEW(ret->max, ret->n);
    AJCNEW(ret->thresh, ret->n);
    AJCNEW(ret->matrix, ret->n);

    for(m=0;m<ret->n;++m)
    {
	ajReadlineTrim(fp,&line);
	ajStrToUint(line,&ret->len[m]);
	ajReadlineTrim(fp,&line);
	ajStrToUint(line,&ret->thresh[m]);
	ajReadlineTrim(fp,&line);
	ajStrToUint(line,&ret->max[m]);
	ajDebug ("m: %d/%d len:%d thresh:%d max:%d\n",
		 m, ret->n, ret->len[m], ret->thresh[m], ret->max[m]);

	for(i=0;i<26;++i)
	{
	    AJCNEW0(ret->matrix[m][i], ret->len[m]);
	    ajReadlineTrim(fp,&line);
	    ajDebug ("Linec [%d][%d]: %S\n", m, i, line);
	    p = ajStrGetPtr(line);

	    for(j=0;j<ret->len[m];++j)
	    {
		if(!j)
		    p = ajSysFuncStrtok(p," ");
		else
		    p = ajSysFuncStrtok(NULL," ");

		sscanf(p,"%u",&ret->matrix[m][i][j]);
	    }
	}
    }

    ajReadlineTrim(fp,&line);
    ajDebug ("Linec: %S\n", line);

    ajStrDel(&line);

    return ret;
}
示例#17
0
int main(int argc, char **argv)
{
    AjPFile outf = NULL;

    AjPSeq sequence = NULL;
    AjPStr substr   = NULL;
    AjPStr seqstr = NULL;
    AjPStr revstr = NULL;

    AjPStr p1;
    AjPStr p2;

    PPrimer eric = NULL;
    PPrimer fred = NULL;

    PPrimer f;
    PPrimer r;

    PPair pair;

    AjPList forlist  = NULL;
    AjPList revlist  = NULL;
    AjPList pairlist = NULL;

    AjBool targetrange;
    AjBool isDNA  = ajTrue;
    AjBool dolist = ajFalse;

    ajint primerlen    = 0;
    ajint minprimerlen = 0;
    ajint maxprimerlen = 0;
    ajint minprodlen   = 0;
    ajint maxprodlen   = 0;
    ajint prodlen      = 0;

    ajint seqlen = 0;
    ajint stepping_value = 1;

    ajint targetstart = 0;
    ajint targetend   = 0;

    ajint limit    = 0;
    ajint limit2   = 0;
    ajint lastpos  = 0;
    ajint startpos = 0;
    ajint endpos   = 0;

    ajint begin;
    ajint end;
    ajint v1;
    ajint v2;

    ajint overlap;

    float minpmGCcont   = 0.;
    float maxpmGCcont   = 0.;
    float minprodGCcont = 0.;
    float maxprodGCcont = 0.;
    float prodTm;
    float prodGC;

    ajint i;
    ajint j;

    ajint neric=0;
    ajint nfred=0;
    ajint npair=0;

    float minprimerTm = 0.0;
    float maxprimerTm = 0.0;

    float saltconc = 0.0;
    float dnaconc  = 0.0;

    embInit ("prima", argc, argv);

    substr = ajStrNew();

    forlist  = ajListNew();
    revlist  = ajListNew();
    pairlist = ajListNew();

    p1 = ajStrNew();
    p2 = ajStrNew();


    sequence = ajAcdGetSeq("sequence");
    outf     = ajAcdGetOutfile("outfile");

    minprimerlen = ajAcdGetInt("minprimerlen");
    maxprimerlen = ajAcdGetInt("maxprimerlen");
    minpmGCcont  = ajAcdGetFloat("minpmGCcont");
    maxpmGCcont  = ajAcdGetFloat("maxpmGCcont");
    minprimerTm  = ajAcdGetFloat("mintmprimer");
    maxprimerTm  = ajAcdGetFloat("maxtmprimer");

    minprodlen    = ajAcdGetInt("minplen");
    maxprodlen    = ajAcdGetInt("maxplen");
    minprodGCcont = ajAcdGetFloat("minpgccont");
    maxprodGCcont = ajAcdGetFloat("maxpgccont");

    saltconc = ajAcdGetFloat("saltconc");
    dnaconc  = ajAcdGetFloat("dnaconc");

    targetrange = ajAcdGetToggle("targetrange");
    targetstart = ajAcdGetInt("targetstart");
    targetend   = ajAcdGetInt("targetend");

    overlap = ajAcdGetInt("overlap");
    dolist  = ajAcdGetBoolean("list");

    seqstr = ajSeqGetSeqCopyS(sequence);
    ajStrFmtUpper(&seqstr);

    begin  = ajSeqGetBegin(sequence);
    end    = ajSeqGetEnd(sequence);
    seqlen = end-begin+1;

    ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1);
    revstr = ajStrNewC(ajStrGetPtr(substr));
    ajSeqstrReverse(&revstr);

    AJCNEW0(entropy, seqlen);
    AJCNEW0(enthalpy, seqlen);
    AJCNEW0(energy, seqlen);

    /* Initialise Tm calculation arrays */
    ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1,
	  &entropy, &enthalpy, &energy);


    ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n");
    ajFmtPrintF(outf, "*************\n\n");

    if(targetrange)
	ajFmtPrintF
	    (outf, "Prima of %s from positions %d to %d bps\n",
	     ajSeqGetNameC(sequence),targetstart, targetend);
    else
	ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence));

    ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n");
    ajFmtPrintF
	(outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR ");
    ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n");
    ajFmtPrintF(outf,
		"Primer size range is %d-%d\n",minprimerlen,maxprimerlen);
    ajFmtPrintF(outf,
		"Primer GC content range is %.2f-%.2f\n",minpmGCcont,
		maxpmGCcont);
    ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n",
		minprimerTm, maxprimerTm);

    ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n");

    ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n",
		minprodGCcont, maxprodGCcont);

    ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc);
    ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc);



    if(targetrange)
	ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n",
		    targetstart,targetend);
    else
    {
	ajFmtPrintF(outf,"Considering all suitable Primer pairs with ");
	ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen,
		    maxprodlen);
    }


    ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n");
    ajFmtPrintF(outf, "*****************************************\n\n");


    if(seqlen-minprimerlen < 0)
	ajFatal("Sequence too short");

    if(targetrange)
    {
	ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin);

	prodGC = ajMeltGC(substr,seqlen);
	prodTm = ajMeltTempProd(prodGC,saltconc,seqlen);

	if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
	{
	    ajFmtPrintF(outf,
			"Product GC content [%.2f] outside acceptable range\n",
			prodGC);
	    embExitBad();
	    return 0;
	}

	prima_testtarget(substr, revstr, targetstart-begin, targetend-begin,
			 minprimerlen, maxprimerlen,
			 seqlen, minprimerTm, maxprimerTm, minpmGCcont,
			 maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc,
			 dnaconc, pairlist, &npair);
    }



    if(!targetrange)
    {

    limit   = seqlen-minprimerlen-minprodlen+1;
    lastpos = seqlen-minprodlen;
    limit2  = maxprodlen-minprodlen;

    /* Outer loop selects all possible product start points */
    for(i=minprimerlen; i<limit; ++i)
    {
	startpos = i;
	ajDebug("Position in sequence %d\n",startpos);
	endpos = i+minprodlen-1;
	/* Inner loop selects all possible product lengths  */
	for(j=0; j<limit2; ++j, ++endpos)
	{
	    if(endpos>lastpos)
		break;

	    v1 = endpos-startpos+1;
	    ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos);
	    prodGC = ajMeltGC(p1,v1);
	    prodTm = ajMeltTempProd(prodGC,saltconc,v1);

	    if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
		continue;

	    /* Only accept primers with acceptable Tm and GC */
	    neric = 0;
	    nfred = 0;
	    prima_testproduct(substr, startpos, endpos, primerlen,
			      minprimerlen, maxprimerlen,minpmGCcont,
			      maxpmGCcont, minprimerTm, maxprimerTm,
			      minprodlen, maxprodlen, prodTm, prodGC, seqlen,
			      &eric,&fred,forlist,revlist,&neric,&nfred,
			      stepping_value, saltconc,dnaconc, isDNA, begin);
	    if(!neric)
		continue;



	    /* Now reject those primers with self-complementarity */

	    prima_reject_self(forlist,revlist,&neric,&nfred);
	    if(!neric)
		continue;

	    /* Reject any primers that could bind elsewhere in the
               sequence */
	    prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr,
			     seqlen);



	    /* Now select the least complementary pair (if any) */
	    prima_best_primer(forlist, revlist, &neric, &nfred);
	    if(!neric)
		continue;

	    AJNEW(pair);
	    ajListPop(forlist,(void **)&f);
	    ajListPop(revlist,(void **)&r);
	    pair->f = f;
	    pair->r = r;
	    ++npair;
	    ajListPush(pairlist,(void *)pair);
	}
     }

  }


    if(!targetrange)
    {
	/* Get rid of primer pairs nearby the top scoring ones */
	prima_TwoSortscorepos(&pairlist);
	prima_prune_nearby(pairlist, &npair, maxprimerlen-1);
	ajListSort(pairlist,prima_PosCompare);
	prima_check_overlap(pairlist,&npair,overlap);
    }



    if(npair)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"%d pairs found\n\n",npair);
	else
	    ajFmtPrintF(outf,
			"Closest primer pair to specified product is:\n\n");

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	    ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n");
    }



    for(i=0;i<npair;++i)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"[%d]\n",i+1);

	ajListPop(pairlist,(void **)&pair);


	prodlen = pair->r->start - (pair->f->start + pair->f->primerlen);

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	{
	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;

	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1),
			v2+begin);


	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajFmtPrintF(outf,
			"%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin);


	    ajFmtPrintF(outf,"       Tm  %.2f C  (GC %.2f%%)\t\t       ",
			pair->f->primerTm,pair->f->primGCcont*100.);
	    ajFmtPrintF(outf,"Tm  %.2f C  (GC %.2f%%)\n",
			pair->r->primerTm,pair->r->primGCcont*100.);

	    ajFmtPrintF(outf,"             Length: %-32dLength: %d\n",
			pair->f->primerlen,pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:    %.2f C\t\t\t",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));
	    ajFmtPrintF(outf,"     Tma:    %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));


	    ajFmtPrintF(outf,"       Product GC: %.2f%%\n",
			pair->f->prodGC * 100.0);
	    ajFmtPrintF(outf,"       Product Tm: %.2f C\n",
			pair->f->prodTm);
	    ajFmtPrintF(outf,"       Length:     %d\n\n\n",prodlen);
	}
	else
	{
	    ajFmtPrintF(outf,"    Product from %d to %d\n",pair->f->start+
			pair->f->primerlen+begin,pair->r->start-1+begin);
	    ajFmtPrintF(outf,"                 Tm: %.2f C   GC: %.2f%%\n",
			pair->f->prodTm,pair->f->prodGC*(float)100.);
	    ajFmtPrintF(outf,"                 Length: %d\n\n\n",prodlen);


	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Forward: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->f->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->f->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->f->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));

	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Reverse: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->r->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->r->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));
	}

	prima_PrimerDel(&pair->f);
	prima_PrimerDel(&pair->r);
	AJFREE(pair);
    }



    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&substr);
    ajStrDel(&p1);
    ajStrDel(&p2);

    ajListFree(&forlist);
    ajListFree(&revlist);
    ajListFree(&pairlist);

    ajFileClose(&outf);
    ajSeqDel(&sequence);

    AJFREE(entropy);
    AJFREE(enthalpy);
    AJFREE(energy);

    embExit();

    return 0;
}
示例#18
0
文件: needle.c 项目: Katzlab/Pipeline
int main(int argc, char **argv)
{
    AjPAlign align;
    AjPSeqall seqall;
    AjPSeq a;
    AjPSeq b;
    AjPStr alga;
    AjPStr algb;
    AjPStr ss;

    ajuint    lena;
    ajuint    lenb;

    const char   *p;
    const char   *q;

    ajint start1 = 0;
    ajint start2 = 0;

    ajint *compass;
    float* ix;
    float* iy;
    float* m;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;

    float gapopen;
    float gapextend;
    float endgapopen;
    float endgapextend;
    size_t maxarr = 1000; 	/* arbitrary. realloc'd if needed */
    size_t len;

    float score;

    AjBool dobrief = ajTrue;
    AjBool endweight = ajFalse; /* should end gap penalties be applied */

    float id   = 0.;
    float sim  = 0.;
    float idx  = 0.;
    float simx = 0.;

    AjPStr tmpstr = NULL;

    embInit("needle", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    a         = ajAcdGetSeq("asequence");
    ajSeqTrim(a);
    seqall    = ajAcdGetSeqall("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    endgapopen   = ajAcdGetFloat("endopen");
    endgapextend = ajAcdGetFloat("endextend");
    dobrief   = ajAcdGetBoolean("brief");
    endweight   = ajAcdGetBoolean("endweight");

    align     = ajAcdGetAlign("outfile");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    AJCNEW0(compass, maxarr);
    AJCNEW0(m, maxarr);
    AJCNEW0(ix, maxarr);
    AJCNEW0(iy, maxarr);

    alga  = ajStrNew();
    algb  = ajStrNew();
    ss = ajStrNew();

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    lena = ajSeqGetLen(a);

    while(ajSeqallNext(seqall,&b))
    {
	ajSeqTrim(b);
	lenb = ajSeqGetLen(b);

	if(lenb > (LONG_MAX/(size_t)(lena+1)))
	   ajDie("Sequences too big. Try 'stretcher'");

	len = (size_t)lena*(size_t)lenb;

	if(len>maxarr)
	{
	    AJCRESIZETRY0(compass,(size_t)maxarr,len);
	    if(!compass)
		ajDie("Sequences too big. Try 'stretcher'");
	    AJCRESIZETRY0(m,(size_t)maxarr,len);
	    if(!m)
		ajDie("Sequences too big. Try 'stretcher'");
	    AJCRESIZETRY0(ix,(size_t)maxarr,len);
	    if(!ix)
		ajDie("Sequences too big. Try 'stretcher'");
	    AJCRESIZETRY0(iy,(size_t)maxarr,len);
	    if(!iy)
		ajDie("Sequences too big. Try 'stretcher'");
	    maxarr=len;
	}


	p = ajSeqGetSeqC(a);
	q = ajSeqGetSeqC(b);

	ajStrAssignC(&alga,"");
	ajStrAssignC(&algb,"");

	score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb,
	        gapopen, gapextend, endgapopen, endgapextend,
	        &start1, &start2, sub, cvt,
	        m, ix, iy, compass, ajFalse, endweight);



	embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb,
	        lena, lenb, &start1, &start2,
	        compass);
		
	embAlignReportGlobal(align, a, b, alga, algb,
			     start1, start2,
			     gapopen, gapextend,
			     score, matrix,
			     ajSeqGetOffset(a), ajSeqGetOffset(b));

	if(!dobrief)
	{
	  embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx,
				 &simx);
	  ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n",
			 id);
	  ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n",
			 sim);
	  ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n",
			 idx);
	  ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%",
			 simx);
	  ajAlignSetSubHeaderApp(align, tmpstr);
	}
	ajAlignWrite(align);
	ajAlignReset(align);

    }

    ajAlignClose(align);
    ajAlignDel(&align);

    ajSeqallDel(&seqall);
    ajSeqDel(&a);
    ajSeqDel(&b);

    AJFREE(compass);
    AJFREE(ix);
    AJFREE(iy);
    AJFREE(m);

    ajStrDel(&alga);
    ajStrDel(&algb);
    ajStrDel(&ss);
    ajStrDel(&tmpstr);

    embExit();

    return 0;
}
示例#19
0
int main(int argc, char **argv)
{
    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPFile libr=NULL;
    AjPStr idformat = NULL;

    EmbPEntry entry;

    ajuint idtype  = 0;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listInputFiles = NULL;
    void ** inputFiles = NULL;
    ajuint nfiles;
    ajuint ifile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
	0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile  = NULL;

    AjPStr* divfiles   = NULL;
    AjPRegexp regIdExp      = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i;

    embInit("dbifasta", argc, argv);

    idformat   = ajAcdGetListSingle("idformat");
    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])		/* array ends with a NULL */
	nfields++;

    if(nfields)
    {
	AJCNEW(maxFieldLen, nfields);
	AJCNEW0(countField, nfields);
	AJCNEW0(fieldTot, nfields);
	for(ifield=0; ifield < nfields; ifield++)
	    maxFieldLen[ifield] = (ajint)maxindex * -1;

	if(systemsort)
	    AJCNEW(alistfile, nfields);
	else
	{
	    AJCNEW(fieldList, nfields);
	    for(ifield=0; ifield < nfields; ifield++)
		fieldList[ifield] = ajListNew();
	}
    }

    if(ajStrMatchC(datestr, "00/00/00"))
	ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);		/* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();

    regIdExp = dbifasta_getExpr(idformat, &idtype);

    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);

    listInputFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listInputFiles, &ajStrVcmp);
    nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embDbiLogHeader(logfile, dbname, release, datestr,
		     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
		    (AjPStr*) inputFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);

    /*
    ** process each input file, one at a time
    */

    for(ifile=0; ifile < nfiles; ifile++)
    {
	ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]);
	embDbiFlatOpenlib(curfilename, &libr);
	ajFilenameTrimPath(&curfilename);
	if(ajStrGetLen(curfilename) >= maxfilelen)
	    maxfilelen = ajStrGetLen(curfilename) + 1;

	ajDebug("processing filename '%S' ...\n", curfilename);
	ajDebug("processing file '%F' ...\n", libr);
	ajStrAssignS(&divfiles[ifile], curfilename);

	if(systemsort)	 /* elistfile for entries, alist for fields */
	    elistfile = embDbiSortOpen(alistfile, ifile,
				       dbname, fields, nfields);

	idCountFile = 0;
	for(i=0;i<nfields;i++)
	    countField[i] = 0;
	while((entry=dbifasta_NextFlatEntry(libr, ifile,
					    regIdExp, idtype,
					    systemsort, fields, 
					    maxFieldLen, &maxidlen,
					    countField, elistfile,
					    alistfile)))
	{
	    idCountFile++;
	    if(!systemsort)	    /* save the entry data in lists */
		embDbiMemEntry(idlist, fieldList, nfields,
			       entry, ifile);
		entry = NULL;
	}
	idCount += idCountFile;
	if(systemsort)
	{
	    embDbiSortClose(&elistfile, alistfile, nfields);
	    AJFREE(entry);
	}
	else
	{
	    embDbiEntryDel(&dbifastaGEntry);
	}
	embDbiLogFile(logfile, curfilename, idCountFile, fields,
		      countField, nfields);
    }

    /*  write the division.lkp file */
    embDbiWriteDivision(indexdir, dbname, release, date,
			maxfilelen, nfiles, divfiles, NULL);

    /* Write the entryname.idx index */
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);

    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);

    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
				      dbname, nfiles, cleanup, sortopt);
    else			  /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
				     idlist, &entryIds);
	if(idDone != idCount)
	    ajFatal("Duplicates not allowed for in-memory processing");
    }

    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);

    /* Write the fields index files */
    for(ifield=0; ifield < nfields; ifield++)
    {
        if(maxindex)
	    maxlen = maxindex;
	else
	{
	    if(maxFieldLen[ifield] >= 0)
		maxlen = maxFieldLen[ifield];
	    else
		maxlen = - maxFieldLen[ifield];
	}

        if(systemsort)
	    fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
						     date, indexdir,
						     fields[ifield], maxlen,
						     nfiles, idCount,
						     cleanup, sortopt);
	else
	    fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
						    date, indexdir,
						    fields[ifield], maxlen,
						    fieldList[ifield],
						    entryIds);
    }

    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
		   nfields, nfiles, idDone, idCount);

    if(systemsort)
	embDbiRmEntryFile(dbname, cleanup);

    ajStrDel(&idformat);
    ajStrDelarray(&fields);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&tmpfname);
    ajFileClose(&libr);
    ajFileClose(&logfile);

    for(i=0;i<nfields;i++)
    {
	if(systemsort)
	{
	    ajFileClose(&alistfile[i]);
	}
	else
	{
	    ajListMap(fieldList[i], &embDbiFieldDelMap, NULL);
	    ajListFree(&fieldList[i]);
	}
    }

    AJFREE(alistfile);
    AJFREE(fieldList);
    AJFREE(maxFieldLen);
    AJFREE(countField);
    AJFREE(fieldTot);

    for(i=0;i<nfiles;i++)
    {
	ajStrDel(&divfiles[i]);
    }

    AJFREE(divfiles);
    AJFREE(inputFiles);

    embDbiEntryDel(&dbifastaGEntry);

    ajStrDel(&dbifastaGRline);
    ajStrDel(&dbifastaGTmpId);

    if(dbifastaGFdl)
    {
	for(i=0; i < nfields; i++)
	    ajListFree(&dbifastaGFdl[i]);
	AJFREE(dbifastaGFdl);
    }

    ajListMap(idlist, &embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    ajListstrFreeData(&listInputFiles);
    AJFREE(entryIds);
    ajRegFree(&dbifastaGIdexp);
    ajRegFree(&dbifastaGWrdexp);
    ajRegFree(&regIdExp);

    ajStrDel(&dbifastaGTmpAc);
    ajStrDel(&dbifastaGTmpSv);
    ajStrDel(&dbifastaGTmpGi);
    ajStrDel(&dbifastaGTmpDb);
    ajStrDel(&dbifastaGTmpDes);
    ajStrDel(&dbifastaGTmpFd);
    ajStrDel(&curfilename);

    embExit();

    return 0;
}
示例#20
0
int main(int argc, char **argv)
{
    AjPSeqset seqset;
    AjPSeqall seqall;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    ajint wordlen;
    AjPTable wordsTable = NULL;
    AjPList* matchlist = NULL;
    AjPFile logfile;
    AjPFeattable* seqsetftables = NULL;
    AjPFeattable seqallseqftable = NULL;
    AjPFeattabOut ftoutforseqsetseq = NULL;
    AjPFeattabOut ftoutforseqallseq = NULL;
    AjPAlign align = NULL;
    AjIList iter = NULL;
    ajint targetstart;
    ajint querystart;
    ajint len;
    ajuint i, j;
    ajulong nAllMatches = 0;
    ajulong sumAllScore = 0;
    AjBool dumpAlign = ajTrue;
    AjBool dumpFeature = ajTrue;
    AjBool checkmode = ajFalse;
    EmbPWordRK* wordsw = NULL;
    ajuint npatterns = 0;
    ajuint seqsetsize;
    ajuint nmatches;
    ajuint* nmatchesseqset;
    ajuint* lastlocation; /* Cursors for Rabin-Karp search. */
                          /* Shows until what point the query sequence was
                           *  scanned for a pattern sequences in the seqset.
                          */
    char* paddedheader = NULL;
    const char* header;
    AjPStr padding;

    header = "Pattern %S  #pat-sequences  #all-matches  avg-match-length\n";
    padding = ajStrNew();

    embInit("wordmatch", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seqset  = ajAcdGetSeqset("asequence");
    seqall  = ajAcdGetSeqall("bsequence");
    logfile = ajAcdGetOutfile("logfile");
    dumpAlign = ajAcdGetToggle("dumpalign");
    dumpFeature = ajAcdGetToggle("dumpfeat");

    if(dumpAlign)
    {
        align = ajAcdGetAlign("outfile");
        ajAlignSetExternal(align, ajTrue);
    }

    seqsetsize = ajSeqsetGetSize(seqset);
    ajSeqsetTrim(seqset);
    AJCNEW0(matchlist, seqsetsize);
    AJCNEW0(seqsetftables, seqsetsize);
    AJCNEW0(nmatchesseqset, seqsetsize);

    if (dumpFeature)
    {
        ftoutforseqsetseq =  ajAcdGetFeatout("aoutfeat");
        ftoutforseqallseq =  ajAcdGetFeatout("boutfeat");
    }

    checkmode = !dumpFeature && !dumpAlign;
    embWordLength(wordlen);

    ajFmtPrintF(logfile, "Small sequence/file for constructing"
	    " target patterns: %S\n", ajSeqsetGetUsa(seqset));
    ajFmtPrintF(logfile, "Large sequence/file to be scanned"
	    " for patterns: %S\n", ajSeqallGetUsa(seqall));
    ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n",
            seqsetsize);
    ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen);

    for(i=0;i<seqsetsize;i++)
    {
        targetseq = ajSeqsetGetseqSeq(seqset, i);
        embWordGetTable(&wordsTable, targetseq);
    }

    AJCNEW0(lastlocation, seqsetsize);

    if(ajTableGetLength(wordsTable)>0)
    {
        npatterns = embWordRabinKarpInit(wordsTable,
                                       &wordsw, wordlen, seqset);
        ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns);

        while(ajSeqallNext(seqall,&queryseq))
        {
            for(i=0;i<seqsetsize;i++)
            {
                lastlocation[i]=0;

                if (!checkmode)
                    matchlist[i] = ajListstrNew();
            }

            nmatches = embWordRabinKarpSearch(
                    ajSeqGetSeqS(queryseq), seqset,
                    (EmbPWordRK const *)wordsw, wordlen, npatterns,
                    matchlist, lastlocation, checkmode);
            nAllMatches += nmatches;

            if (checkmode)
        	continue;

            for(i=0;i<seqsetsize;i++)
            {
                if(ajListGetLength(matchlist[i])>0)
                {
                    iter = ajListIterNewread(matchlist[i]) ;

                    while(embWordMatchIter(iter, &targetstart, &querystart, &len,
                            &targetseq))
                    {
                        if(dumpAlign)
                        {
                            ajAlignDefineSS(align, targetseq, queryseq);
                            ajAlignSetScoreI(align, len);
                            /* ungapped alignment means same length
                             *  for both sequences
                            */
                            ajAlignSetSubRange(align, targetstart, 1, len,
                                    ajSeqIsReversed(targetseq),
                                    ajSeqGetLen(targetseq),
                                    querystart, 1, len,
                                    ajSeqIsReversed(queryseq),
                                    ajSeqGetLen(queryseq));
                        }
                    }

                    if(dumpAlign)
                    {
                	ajAlignWrite(align);
                	ajAlignReset(align);
                    }

                    if(ajListGetLength(matchlist[i])>0 && dumpFeature)
                    {
                        embWordMatchListConvToFeat(matchlist[i],
                                                   &seqsetftables[i],
                                                   &seqallseqftable,
                                                   targetseq, queryseq);
                        ajFeattableWrite(ftoutforseqallseq, seqallseqftable);
                        ajFeattableDel(&seqallseqftable);
                    }

                    ajListIterDel(&iter);
                }

                embWordMatchListDelete(&matchlist[i]);
            }
        }

        /* search completed, now report statistics */
        for(i=0;i<npatterns;i++)
        {
            sumAllScore += wordsw[i]->lenMatches;

            for(j=0;j<wordsw[i]->nseqs;j++)
        	nmatchesseqset[wordsw[i]->seqindxs[j]] +=
        		wordsw[i]->nSeqMatches[j];
        }

        ajFmtPrintF(logfile, "Number of sequences in the file scanned "
                "for patterns: %u\n", ajSeqallGetCount(seqall));
        ajFmtPrintF(logfile, "Number of all matches: %Lu"
                " (wordmatch finds exact matches only)\n", nAllMatches);

        if(nAllMatches>0)
        {
            ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore);
            ajFmtPrintF(logfile, "Average match length: %.2f\n",
        	    sumAllScore*1.0/nAllMatches);

            ajFmtPrintF(logfile, "\nDistribution of the matches among pattern"
        	    " sequences:\n");
            ajFmtPrintF(logfile, "-----------------------------------------"
        	    "-----------\n");

            for(i=0;i<ajSeqsetGetSize(seqset);i++)
            {
        	if (nmatchesseqset[i]>0)
        	    ajFmtPrintF(logfile, "%-42s: %8u\n",
        	                ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)),
        	                nmatchesseqset[i]);

        	ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]);
        	ajFeattableDel(&seqsetftables[i]);
            }

            ajFmtPrintF(logfile, "\nPattern statistics:\n");
            ajFmtPrintF(logfile, "-------------------\n");
            if(wordlen>7)
        	ajStrAppendCountK(&padding, ' ', wordlen-7);
            paddedheader = ajFmtString(header,padding);
            ajFmtPrintF(logfile, paddedheader);

            for(i=0;i<npatterns;i++)
        	if (wordsw[i]->nMatches>0)
        	    ajFmtPrintF(logfile, "%-7s: %12u  %12u %17.2f\n",
        	                wordsw[i]->word->fword, wordsw[i]->nseqs,
        	                wordsw[i]->nMatches,
        	                wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches);
        }

    }

    for(i=0;i<npatterns;i++)
    {
        for(j=0;j<wordsw[i]->nseqs;j++)
            AJFREE(wordsw[i]->locs[j]);

        AJFREE(wordsw[i]->locs);
        AJFREE(wordsw[i]->seqindxs);
        AJFREE(wordsw[i]->nnseqlocs);
        AJFREE(wordsw[i]->nSeqMatches);
        AJFREE(wordsw[i]);
    }

    embWordFreeTable(&wordsTable);

    AJFREE(wordsw);
    AJFREE(matchlist);
    AJFREE(lastlocation);
    AJFREE(nmatchesseqset);
    AJFREE(seqsetftables);

    if(dumpAlign)
    {
        ajAlignClose(align);
        ajAlignDel(&align);
    }

    if(dumpFeature)
    {
        ajFeattabOutDel(&ftoutforseqsetseq);
        ajFeattabOutDel(&ftoutforseqallseq);
    }

    ajFileClose(&logfile);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&queryseq);
    ajStrDel(&padding);
    AJFREE(paddedheader);

    embExit();

    return 0;
}