Example #1
0
/* @funcstatic acdrelations_readtypefile **************************************
**
** Read knowntype records from a text file into an existing PKtype object.
**
** Lines beginning with '#' are skipped. Every other line is tokenised on
** '|' and stored in a newly created PKtypedat record:
**   token 1 -> ktype   (then passed through ajStrRemoveSetC(.., "_") and
**                       ajStrRemoveWhite())
**   token 2 -> acdtype
**   token 3 -> edam
**
** The records are appended to a temporary list in file order. The list is
** converted to an array stored in (*T)->dat and the value returned by the
** conversion is stored in (*T)->n.
**
** A fatal error is raised if T, *T or inf is NULL.
**
** @param [u] inf [AjPFile] Input file
** @param [w] T   [PKtype*] Address of the PKtype object to fill
**
** @return [void]
** @@
******************************************************************************/
static void acdrelations_readtypefile
            (AjPFile inf, 
             PKtype *T)
{
    AjPStr     line    = NULL;
    PKtypedat  dattmp  = NULL;
    AjPList    datlist = NULL;
    
    if(!T)
        ajFatal("Null arg error 1 in acdrelations_readtypefile");
    /* *T is dereferenced when the array is stored, so it must exist too */
    if(!*T)
        ajFatal("Null arg error 2 in acdrelations_readtypefile");
    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readtypefile");


    /* Allocate memory */
    line           = ajStrNew();
    datlist        = ajListNew();

    
    /* Read data from file */
    while(ajReadline(inf,&line))
    {
        /* Discard comment lines */
        if(ajStrPrefixC(line,"#")) 
            continue;

        /* Create object for holding line */
        dattmp = ajKtypedatNew();
        
        /* Tokenise line delimited by '|'
           Parse first token (value of knowntype: attribute) */
        ajStrAssignS(&dattmp->ktype, ajStrParseC(line, "|"));
        ajStrRemoveSetC(&dattmp->ktype, "_");
        ajStrRemoveWhite(&dattmp->ktype);
        
        /* Parse second token (ACD datatype); NULL continues the same line */
        ajStrAssignS(&dattmp->acdtype, ajStrParseC(NULL, "|"));

        /* Parse third token (EDAM relations: value ) */
        ajStrAssignS(&dattmp->edam, ajStrParseC(NULL, "|"));

        /* Push record onto list, preserving file order */
        ajListPushAppend(datlist, dattmp);
    }
    

    /* Write PKtype structure */
    ((*T)->n) = ajListToarray(datlist, (void***) &((*T)->dat));
  
    
    /* Free the working string and the list; the records themselves are
       now referenced from (*T)->dat */
    ajStrDel(&line);
    ajListFree(&datlist);

    return;
}
Example #2
0
/* @funcstatic sigscanlig_WriteFastaHit ***************************************
**
** Write one hit from a list of hits to an output file as a single
** FASTA-style record.
**
** The header line starts with "> " and is followed by '^'-delimited fields:
** accession, Spr, start, end, the literal "LIGAND", signature Id, Domid,
** Ligid, sn, ns, pn, np, the signature type ("1D" or "3D") immediately
** followed by "P" or "F" (chosen by whether np is non-zero), score, P-value
** and E-value. Empty string fields are written as ".".
**
** The header is only terminated with a newline, and the sequence only
** written, when DOSEQ is true.
**
** A fatal error is raised if n is outside the list or the signature type is
** neither aj1D nor aj3D.
** 
** @param [u] outf [AjPFile] Output file stream
** @param [r] hits [AjPList] List of hit objects.
** @param [r] n  [ajint] Zero-based index of the hit to write.
** @param [r] DOSEQ  [AjBool] True if sequence is to be printed. 
**
** @return [AjBool] True on success, false if outf or hits is NULL
** @@
******************************************************************************/
AjBool sigscanlig_WriteFastaHit(AjPFile outf, AjPList hits, ajint n,
                                AjBool DOSEQ)
{
    EmbPHit hit       = NULL;
    EmbPSignature sig = NULL;
    EmbPHit *hitarr   = NULL;
    ajint  sizarr     = 0;

    if(!outf || !hits)
        return ajFalse;

    /* Array form gives direct access to the n'th hit */
    sizarr = ajListToarray(hits, (void ***) &hitarr);

    /* Reject negative indices as well as indices past the end */
    if(n < 0 || n >= sizarr)
        ajFatal("Requested hit out of range in sigscanlig_WriteFastaHit");

    hit = hitarr[n];
    sig = hitarr[n]->Sig;   /* the signature is carried by the hit itself */

    ajFmtPrintF(outf, "> ");

    if(MAJSTRGETLEN(hit->Acc))
        ajFmtPrintF(outf, "%S^", hit->Acc);
    else
        ajFmtPrintF(outf, ".^");

    if(MAJSTRGETLEN(hit->Spr))
        ajFmtPrintF(outf, "%S^", hit->Spr);
    else
        ajFmtPrintF(outf, ".^");

    ajFmtPrintF(outf, "%d^%d^", hit->Start, hit->End);

    ajFmtPrintF(outf, "LIGAND^");

    if(MAJSTRGETLEN(sig->Id))
        ajFmtPrintF(outf, "%S^", sig->Id);
    else
        ajFmtPrintF(outf, ".^");

    if(MAJSTRGETLEN(sig->Domid))
        ajFmtPrintF(outf, "%S^", sig->Domid);
    else
        ajFmtPrintF(outf, ".^");

    if(MAJSTRGETLEN(sig->Ligid))
        ajFmtPrintF(outf, "%S^", sig->Ligid);
    else
        ajFmtPrintF(outf, ".^");

    ajFmtPrintF(outf,"%d^", sig->sn);
    ajFmtPrintF(outf,"%d^", sig->ns);
    ajFmtPrintF(outf,"%d^", sig->pn);
    ajFmtPrintF(outf,"%d^", sig->np);

    /* Type and P/F flag share one field, e.g. "1DP^" */
    if(sig->Typesig == aj1D)
        ajFmtPrintF(outf,"1D");
    else if(sig->Typesig == aj3D)
        ajFmtPrintF(outf,"3D");
    else
        ajFatal("Signature type unknown in sigscanlig_WriteFastaHit");

    if(sig->np)
        ajFmtPrintF(outf,"P^");
    else
        ajFmtPrintF(outf,"F^");

    ajFmtPrintF(outf, "%.2f^", hit->Score);

    ajFmtPrintF(outf, "%.3e^", hit->Pval);

    ajFmtPrintF(outf, "%.3e", hit->Eval);

    if(DOSEQ)
    {
        ajFmtPrintF(outf, "\n");
        ajFmtPrintF(outf, "%S\n", hit->Seq);
    }

    /* Only the pointer array is released here, not the hits */
    AJFREE(hitarr);

    return ajTrue;
}
Example #3
0
/* @funcstatic sigscanlig_WriteFasta ******************************************
**
** Write every hit in a list to an output file as FASTA-style records.
**
** For each hit a header line is written, starting with "> " and followed
** by '^'-delimited fields: accession, Spr, start, end, the literal
** "LIGAND", signature Id, Domid, Ligid, sn, ns, pn, np, the signature type
** ("1D" or "3D") immediately followed by "P" or "F" (chosen by whether np
** is non-zero), score, P-value and E-value. Empty string fields are
** written as ".". The header is followed by the hit sequence on its own
** line.
**
** A fatal error is raised if a signature type is neither aj1D nor aj3D.
**
** @param [u] outf [AjPFile] Output file stream
** @param [r] hits [AjPList] List of hit objects
**
** @return [AjBool] True on success, false if outf or hits is NULL
** @@
******************************************************************************/
AjBool sigscanlig_WriteFasta(AjPFile outf, AjPList hits)
{
    ajint x = 0;

    EmbPHit hit       = NULL;
    EmbPSignature sig = NULL;

    EmbPHit *hitarr = NULL;
    ajint  sizarr   = 0;

    if(!outf || !hits)
        return ajFalse;

    sizarr = ajListToarray(hits, (void ***) &hitarr);

    for(x=0; x<sizarr; x++)
    {
        hit = hitarr[x];
        sig = hitarr[x]->Sig;   /* the signature is carried by the hit */

        ajFmtPrintF(outf, "> ");

        if(MAJSTRGETLEN(hit->Acc))
            ajFmtPrintF(outf, "%S^", hit->Acc);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(hit->Spr))
            ajFmtPrintF(outf, "%S^", hit->Spr);
        else
            ajFmtPrintF(outf, ".^");

        ajFmtPrintF(outf, "%d^%d^", hit->Start, hit->End);

        ajFmtPrintF(outf, "LIGAND^");

        if(MAJSTRGETLEN(sig->Id))
            ajFmtPrintF(outf, "%S^", sig->Id);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(sig->Domid))
            ajFmtPrintF(outf, "%S^", sig->Domid);
        else
            ajFmtPrintF(outf, ".^");

        if(MAJSTRGETLEN(sig->Ligid))
            ajFmtPrintF(outf, "%S^", sig->Ligid);
        else
            ajFmtPrintF(outf, ".^");

        ajFmtPrintF(outf,"%d^", sig->sn);
        ajFmtPrintF(outf,"%d^", sig->ns);
        ajFmtPrintF(outf,"%d^", sig->pn);
        ajFmtPrintF(outf,"%d^", sig->np);

        /* Type and P/F flag share one field, e.g. "1DP^" */
        if(sig->Typesig == aj1D)
            ajFmtPrintF(outf,"1D");
        else if(sig->Typesig == aj3D)
            ajFmtPrintF(outf,"3D");
        else
            ajFatal("Signature type unknown in sigscanlig_WriteFasta");

        if(sig->np)
            ajFmtPrintF(outf,"P^");
        else
            ajFmtPrintF(outf,"F^");

        ajFmtPrintF(outf, "%.2f^", hit->Score);

        ajFmtPrintF(outf, "%.3e^", hit->Pval);

        ajFmtPrintF(outf, "%.3e", hit->Eval);

        ajFmtPrintF(outf, "\n");
        ajFmtPrintF(outf, "%S\n", hit->Seq);
    }

    /* Only the pointer array is released here, not the hits */
    AJFREE(hitarr);

    return ajTrue;
}
/* @prog dbiblast *************************************************************
**
** Program entry point for "dbiblast" (the name passed to embInit).
**
** Phases, in order:
**   1. Read the ACD options and count the requested index fields.
**   2. Allocate per-field bookkeeping: temporary sort files when
**      "systemsort" is set, otherwise in-memory lists.
**   3. Build the sorted list of candidate input files and log the setup.
**   4. For each file that dbiblast_blastopenlib() accepts, read every entry
**      and record its id and field values.
**   5. Write the division file, the "entrynam.idx" index and one index per
**      field.
**   6. Release everything, including file-scope working objects that are
**      defined outside this function (readReverse, t, id, acc, hline,
**      tmpdes, tmpfd, tmpgi, tmpdb, tmpac, tmpsv, wrdexp, dbiblastEntry,
**      fdl).
**
** @param [r] argc [int]    Argument count, passed to embInit
** @param [r] argv [char**] Argument vector, passed to embInit
**
** @return [int] 0 after embExit()
** @@
******************************************************************************/
int main(int argc, char **argv)
{

    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajint blastv = 0;
    char dbtype  = '\0';

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPStr version = NULL;
    AjPStr seqtype = NULL;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjBool usesrc = AJTRUE;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPStr idformat = NULL;

    EmbPEntry entry;

    PBlastDb db = NULL;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listTestFiles = NULL;
    void ** testFiles = NULL;
    ajuint nfiles;
    ajuint ifile;
    ajuint jfile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
        0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile = NULL;

    AjPStr* divfiles   = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i = 0;

    embInit("dbiblast", argc, argv);

    idformat = ajStrNewC("NCBI");

    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    version    = ajAcdGetListSingle("blastversion");
    seqtype    = ajAcdGetListSingle("seqtype");
    usesrc     = ajAcdGetBoolean("sourcefile");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])              /* array ends with a NULL */
        nfields++;

    if(nfields)
    {
        AJCNEW(maxFieldLen, nfields);
        AJCNEW0(countField, nfields);
        AJCNEW0(fieldTot, nfields);

        /* start each field at -maxindex; the sign is stripped again when
           the field indexes are written */
        for(ifield=0; ifield < nfields; ifield++)
            maxFieldLen[ifield] = (ajint) maxindex * -1;

        if(systemsort)
            AJCNEW(alistfile, nfields);
        else
        {
            AJCNEW(fieldList, nfields);
            for(ifield=0; ifield < nfields; ifield++)
                fieldList[ifield] = ajListNew();
        }
    }
    
    /* "00/00/00" is replaced by today's date */
    if(ajStrMatchC(datestr, "00/00/00"))
        ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);          /* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();
    
    /* readReverse is a file-scope flag: set on non-big-endian hosts */
    if(ajUtilGetBigendian())
        readReverse = ajFalse;
    else
        readReverse = ajTrue;
    
    ajStrToInt(version, &blastv);
    dbtype = ajStrGetCharFirst(seqtype);
    
    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);
    
    listTestFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listTestFiles, ajStrVcmp);
    nfiles = ajListToarray(listTestFiles, &testFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);
    
    embDbiLogHeader(logfile, dbname, release, datestr,
                     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
                    (AjPStr*) testFiles, nfiles);
    embDbiLogCmdline(logfile);

    /* sized for every candidate; only the first jfile slots get filled */
    AJCNEW0(divfiles, nfiles);
    
    /*
    ** process each input file, one at a time
    */
    
    jfile = 0;          /* counts files actually opened, ifile counts all */
    for(ifile=0; ifile < nfiles; ifile++)
    {
        curfilename = (AjPStr) testFiles[ifile];
        if(!dbiblast_blastopenlib(curfilename,
                                  usesrc, blastv, dbtype, &db))
            continue;    /* could be the wrong file type with "*.*" */

        ajDebug("processing filename '%S' ...\n", curfilename);
        ajDebug("processing file '%S' ...\n", db->TFile->Name);


        ajStrAssignS(&divfiles[jfile], db->TFile->Name);
        ajFilenameTrimPath(&divfiles[jfile]);
        if(ajStrGetLen(divfiles[jfile]) >= maxfilelen)
            maxfilelen = ajStrGetLen(divfiles[jfile]) + 1;

        if(systemsort)   /* elistfile for entries, alist for fields */
            elistfile = embDbiSortOpen(alistfile, jfile,
                                       dbname, fields, nfields);

        idCountFile = 0;
        for(i=0;i<nfields;i++)
            countField[i] = 0;
        while((entry=dbiblast_nextblastentry(db, jfile,
                                             idformat, systemsort,
                                             fields,
                                             maxFieldLen,
                                             &maxidlen, countField,
                                             elistfile, alistfile)))
        {
            idCountFile++;
            if(!systemsort)         /* save the entry data in lists */
            {
                embDbiMemEntry(idlist, fieldList, nfields, entry, jfile);
            }
        }
        idCount += idCountFile;
        if(systemsort)
        {
            embDbiSortClose(&elistfile, alistfile, nfields);
            /* lost the entry, so can't free it :-) */
        }

        embDbiLogFile(logfile, curfilename, idCountFile, fields,
                      countField, nfields);
        dbiblast_dbfree(&db);
        jfile++;
    }
    nfiles = jfile;     /* from here on nfiles means "files indexed" */
    
    /*
    ** write the division.lkp file
    */
    
    embDbiWriteDivision(indexdir, dbname, release, date,
                        maxfilelen, nfiles, divfiles, NULL);
    
    /*
    ** Write the entryname.idx index
    */
    
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);
    
    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);
    
    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
                                      dbname, nfiles, cleanup, sortopt);
    else                          /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
                                     idlist, &entryIds);
        if(idDone != idCount)
            ajFatal("Duplicates not allowed for in-memory processing");
    }
    
    /* rewrite the header sizes using the number of ids actually written */
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);
    
    /*
    ** Write the fields index files
    */
    
    for(ifield=0; ifield < nfields; ifield++)
    {

        /* an explicit maxindex wins; otherwise use the magnitude of the
           recorded field length */
        if(maxindex)
            maxlen = maxindex;
        else
        {
            if(maxFieldLen[ifield] >= 0)
                maxlen = maxFieldLen[ifield];
            else
                maxlen = - maxFieldLen[ifield];
        }

        if(systemsort)
            fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
                                                     date, indexdir,
                                                     fields[ifield], maxlen,
                                                     nfiles, idCount,
                                                     cleanup, sortopt);
        else
            fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
                                                    date, indexdir,
                                                    fields[ifield], maxlen,
                                                    fieldList[ifield],
                                                    entryIds);
    }
    
    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
                   nfields, nfiles, idDone, idCount);

    if(systemsort)
        embDbiRmEntryFile(dbname, cleanup);
    
    /*
    ** Release everything
    */

    ajListMap(idlist, embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    AJFREE(entryIds);

    ajStrDelarray(&fields);

    for(i=0;i<nfields;i++)
    {
        if(systemsort)
        {
            ajFileClose(&alistfile[i]);
        }
        else
        {
            ajListMap(fieldList[i], embDbiFieldDelMap, NULL);
            ajListFree(&fieldList[i]);
        }
    }
    AJFREE(alistfile);
    AJFREE(fieldList);
    ajStrDel(&version);
    ajStrDel(&seqtype);
    ajFileClose(&elistfile);
    AJFREE(countField);
    AJFREE(fieldTot);

    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&idformat);
    ajStrDel(&tmpfname);

    AJFREE(maxFieldLen);

    ajFileClose(&logfile);

    /* curfilename only ever pointed into this list, so it is not freed
       separately */
    ajListstrFreeData(&listTestFiles);

    /* file-scope working objects defined outside this function */
    ajStrDel(&t);
    ajStrDel(&id);
    ajStrDel(&acc);
    ajStrDel(&hline);
    ajStrDel(&tmpdes);
    ajStrDel(&tmpfd);
    ajStrDel(&tmpgi);
    ajStrDel(&tmpdb);
    ajStrDel(&tmpac);
    ajStrDel(&tmpsv);
    ajRegFree(&wrdexp);

    embDbiEntryDel(&dbiblastEntry);

    if(fdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&fdl[i]);
        AJFREE(fdl);
    }

    /* release the division names once, immediately before their array */
    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }
    AJFREE(divfiles);
    AJFREE(testFiles);

    embExit();

    return 0;
}
Example #5
0
/* @func ajMatrixfNewFile *****************************************************
**
** Build a floating point comparison matrix from a data file.
**
** The file is read twice:
**   Pass 1 collects the row labels. Lines starting with '#' and empty
**          lines are ignored, the first remaining line (the column
**          header) is skipped, and the first token of every later line is
**          taken as a row label.
**   Pass 2 parses the column header into the column labels, creates the
**          matrix with ajMatrixfNewAsym(), then for each data line looks
**          up the row from its label and stores one value per column.
**
** @param [r] filename [const AjPStr] Matrix data file name, opened with
**                                    ajDatafileNewInNameS()
**
** @return [AjPMatrixf] New matrix. NULL if the file cannot be opened, or if
**                      no header line was found.
** @@
******************************************************************************/
AjPMatrixf ajMatrixfNewFile(const AjPStr filename)
{
    AjPMatrixf ret = NULL;
    AjPStr *orderstring = NULL;
    AjPStr buffer       = NULL;
    AjPStr firststring  = NULL;
    AjPStr reststring   = NULL;
    const AjPStr tok    = NULL;

    ajint len  = 0;
    ajint i    = 0;
    ajint l    = 0;
    ajint k    = 0;
    ajint cols = 0;
    ajint rows   = 0;
    
    const char *ptr = NULL;

    AjPFile file    = NULL;
    AjBool  first   = ajTrue;

    float **matrix  = NULL;
    float *templine = NULL;
    float minval    = -1.0;

    AjPList rlabel_list = NULL;
    AjPStr  *rlabel_arr  = NULL;
#ifndef WIN32
    static const char *delimstr = " :\t\n";
#else
    static const char *delimstr = " :\t\n\r";
#endif

    rlabel_list = ajListNew();

    firststring = ajStrNew();
    reststring  = ajStrNew();

    file = ajDatafileNewInNameS(filename);
    
    if(!file)
    {
        /* release everything allocated so far, including the label list */
        ajStrDel(&firststring);
        ajStrDel(&reststring);
        ajListFree(&rlabel_list);

        return NULL;
    }
    

    /* Pass 1: read row labels */
    while(ajReadline(file,&buffer))
    {
        ptr = ajStrGetPtr(buffer);
#ifndef WIN32
        if(*ptr != '#' && *ptr != '\n')
#else
        if(*ptr != '#' && *ptr != '\n' && *ptr != '\r')
#endif
        {       
            if(first)
                first = ajFalse;        /* column header, not a row */
            else
            {
                ajFmtScanC(ptr, "%S", &firststring);
                /* the list takes this string; start a fresh one */
                ajListPushAppend(rlabel_list, firststring);
                firststring = ajStrNew();
            }
        }
    }
    first = ajTrue;
    ajStrDel(&firststring);
    rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr);
    ajFileSeek(file, 0, 0);


    /* Pass 2: column header, then the values */
    while(ajReadline(file,&buffer))
    {
        ajStrRemoveWhiteExcess(&buffer);
        ptr = ajStrGetPtr(buffer);

        if(*ptr && *ptr != '#')
        {                               
            if(first)
            {
                cols = ajStrParseCountC(buffer,delimstr);
                AJCNEW0(orderstring, cols);

                for(i=0; i<cols; i++)   
                    orderstring[i] = ajStrNew();

                /* tokenise with the same delimiters used for the count,
                   and never write past the cols slots allocated above */
                tok = ajStrParseC(buffer, delimstr);

                while(tok && l < cols)
                {
                    ajStrAssignS(&orderstring[l++], tok);
                    tok = ajStrParseC(NULL, delimstr);
                }

                first = ajFalse;

                ret = ajMatrixfNewAsym(orderstring, cols, 
                                       rlabel_arr, rows, 
                                       filename);
                matrix = ret->Matrixf;
            }
            else
            {
                ajFmtScanC(ptr, "%S", &firststring);
                k = ajSeqcvtGetCodeS(ret->Cvt, firststring); 

                len = MAJSTRGETLEN(firststring);
                ajStrAssignSubC(&reststring, ptr, len, -1);

                /* 
                ** Must discard the first string (label) and use 
                ** reststring otherwise ajArrFloatLine would fail (it 
                ** cannot convert a string to a float)
                **   
                ** Use cols,1,cols in below because although 2nd and 
                ** subsequent lines have one more string in them (the
                ** residue label in the 1st column) we've discarded that
                ** from the string that's passed
                */
                templine = ajArrFloatLine(reststring,delimstr,1,cols);
                
                for(i=0; i<cols; i++)  
                {
                    if(templine[i] < minval) 
                        minval = templine[i];

                    matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt,
                                                   orderstring[i])] 
                                                       = templine[i];
                }
                AJFREE(templine);
            }
        }
    }

    /* minval is a float, so it needs a floating point conversion */
    ajDebug("fill rest with minimum value %f\n", minval);
    

    ajFileClose(&file);
    ajStrDel(&buffer);

    for(i=0; i<cols; i++)   
        ajStrDel(&orderstring[i]);

    AJFREE(orderstring);


    ajDebug("read matrix file %S\n", filename);
    
    ajStrDel(&firststring);
    ajStrDel(&reststring);

    for(i=0; i<rows; i++)   
        ajStrDel(&rlabel_arr[i]);

    AJFREE(rlabel_arr);
    ajListFree(&rlabel_list);

    return ret;
}
Example #6
0
/* @func ajMatrixNewFile ******************************************************
**
** Build an integer comparison matrix from a data file.
**
** The file is read twice:
**   Pass 1 collects the row labels. Lines starting with '#' and empty
**          lines are ignored, the first remaining line (the column
**          header) is skipped, and the first token of every later line is
**          taken as a row label.
**   Pass 2 parses the column header into the column labels, creates the
**          matrix with ajMatrixNewAsym(), then for each data line looks up
**          the row from its label and stores one value per column.
**
** @param [r] filename [const AjPStr] Matrix data file name, opened with
**                                    ajDatafileNewInNameS()
**
** @return [AjPMatrix] New matrix. NULL if the file cannot be opened, or if
**                     no header line was found.
** @@
******************************************************************************/
AjPMatrix ajMatrixNewFile(const AjPStr filename)
{
    AjPMatrix ret = NULL;
    AjPStr buffer = NULL;
    const AjPStr tok    = NULL;

    AjPStr firststring  = NULL;
    AjPStr *orderstring = NULL;

    AjPFile file    = NULL;
    AjBool first    = ajTrue;
    const char *ptr = NULL;
    ajint **matrix  = NULL;

    ajint minval = -1;
    ajint i      = 0;
    ajint l      = 0;
    ajint k      = 0;
    ajint cols   = 0;
    ajint rows   = 0;

    ajint *templine = NULL;

    AjPList rlabel_list = NULL;
    AjPStr  *rlabel_arr  = NULL;

#ifndef WIN32
    static const char *delimstr = " :\t\n";
#else
    static const char *delimstr = " :\t\n\r";
#endif

    rlabel_list = ajListNew();

    firststring = ajStrNew();
    
    file = ajDatafileNewInNameS(filename);
    
    if(!file)
    {
        ajStrDel(&firststring);
        ajListFree(&rlabel_list);

        return NULL;
    }
    
    /* Pass 1: read row labels */
    while(ajReadline(file,&buffer))
    {
        ptr = ajStrGetPtr(buffer);
#ifndef WIN32
        if(*ptr != '#' && *ptr != '\n')
#else
        if(*ptr != '#' && *ptr != '\n' && *ptr != '\r')
#endif
        {
            if(first)
                first = ajFalse;        /* column header, not a row */
            else
            {   
                ajFmtScanC(ptr, "%S", &firststring);
                /* the list takes this string; start a fresh one */
                ajListPushAppend(rlabel_list, firststring);
                firststring = ajStrNew();       
            }
        }
    }

    first = ajTrue;
    ajStrDel(&firststring);
    rows = ajListToarray(rlabel_list, (void ***) &rlabel_arr);
    ajFileSeek(file, 0, 0);


    /* Pass 2: column header, then the values */
    while(ajReadline(file,&buffer))
    {
        ajStrRemoveWhiteExcess(&buffer);
        ptr = ajStrGetPtr(buffer);

        if(*ptr && *ptr != '#')
        {                               
            if(first)
            {
                cols = ajStrParseCountC(buffer,delimstr);
                AJCNEW0(orderstring, cols);

                for(i=0; i<cols; i++)   
                    orderstring[i] = ajStrNew();
                
                /* tokenise with the same delimiters used for the count,
                   and never write past the cols slots allocated above */
                tok = ajStrParseC(buffer, delimstr);

                while(tok && l < cols)
                {
                    ajStrAssignS(&orderstring[l++], tok);
                    tok = ajStrParseC(NULL, delimstr);
                }

                first = ajFalse;

                ret = ajMatrixNewAsym(orderstring, cols, 
                                      rlabel_arr, rows, 
                                      filename);
                matrix = ret->Matrix;
            }
            else
            {
                ajFmtScanC(ptr, "%S", &firststring);
                
                k = ajSeqcvtGetCodeS(ret->Cvt, firststring);

                /* 
                 ** cols+1 is used below because 2nd and subsequent lines have 
                 ** one more string in them (the residue label) 
                 */
                templine = ajArrIntLine(buffer,delimstr,2,cols+1);
                
                for(i=0; i<cols; i++)   
                {
                    if(templine[i] < minval) 
                        minval = templine[i];

                    matrix[k][ajSeqcvtGetCodeAsymS(ret->Cvt,
                                                   orderstring[i])] 
                                                       = templine[i];
                }

                AJFREE(templine);
            }
        }
    }

    ajDebug("fill rest with minimum value %d\n", minval);
    

    ajFileClose(&file);
    ajStrDel(&buffer);

    for(i=0; i<cols; i++)   
        ajStrDel(&orderstring[i]);

    AJFREE(orderstring);
        
    
    ajDebug("read matrix file %S\n", filename);
    
    ajStrDel(&firststring);    

    for(i=0; i<rows; i++)   
        ajStrDel(&rlabel_arr[i]);

    AJFREE(rlabel_arr);
    ajListFree(&rlabel_list);

    return ret;
}
Example #7
0
/* @prog dbifasta *************************************************************
**
** Program entry point for "dbifasta" (the name passed to embInit).
**
** Phases, in order:
**   1. Read the ACD options and count the requested index fields.
**   2. Allocate per-field bookkeeping: temporary sort files when
**      "systemsort" is set, otherwise in-memory lists.
**   3. Build the sorted list of input files and log the setup.
**   4. For each input file, read every entry with dbifasta_NextFlatEntry()
**      and record its id and field values.
**   5. Write the division file, the "entrynam.idx" index and one index per
**      field.
**   6. Release everything, including file-scope working objects that are
**      defined outside this function (the dbifastaG* names).
**
** @param [r] argc [int]    Argument count, passed to embInit
** @param [r] argv [char**] Argument vector, passed to embInit
**
** @return [int] 0 after embExit()
** @@
******************************************************************************/
int main(int argc, char **argv)
{
    AjPList idlist;
    AjPList* fieldList = NULL;

    AjBool systemsort;
    AjBool cleanup;

    ajuint maxindex;
    ajuint maxidlen = 0;
    ajuint maxlen;

    AjPFile elistfile  = NULL;
    AjPFile* alistfile = NULL;

    AjPStr dbname   = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjPStr sortopt  = NULL;
    void **entryIds = NULL;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr curfilename = NULL;

    AjPFile libr=NULL;
    AjPStr idformat = NULL;

    EmbPEntry entry;

    ajuint idtype  = 0;

    ajuint idCount = 0;
    ajuint idDone;
    AjPList listInputFiles = NULL;
    void ** inputFiles = NULL;
    ajuint nfiles;
    ajuint ifile;

    ajuint filesize;
    short recsize;
    ajuint maxfilelen = 20;
    char date[4] =
    {
        0,0,0,0
    };

    AjPStr tmpfname = NULL;
    AjPStr* fields  = NULL;

    AjPFile entFile  = NULL;

    AjPStr* divfiles   = NULL;
    AjPRegexp regIdExp      = NULL;
    ajint* maxFieldLen = NULL;

    ajuint ifield  = 0;
    ajuint nfields = 0;

    AjPFile logfile = NULL;
    ajuint* countField = NULL;
    ajuint* fieldTot = NULL;
    ajuint idCountFile = 0;
    ajuint i;

    embInit("dbifasta", argc, argv);

    idformat   = ajAcdGetListSingle("idformat");
    fields     = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    systemsort = ajAcdGetBoolean("systemsort");
    cleanup    = ajAcdGetBoolean("cleanup");
    sortopt    = ajAcdGetString("sortoptions");
    maxindex   = ajAcdGetInt("maxindex");
    logfile    = ajAcdGetOutfile("outfile");

    while(fields[nfields])              /* array ends with a NULL */
        nfields++;

    if(nfields)
    {
        AJCNEW(maxFieldLen, nfields);
        AJCNEW0(countField, nfields);
        AJCNEW0(fieldTot, nfields);

        /* start each field at -maxindex; the sign is stripped again when
           the field indexes are written */
        for(ifield=0; ifield < nfields; ifield++)
            maxFieldLen[ifield] = (ajint)maxindex * -1;

        if(systemsort)
            AJCNEW(alistfile, nfields);
        else
        {
            AJCNEW(fieldList, nfields);
            for(ifield=0; ifield < nfields; ifield++)
                fieldList[ifield] = ajListNew();
        }
    }

    /* "00/00/00" is replaced by today's date */
    if(ajStrMatchC(datestr, "00/00/00"))
        ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex"));

    ajStrRemoveWhite(&dbname);          /* used for temp filenames */
    embDbiDateSet(datestr, date);
    idlist = ajListNew();

    regIdExp = dbifasta_getExpr(idformat, &idtype);

    ajDebug("reading '%S/%S'\n", directory, filename);
    ajDebug("writing '%S/'\n", indexdir);

    listInputFiles = embDbiFileListExc(directory, filename, exclude);
    ajListSort(listInputFiles, &ajStrVcmp);
    nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embDbiLogHeader(logfile, dbname, release, datestr,
                     indexdir, maxindex);

    embDbiLogFields(logfile, fields, nfields);
    embDbiLogSource(logfile, directory, filename, exclude,
                    (AjPStr*) inputFiles, nfiles);
    embDbiLogCmdline(logfile);

    AJCNEW0(divfiles, nfiles);

    /*
    ** process each input file, one at a time
    */

    for(ifile=0; ifile < nfiles; ifile++)
    {
        /* curfilename is a private copy, so trimming the path below does
           not alter the string held in the input file list */
        ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]);
        embDbiFlatOpenlib(curfilename, &libr);
        ajFilenameTrimPath(&curfilename);
        if(ajStrGetLen(curfilename) >= maxfilelen)
            maxfilelen = ajStrGetLen(curfilename) + 1;

        ajDebug("processing filename '%S' ...\n", curfilename);
        ajDebug("processing file '%F' ...\n", libr);
        ajStrAssignS(&divfiles[ifile], curfilename);

        if(systemsort)   /* elistfile for entries, alist for fields */
            elistfile = embDbiSortOpen(alistfile, ifile,
                                       dbname, fields, nfields);

        idCountFile = 0;
        for(i=0;i<nfields;i++)
            countField[i] = 0;
        while((entry=dbifasta_NextFlatEntry(libr, ifile,
                                            regIdExp, idtype,
                                            systemsort, fields, 
                                            maxFieldLen, &maxidlen,
                                            countField, elistfile,
                                            alistfile)))
        {
            idCountFile++;
            if(!systemsort)         /* save the entry data in lists */
            {
                embDbiMemEntry(idlist, fieldList, nfields,
                               entry, ifile);
            }

            /* cleared for every entry, in both sort modes */
            entry = NULL;
        }
        idCount += idCountFile;

        /* the loop above only ends once entry is NULL, so there is no
           entry left to release here */
        if(systemsort)
        {
            embDbiSortClose(&elistfile, alistfile, nfields);
        }
        else
        {
            embDbiEntryDel(&dbifastaGEntry);
        }
        embDbiLogFile(logfile, curfilename, idCountFile, fields,
                      countField, nfields);
    }

    /*  write the division.lkp file */
    embDbiWriteDivision(indexdir, dbname, release, date,
                        maxfilelen, nfiles, divfiles, NULL);

    /* Write the entryname.idx index */
    ajStrAssignC(&tmpfname, "entrynam.idx");
    entFile = ajFileNewOutNamePathS(tmpfname, indexdir);

    recsize = maxidlen+10;
    filesize = 300 + (idCount*(ajint)recsize);
    embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date);

    if(systemsort)
        idDone = embDbiSortWriteEntry(entFile, maxidlen,
                                      dbname, nfiles, cleanup, sortopt);
    else                          /* save entries in entryIds array */
    {
        idDone = embDbiMemWriteEntry(entFile, maxidlen,
                                     idlist, &entryIds);
        if(idDone != idCount)
            ajFatal("Duplicates not allowed for in-memory processing");
    }

    /* rewrite the header sizes using the number of ids actually written */
    embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone);
    ajFileClose(&entFile);

    /* Write the fields index files */
    for(ifield=0; ifield < nfields; ifield++)
    {
        /* an explicit maxindex wins; otherwise use the magnitude of the
           recorded field length */
        if(maxindex)
            maxlen = maxindex;
        else
        {
            if(maxFieldLen[ifield] >= 0)
                maxlen = maxFieldLen[ifield];
            else
                maxlen = - maxFieldLen[ifield];
        }

        if(systemsort)
            fieldTot[ifield] = embDbiSortWriteFields(dbname, release,
                                                     date, indexdir,
                                                     fields[ifield], maxlen,
                                                     nfiles, idCount,
                                                     cleanup, sortopt);
        else
            fieldTot[ifield] = embDbiMemWriteFields(dbname, release,
                                                    date, indexdir,
                                                    fields[ifield], maxlen,
                                                    fieldList[ifield],
                                                    entryIds);
    }

    embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot,
                   nfields, nfiles, idDone, idCount);

    if(systemsort)
        embDbiRmEntryFile(dbname, cleanup);

    /*
    ** Release everything
    */

    ajStrDel(&idformat);
    ajStrDelarray(&fields);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&sortopt);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&tmpfname);
    ajFileClose(&libr);
    ajFileClose(&logfile);

    for(i=0;i<nfields;i++)
    {
        if(systemsort)
        {
            ajFileClose(&alistfile[i]);
        }
        else
        {
            ajListMap(fieldList[i], &embDbiFieldDelMap, NULL);
            ajListFree(&fieldList[i]);
        }
    }

    AJFREE(alistfile);
    AJFREE(fieldList);
    AJFREE(maxFieldLen);
    AJFREE(countField);
    AJFREE(fieldTot);

    for(i=0;i<nfiles;i++)
    {
        ajStrDel(&divfiles[i]);
    }

    AJFREE(divfiles);
    AJFREE(inputFiles);

    /* file-scope working objects defined outside this function */
    embDbiEntryDel(&dbifastaGEntry);

    ajStrDel(&dbifastaGRline);
    ajStrDel(&dbifastaGTmpId);

    if(dbifastaGFdl)
    {
        for(i=0; i < nfields; i++)
            ajListFree(&dbifastaGFdl[i]);
        AJFREE(dbifastaGFdl);
    }

    ajListMap(idlist, &embDbiEntryDelMap, NULL);
    ajListFree(&idlist);
    ajListstrFreeData(&listInputFiles);
    AJFREE(entryIds);
    ajRegFree(&dbifastaGIdexp);
    ajRegFree(&dbifastaGWrdexp);
    ajRegFree(&regIdExp);

    ajStrDel(&dbifastaGTmpAc);
    ajStrDel(&dbifastaGTmpSv);
    ajStrDel(&dbifastaGTmpGi);
    ajStrDel(&dbifastaGTmpDb);
    ajStrDel(&dbifastaGTmpDes);
    ajStrDel(&dbifastaGTmpFd);
    ajStrDel(&curfilename);

    embExit();

    return 0;
}
Example #8
0
/* @funcstatic assemoutWriteNextBam *******************************************
**
** Write assembly data to an output file in BAM format.
**
** When assem->Hasdata is false the BAM header is written first:
**   - an "@HD" line carrying the sort order,
**   - one "@SQ" line per contig whose name is not "*" (with optional UR,
**     M5 and SP fields), each followed by "@CO" lines for the contig tags,
**   - any read-group lines from assemSAMGetReadgroupHeaderlines().
** The compressed stream created for the header is stored in
** outfile->OutData. If assem->BamHeader is false the function then
** returns without writing any reads.
**
** Otherwise (or afterwards) every read in assem->Reads is written through
** assemoutWriteBamAlignment() to the stream held in outfile->OutData.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem   [const AjPAssem] Assembly object
**
** @return [AjBool] ajFalse if the output file or assembly is missing,
**                  ajTrue otherwise
** @@
******************************************************************************/
static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPSeqBamHeader header = NULL;
    AjPAssemContig c = NULL;
    AjPSeqBam bam;
    AjPAssemRead   r = NULL;
    AjPAssemContig* contigs = NULL;
    AjPAssemTag    t = NULL;
    AjIList j = NULL;
    AjPSeqBamBgzf gzfile = NULL;
    AjPStr headertext=NULL;
    const AjPStr rgheadertext=NULL;
    ajint i=0;
    ajulong ncontigs=0UL;

    if(!outf) return ajFalse;
    if(!assem) return ajFalse;

    if(!assem->Hasdata)
    {
        /* prefer the ordered contig list, fall back to the contig table */
        if(ajListGetLength(assem->ContigsOrder))
            ncontigs = ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ncontigs = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));
        header = ajSeqBamHeaderNewN((ajuint) ncontigs);

        gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
        outfile->OutData = gzfile;      /* reused by later calls */

        /* contigs is walked as a NULL-terminated array; the pointer itself
           is checked too in case no array was produced */
        while (contigs && contigs[i])   /* contigs */
        {
            c = contigs[i];

            if(ajStrMatchC(c->Name, "*"))
            {
                /* NOTE(review): i indexes both contigs[] and the header
                   arrays, so a skipped contig leaves its header slot
                   unset - confirm this is intended */
                i++;
                continue;
            }

            header->target_name[i] = strdup(ajStrGetPtr(c->Name));
            header->target_len[i++] = c->Length;

            ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
                           c->Name, c->Length);

            if(c->URI)
                ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);

            if(c->MD5)
                ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);

            if(c->Species)
                ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);

            ajFmtPrintAppS(&headertext, "\n");


            /* contig tags become comment lines */
            j = ajListIterNewread(c->Tags);
            while (!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintAppS(&headertext,
                               "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
                               t->Comment);
            }
            ajListIterDel(&j);
        }

        rgheadertext = assemSAMGetReadgroupHeaderlines(assem);
        if(rgheadertext)
            ajStrAppendS(&headertext, rgheadertext);

        ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
        ajSeqBamHeaderWrite(gzfile, header);

        ajSeqBamHeaderDel(&header);
        ajStrDel(&headertext);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */

    gzfile = outfile->OutData;

    /* one alignment record, passed to every
       assemoutWriteBamAlignment() call below */
    AJNEW0(bam);
    bam->m_data=10;
    AJCNEW0(bam->data, bam->m_data);

    j = ajListIterNewread(assem->Reads);

    while (!ajListIterDone(j))  /* reads */
    {
        r = ajListIterGet(j);
        assemoutWriteBamAlignment(gzfile, r, bam);
    }

    ajListIterDel(&j);

    AJFREE(bam->data);
    AJFREE(bam);

    /* the compressed stream is deliberately left open here */

    return ajTrue;
}
Example #9
0
/* @funcstatic assemoutWriteNextSam *******************************************
**
** Writes assembly data to an output file as SAM text.
**
** When assem->Hasdata is false the SAM header is written first:
** an @HD line, an @PG program record, one @SQ line per contig (contigs
** named "*" are skipped) each followed by that contig's tags as @CO lines,
** and finally any read-group header lines returned by
** assemSAMGetReadgroupHeaderlines. If assem->BamHeader is not set the
** function returns immediately after the header.
**
** In all other cases every read in assem->Reads is passed to
** assemoutWriteSamAlignment together with the contig array.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem [const AjPAssem] Assembly object
** @return [AjBool] ajFalse if the output file or assembly is missing,
**                  ajTrue otherwise
******************************************************************************/
static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPAssemContig c = NULL;
    AjPAssemRead   r = NULL;
    AjPAssemTag    t = NULL;
    AjPAssemContig* contigs = NULL;
    AjIList j = NULL;
    AjPStr argstr = NULL;
    const AjPStr headertext = NULL;
    ajint n = 0;
    ajulong i = 0UL;
    AjBool ret = ajTrue;

    if(!outf || !assem)
	return ajFalse;

    /* NOTE(review): n is never updated in this function, so this debug
       line always reports 0 contigs. */
    ajDebug("assemoutWriteSam: # of contigs = %d\n", n);

    if(!assem->Hasdata)
    {
        /* Header line with the assembly sort order */
        ajFmtPrintF(outf, "@HD\tVN:1.3\tSO:%s\n", ajAssemGetSortorderC(assem));

        /* Program record; newlines in the command line are replaced by
           spaces so the record stays on a single line */
        argstr = ajStrNewS(ajUtilGetCmdline());
        ajStrExchangeKK(&argstr, '\n', ' ');
        ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
                    ajUtilGetProgram(), ajNamValueVersion(), argstr);
        ajStrDel(&argstr);


        /* Prefer the explicit contig order when one has been recorded,
           otherwise fall back to the values of the contig table */
        if(ajListGetLength(assem->ContigsOrder))
            ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        /* The array is walked up to its NULL terminator. The conversion
           may leave contigs itself NULL when there was nothing to convert
           (presumably an absent table - confirm against AJAX), so guard
           against dereferencing it. */
        while (contigs && contigs[i])   /* contigs */
        {
            c = contigs[i++];

            if(!ajStrMatchC(c->Name, "*"))
            {
                ajFmtPrintF(outf, "@SQ\tSN:%S\tLN:%d", c->Name, c->Length);

                /* Optional @SQ fields are written only when present */
                if(c->URI)
                    ajFmtPrintF(outf, "\tUR:%S", c->URI);

                if(c->MD5)
                    ajFmtPrintF(outf, "\tM5:%S", c->MD5);

                if(c->Species)
                    ajFmtPrintF(outf, "\tSP:%S", c->Species);

                ajFmtPrintF(outf, "\n");

                /* Each tag of the contig becomes a @CO comment line */
                j = ajListIterNewread(c->Tags);
                while (!ajListIterDone(j))
                {
                    t = ajListIterGet(j);
                    ajFmtPrintF(outf, "@CO\t%S %u %u %S\n",
                                t->Name, t->x1, t->y1,
                                t->Comment);
                }
                ajListIterDel(&j);
            }
        }

        headertext = assemSAMGetReadgroupHeaderlines(assem);
        if(headertext)
            ajFmtPrintF(outf,"%S", headertext);

        AJFREE(contigs);

        /* Header-only call unless the assembly carries a BAM header */
        if(!assem->BamHeader)
            return ajTrue;
    }


    /* data */

    j = ajListIterNewread(assem->Reads);

    /* Rebuild the contig array; i now holds the count returned by the
       conversion and is handed to the alignment writer */
    if(ajListGetLength(assem->ContigsOrder))
        i = ajListToarray(assem->ContigsOrder, (void***)&contigs);
    else
        i = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

    while (!ajListIterDone(j))  /* reads */
    {
	r = ajListIterGet(j);
	assemoutWriteSamAlignment(outf, r, contigs, (ajuint) i);
    }

    ajListIterDel(&j);
    AJFREE(contigs);

    return ret;
}
Example #10
0
/* @funcstatic acdrelations_readdatfile ***************************************
**
** Loads the records of a data file into a PEdam object.
**
** Lines starting with '#' are skipped. Every other line is tokenised on
** '|' into three fields: the ACD datatype, the EDAM relations value and a
** block of attribute:value strings. The block is tokenised again on ';'
** and each piece, after ajStrRemoveWhite, becomes one element of the
** record's acdattr array. One PEdamdat is built per line; the records are
** stored in (*P)->dat and their count in (*P)->n.
**
** @param [u] inf [AjPFile] Input file
** @param [w] P [PEdam*] Object to receive the records
** @return [void]
******************************************************************************/
static void acdrelations_readdatfile(AjPFile inf, PEdam *P)
{
    AjPStr       buffer  = NULL;   /* Current input line                 */
    AjPStr       typestr = NULL;   /* First field: ACD datatype          */
    AjPStr       edamstr = NULL;   /* Second field: EDAM relations value */
    AjPStr       attr    = NULL;   /* One attribute:value string         */
    const AjPStr block   = NULL;   /* Third field: attribute block       */
    const AjPStr piece   = NULL;   /* One ';'-delimited piece of block   */
    AjPList      attrs   = NULL;   /* Attribute strings of current line  */
    AjPList      records = NULL;   /* One PEdamdat per data line         */
    PEdamdat     record  = NULL;

    if(!P)
        ajFatal("Null arg error 1 in acdrelations_readdatfile");

    if(!inf)
        ajFatal("Null arg error 3 in acdrelations_readdatfile");

    buffer  = ajStrNew();
    typestr = ajStrNew();
    edamstr = ajStrNew();
    records = ajListNew();

    while(ajReadline(inf, &buffer))
    {
        /* Comment line: nothing to parse */
        if(ajStrPrefixC(buffer, "#"))
            continue;

        /* The three '|'-delimited fields, in file order */
        ajStrAssignS(&typestr, ajStrParseC(buffer, "|"));
        ajStrAssignS(&edamstr, ajStrParseC(NULL, "|"));
        block = ajStrParseC(NULL, "|");

        /* Collect the ';'-delimited attribute:value strings of the block */
        attrs = ajListstrNew();

        for(piece = ajStrParseC(block, ";");
            piece;
            piece = ajStrParseC(NULL, ";"))
        {
            attr = ajStrNew();
            ajStrAssignS(&attr, piece);
            ajStrRemoveWhite(&attr);
            ajListstrPushAppend(attrs, attr);
        }

        /* Assemble the record for this line and queue it */
        record = ajEdamdatNew();
        ajStrRemoveWhite(&typestr);
        ajStrAssignS(&record->acdtype, typestr);
        ajStrAssignS(&record->edam, edamstr);
        record->n = ajListstrToarray(attrs, &record->acdattr);
        ajListPushAppend(records, record);

        /* Dispose of the temporary list. Per the original author's note
           the strings themselves are expected to survive this call; they
           are now referenced through record->acdattr. */
        ajListstrFree(&attrs);
    }

    /* Hand the collected records over to the caller's object */
    (*P)->n = ajListToarray(records, (void***) &((*P)->dat));

    /* Release working storage */
    ajStrDel(&buffer);
    ajStrDel(&typestr);
    ajStrDel(&edamstr);
    ajListFree(&records);

    return;
}
Example #11
0
/* @funcstatic pdbplus_sort ***************************************************
**
** For every chain of a Pdb object, groups consecutive residues that share
** the same eStrideType into secondary structure elements, passes each
** element of at least tS residues to pdbplus_writeElement, and then stores
** per-chain counts of helices and strands in the chain object.
**
** Residues whose eStrideType is 'C', 'B', 'b', 'T' or '.' are never part of
** an element and terminate any element in progress. An element is also
** terminated by a change of eStrideType or by a jump of more than 1 in the
** residue Idx value.
**
** @param [u] pdb [AjPPdb] Pdb object; Chains[z]->numHelices and
**                         Chains[z]->numStrands are written for each chain
** @param [r] tS [int] Threshold: minimum element size for it to be written
** @return [void]
******************************************************************************/
static void pdbplus_sort(AjPPdb pdb, int tS)
{
    
    AjPResidue *arr = NULL;  /* Array of Residue objects from list
			     of Residue objects in Pdb chain object.        */
    ajint n      = 0;     /* Current position in array of residues.         */
    ajint x      = 0;     /* Number of elements counted so far (2nd pass).  */
    ajint z      = 0;     /* Loop counter over chains.                      */
    ajint siz    = 0;     /* Size of array of residues.                     */
    ajint start  = 0;     /* Start position of element.                     */
    ajint end    = 0;     /* End position of element.                       */
    ajint esiz   = 0;     /* Size of current element.                       */
    ajint eNum   = 0;     /* Sequential count of elements.                  */
    ajint resnum = 0;     /* Residue number of last residue, Idx value.     */
    char   etype = ' ';   /* Element type.                                  */
    AjBool foundStart =ajFalse; /* True if we have found the start 
				   of an element of any size*/
    int numHelices = 0;   /* Helices counted in the current chain.          */
    int numStrands = 0;   /* Strands counted in the current chain.          */
	
 



    for(z=0; z < pdb->Nchn; z++)
    {
        /* Use ajListToarray to convert the list of residues for the current
	   chain to an array.  Returns size of array of pointers */

        siz = ajListToarray((AjPList)pdb->Chains[z]->Residues,
			  (void ***)&arr);

	/* Loop through the array to identify, index  &
	   then write SSE data to residues in the array.
	   Element numbering (eNum) restarts at 1 for every chain.
	   resnum is refreshed in the loop's increment expression, so it is
	   updated with the current residue's Idx even when the body is
	   left through 'continue'. */
        for(eNum=1, foundStart=ajFalse, n=0; 
	    n<siz; 
	    resnum = arr[n]->Idx, n++)
        {
            /* If residue is def. not in an element. */
            if((arr[n]->eStrideType == 'C')  ||
	       (arr[n]->eStrideType == 'B')  ||
	       (arr[n]->eStrideType == 'b')  ||
	       (arr[n]->eStrideType == 'T')  ||
	       (arr[n]->eStrideType == '.'))
            {
	        /* If element start already found, this residue defines
		   the end of an element. */
                if (foundStart)
                {
		  /* Check if element size  >= threshold size. */
                    if(esiz >= tS)  
                    {
                        /* The element ended on the previous residue. */
                        end = n-1;
                        /* Write element data. */
                        pdbplus_writeElement(start, end, eNum, arr);
                        eNum++;
                    }
                    /* Element written or element < threshold size. */
                    foundStart = ajFalse;
                    esiz = 0;
                    continue;   /* Next residue in array. */		
                }
		continue;       /* Next residue in array. */
            }
	    /* This residue might be in an element of tS or greater. */
            else 
            {
                if(foundStart)
                {
                    if(arr[n]->eStrideType != etype)
                    {
		        /* Found the end of one & the start of next element.
			   Check element size (esiz) >= threshold size. */
                        if(esiz >= tS) 
                        {
                            end = n-1;
                            /* Write element data. */
                            pdbplus_writeElement(start, end, eNum, arr);
                            eNum++;
                        }
                        /* foundStart remains ajTrue. */
                        start = n;
			/*  residue is first residue of next element. */
                        esiz = 1;	 
                        etype = arr[n]->eStrideType;
                        continue;	
                    }
		    else
                    {
		        /* 
			** Residue type is the same as that of the first
			** residue of the current element.
			** Grow the element only if the residue number (Idx)
			** is exactly one more than that of the previous
			** residue; a larger step is treated below as a gap
			** in the residue numbering, e.g. caused by missing
			** electron density.
			*/
                        if(arr[n]->Idx == resnum+1)
			    esiz++;

                        /* Cope with cases of jumps in residue numbering -
			   such cases define the end of an element. */

                        /* If residue number has increased by more than 1,
			   end of element is found. */
                        if(arr[n]->Idx > resnum+1)
                        {
                            if (esiz >= tS)
                            {
                                /* Write element data. */
                                end = n-1;
                                pdbplus_writeElement(start, end, eNum, arr);
                                eNum++;
                            }
                            /* Current residue is the start of the next element. */
                            esiz = 1;
                            /* note-foundStart remains true. 
			       eType remains the same. */
                            start = n;
                            continue;	
                        }
                        /* If end of array is reached, flush the element in
			   progress (eNum is not incremented as no further
			   element follows in this chain).
			   NOTE(review): this is the only end-of-array flush;
			   an element that begins on the final residue leaves
			   via one of the 'continue' paths and is not written
			   here - confirm this is intended. */
                        if ((n == (siz-1)) && (esiz >= tS))
                        {
                            end = n;
                            pdbplus_writeElement(start, end, eNum, arr);
                        }
                    }
                }
                else
                    /* We've not found the start yet. */
                {
                    start = n;
                    etype = arr[n]->eStrideType;
                    foundStart = ajTrue;
                    esiz = 1;
                    continue;		
                }
            }
	}

        /* Count numHelices, num Strands. */
	numHelices = 0;
        numStrands = 0;

        /* Loop through array of residues again.
	   A residue whose eStrideNum exceeds x marks an element that has
	   not been counted yet; x is then advanced by one.
	   NOTE(review): presumably eStrideNum is set by
	   pdbplus_writeElement - confirm. */
        for(n=0, x=0; n<siz; n++)   
        {
	  /* eStrideNum starts at 1; a value of 0 means the residue is
	     skipped. */
            if(!(arr[n]->eStrideNum))		
                continue;
            else if (arr[n]->eStrideNum > x)
                { 
		    /* Types 'H', 'G' and 'I' are counted as helices. */
		    if((arr[n]->eStrideType == 'H')  ||
		       (arr[n]->eStrideType == 'G')  ||
		       (arr[n]->eStrideType == 'I'))
		    {
			numHelices++;
                        x++;
                        continue;
		    }
		    else
		    {
			/* Any other type reaching this point is counted as
			   a strand. Per the original author, the element
			   number will always be 0 for eStrideType's of 
			   C, T, B & b, so those never get here. */
                        numStrands++;
                        x++;
                        continue;
		    }
		}
	}	
	
        /* Store the counts for this chain and release the residue array. */
        pdb->Chains[z]->numHelices = numHelices;
        pdb->Chains[z]->numStrands = numStrands;
	AJFREE(arr); 
    }
}
Example #12
0
/* @prog domainreso ***********************************************************
**
** Removes low resolution domains from a DCF file (domain
** classification file).
**
** Each clean coordinate file supplied through the "cpdbpath" option is read
** and, when its resolution value (pdb->Reso) is greater than the
** "threshold" option, its PDB code is added to a list. The list is sorted
** and converted to an array. The input DCF file is then read one domain at
** a time, and a domain is written to the output DCF file only when the
** search for its identifier in that array returns -1 (not found).
**
** Coordinate files that cannot be opened or cannot be parsed are reported
** with a warning and skipped.
**
******************************************************************************/
int main(ajint argc, char **argv)
{

    AjPList     cpdb_path  = NULL; /* Location of coordinate files for input */
    AjPStr      temp       = NULL; /* Name of current coordinate file        */
    AjPStr      temp2      = NULL; /* PDB code pushed onto 'entry'           */
    AjPList     entry      = NULL; /* List of pdb codes with resolution      */
                                   /* ABOVE the threshold                    */
    AjPStr     *entryarr   = NULL; /* entry as an array                      */


    AjPFile     fptr_cpdb  = NULL; /* Pointer to current coordinate file     */
    AjPFile     dcfin      = NULL; /* DCF input file                         */
    AjPFile     dcfout     = NULL; /* DCF output file                        */

    AjPPdb      pdb        = NULL; /* Pdb object pointer                     */
    AjPDomain   domain     = NULL; /* Domain structure                       */


    float       threshold  = 0.0;  /* Resolution threshold                   */
    ajint       num        = 0;    /* number of nodes in list                */
    ajint       i          = 0;    /* Loop counter                           */

    ajint       type       = 0;    /* Type of domain (ajSCOP or ajCATH) in
                                      the DCF file                           */


    /* Read data from acd */
    embInitPV("domainreso",argc,argv,"DOMAINATRIX",VERSION);
    cpdb_path     = ajAcdGetDirlist("cpdbpath");
    threshold     = ajAcdGetFloat("threshold");
    dcfin         = ajAcdGetInfile("dcfinfile");
    dcfout        = ajAcdGetOutfile("dcfoutfile");


    /* Create list . */
    entry    = ajListNew();


    type = ajDomainDCFType(dcfin);


    /* Start of main application loop                         */
    /* Produce list of pdb codes with resolution              */
    /* ABOVE the threshold.                                   */
    /* ajListPop hands over each file name in 'temp'; it is   */
    /* deleted on every path through the loop body, so no     */
    /* string is allocated for it up front.                   */
    while(ajListPop(cpdb_path,(void **)&temp))
    {
        /* Open coordinate file. */
        if((fptr_cpdb=ajFileNewInNameS(temp))==NULL)
        {
            ajWarn("Could not open cpdb file");
            ajStrDel(&temp);
            continue;
        }
        ajFmtPrint("%S\n", temp);
        fflush(stdout);


        /* Read coordinate data file. */
        pdb = ajPdbReadFirstModelNew(fptr_cpdb);

        /* A file that cannot be parsed yields no Pdb object; skip it
           rather than dereference a NULL pointer below. */
        if(!pdb)
        {
            ajWarn("Could not read cpdb file");
            ajFileClose(&fptr_cpdb);
            ajStrDel(&temp);
            continue;
        }


        /* Check if resolution is above threshold. */
        if(pdb->Reso > threshold)
        {
            /* assign ID to list. */
            temp2=ajStrNew();
            ajStrAssignS(&temp2, pdb->Pdb);
            ajListPush(entry, (AjPStr) temp2);
        }

        /* Close coordinate file and tidy up*/
        ajPdbDel(&pdb);
        ajFileClose(&fptr_cpdb);
        ajStrDel(&temp);
    }
    num = ajListGetLength(entry);


    /* Sort the list of pdb codes & convert to an array. */
    ajListSort(entry, domainreso_StrComp);
    ajListToarray(entry, (void ***)&entryarr);


    /* Read DCF file and compare IDs to those in list
       if not present then write domain structure data to output. */
    while((domain=(ajDomainReadCNew(dcfin, "*", type))))
    {
        /* DOMAIN id not found in the list of domains with resolution
           above the threshold, so include it in the output file. */
        if((domainreso_StrBinSearchDomain(ajDomainGetId(domain),
                                          entryarr, num))==-1)
            ajDomainWrite(dcfout, domain);

        /* Delete domain structure. */
        ajDomainDel(&domain);
    }


    /* Tidy up. */

    /* Every string pushed onto 'entry' was allocated in the loop above and
       is reachable through entryarr, so free them all here. temp2 merely
       aliases the last string pushed and must not be deleted separately. */
    if(entryarr)
        for(i = 0; i < num; i++)
            ajStrDel(&entryarr[i]);

    ajFileClose(&dcfout);
    ajFileClose(&dcfin);
    ajListFree(&cpdb_path);
    ajListFree(&entry);
    AJFREE(entryarr);


    /* Return. */
    ajExit();
    return 0;
}