Beispiel #1
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq;
    AjPFile outf;
    AjPCod codon;
    AjPStr substr;
    ajint beg;
    ajint end;
    ajint ccnt;


    embInit("cusp", argc, argv);

    seqall = ajAcdGetSeqall("sequence");
    outf   = ajAcdGetOutfile("outfile");

    ccnt   = 0;
    substr = ajStrNew();
    codon  = ajCodNewCodenum(0);
    ajCodSetNameS(codon, ajFileGetPrintnameS(outf));

    while(ajSeqallNext(seqall, &seq))
    {
	beg = ajSeqallGetseqBegin(seqall);
	end  = ajSeqallGetseqEnd(seqall);
	ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1);
	ajCodSetTripletsS(codon,substr,&ccnt);
    }

    ajCodCalcUsage(codon,ccnt);

    ajCodSetDescC(codon, "CUSP codon usage file");
    ajCodWrite(codon, outf);
    ajFileClose(&outf);

    ajStrDel(&substr);
    ajCodDel(&codon);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);

    embExit();

    return 0;
}
Beispiel #2
0
int main(ajint argc, char **argv)
{
    AjPList    pdb_path     =NULL;  /* Path of pdb files */
    AjPStr     pdb_name     =NULL;  /* Name of pdb file  */
    AjPDirout  ccf_path    =NULL;   /* Path of ccf files */
    AjPStr     ccf_name    =NULL;   /* Name of ccf file  */
    AjPStr     pdbid        =NULL;  /* PDB code          */
    AjPStr     pdbid_temp   =NULL;  /* PDB code          */
    
    AjBool     ccfnaming   =ajFalse;   
    /* True == use the pdbid code to name the output file, 
       False== use the name of the original pdb file*/
    
    /* Mask non-amino acid groups in protein chains that do not 
       contain a C-alpha atom. The group will not appear in either 
       the CO or SQ records of the clean coordinate file */
    AjBool     camask     =ajFalse;  
    
    
    /* Mask amino acids in protein chains that do not contain a 
       C-alpha atom. The amino acid will appear not appear in the 
       CO record but will still be present in the SQ record of the 
       clean coordinate file */
    AjBool     camask1    =ajFalse;   
    
    /* Mask residues or groups in protein chains with a single atom only */
    AjBool     atommask     =ajFalse;  
    
    AjPStr     temp         =NULL; /* Temp string */   
    AjPStr     msg          =NULL; /* Error message */
   AjPStr     base_name    =NULL; /* Name of pdb file w/o path or 
				      extension */
    
    
    AjPFile    pdb_inf      =NULL; /* pdb input file pointer */
    AjPFile    ccf_outf    =NULL; /* ccf output file pointer */
    AjPFile    logf         =NULL; /* log file pointer*/ 
    
    AjPPdb     pdb          =NULL; /* Pdb structure (for parsed data) */
    
    ajint      min_chain_size=0; /* Minimum length of a SEQRES chain 
				    for it to be parsed */
    ajint      max_mismatch=0; /* Max. no. residues to trim when checking
				  for missing C-terminal SEQRES residues. */
    ajint      max_trim=0;   /* Maximum number of permissible 
				  mismatches between the ATOM and 
				  SEQRES sequences */
    ajint      pos          =0; /* Location of the file extension in 
				   the pdb file name */
    
    
    
    
    /*     THIS_DIAGNOSTIC  
	   tempstr=ajStrNew();    
	   ajStrAssignC(&tempstr,     "diagnostics");
	   tempfile=ajFileNewOutNameS(tempstr);
	   ajStrDel(&tempstr);*/
    
    
    /* Initialise strings */
    ccf_name     = ajStrNew();
    pdb_name      = ajStrNew();
    temp          = ajStrNew();
    msg           = ajStrNew();
    base_name     = ajStrNew();
    pdbid         = ajStrNew();    
    pdbid_temp    = ajStrNew();    
    
    
    /* Read data from acd */
    embInitPV("pdbparse",argc,argv,"STRUCTURE",VERSION);

    pdb_path     = ajAcdGetDirlist("pdbpath");
    ccf_path     = ajAcdGetOutdir("ccfoutdir");
    logf         = ajAcdGetOutfile("logfile");
    min_chain_size=ajAcdGetInt("chnsiz");
    max_mismatch =ajAcdGetInt("maxmis");
    max_trim     =ajAcdGetInt("maxtrim");
    ccfnaming    = ajAcdGetBoolean("ccfnaming");
    camask       = ajAcdGetBoolean("camask");
    camask1      = ajAcdGetBoolean("camaska");
    atommask     = ajAcdGetBoolean("atommask");
    
    
    /* Check directories*/
    
    /*Start of main application loop*/
    while(ajListPop(pdb_path,(void **)&temp))
    {
	ajFmtPrint("Processing %S\n", temp);   
	ajFmtPrintF(logf, "%S\n", temp);    
	
	
		
	
	/* Read pdb file*/
	if((pdb_inf=ajFileNewInNameS(temp))==NULL)
	{
	    ajFmtPrintS(&msg, "Could not open for reading %S ", 
			temp);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", temp); 
	    ajStrDel(&temp);	
	    continue;	    
	}
	
	
	
	/* Assign pdb id code from file name */
	ajStrAssignS(&pdbid, temp);
	ajFilenameTrimPathExt(&pdbid);

	if(MAJSTRGETLEN(pdbid)>4)
	{
	    /* The file name is longer than expected (and probably contains a 
	       prefix). Take the last four characters to be the pdbid code */
	    ajStrAssignSubS(&pdbid_temp, pdbid, pos-4, pos-1);
	    ajStrAssignS(&pdbid, pdbid_temp);
	}
	else if(MAJSTRGETLEN(pdbid)<4)
	    ajFatal("Could not determine pdbid code from file name (%S)", pdbid);
	

	/* Parse pdb file and write pdb structure */
	if(!(pdb=ajPdbReadRawNew(pdb_inf, pdbid, min_chain_size, max_mismatch, 
				 max_trim, camask, camask1, atommask, logf)))
	{	
	    ajFmtPrintS(&msg, "Clean coordinate file not generated for %S", temp);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "NO_OUTPUT", temp); 
	    
	    ajFileClose(&pdb_inf);
	    ajStrDel(&temp);
	    continue;
	}
	
	
	/* Open clean coordinate file for writing*/
	if(ccfnaming)
	    ajStrAssignS(&ccf_name, pdb->Pdb);
	else
	    ajStrAssignS(&ccf_name, temp);
	ajStrFmtLower(&ccf_name);

	
	if(!(ccf_outf=ajFileNewOutNameDirS(ccf_name, ccf_path)))
	{
	    ajFmtPrintS(&msg, "Could not open %S for writing", 
			ccf_name);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_OPEN", ccf_name); 
	    ajFileClose(&pdb_inf);
	    ajPdbDel(&pdb);
	    ajStrDel(&temp);	
	    continue;
	}  
	
	
	
	
	
	/* Write pdb file */
	if(!ajPdbWriteAll(ccf_outf, pdb))
	{
	    ajFmtPrintS(&msg, "Could not write file %S", ccf_name);
	    ajWarn(ajStrGetPtr(msg));
	    ajFmtPrintF(logf, "%-15s%S\n//\n", "FILE_WRITE", ccf_name); 
	    
	    ajFmtPrintS(&temp, "rm %S", ccf_name);
	    ajFmtPrint("%S", temp);
	    ajSysSystem(temp);

	    ajFileClose(&pdb_inf);
	    ajFileClose(&ccf_outf);
	    ajPdbDel(&pdb);
	    ajStrDel(&temp);	
	    continue;
	}
	
	
	
	/* Tidy up*/
	ajFileClose(&pdb_inf);
	ajFileClose(&ccf_outf);
	ajPdbDel(&pdb);   
	ajStrDel(&temp);	
	
	
	ajFmtPrintF(logf, "//\n");    
    }

    /*End of main application loop*/        





    /*Tidy up */
    ajListFree(&pdb_path);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccf_path);
    ajStrDel(&ccf_name);
    ajStrDel(&base_name);
    ajStrDel(&pdbid);
    ajStrDel(&pdbid_temp);
    ajStrDel(&msg);
    
    ajFileClose(&logf);
    
    
    
    /* DIAGNOSTIC
       ajFileClose(&tempfile);
       */
    
    /* Return */
    ajExit();
    return 0;
}	    
Beispiel #3
0
int main(int argc, char **argv)
{
    AjPSeqall  seqall;
    AjPSeq     seq;
    AjPFile    outf;
    AjPCod     codon;
    AjPStr     substr;
    AjBool     sum;
    
    ajint ccnt;
    ajint beg;
    ajint end;

    float Nc;
    double td;
    

    embInit("chips", argc, argv);

    seqall = ajAcdGetSeqall("seqall");
    outf   = ajAcdGetOutfile("outfile");
    sum    = ajAcdGetBoolean("sum");
    
    codon  = ajCodNewCodenum(0);

    ccnt = 0;
    substr = ajStrNew();

    while(ajSeqallNext(seqall, &seq))
    {
	beg = ajSeqallGetseqBegin(seqall);
	end = ajSeqallGetseqEnd(seqall);
	ajStrAssignSubS(&substr,ajSeqGetSeqS(seq),beg-1,end-1);
	ajStrFmtUpper(&substr);
	ajCodSetTripletsS(codon,substr,&ccnt);
	if(!sum)
	{
	    ajCodCalcUsage(codon,ccnt);
	    td = ajCodCalcNc(codon);
	    Nc = (float) td;
	    
	    ajFmtPrintF(outf,"%-20s Nc = %.3f\n",ajSeqGetNameC(seq),Nc);
	    ajCodClearData(codon);
	}
    }

    if(sum)
    {
	ajCodCalcUsage(codon,ccnt);
	td = ajCodCalcNc(codon);
	Nc = (float) td;
	
	ajFmtPrintF(outf,"# CHIPS codon usage statistics\n\n");
	ajFmtPrintF(outf,"Nc = %.3f\n",Nc);
    }
    
    ajFileClose(&outf);
    ajCodDel(&codon);
    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajStrDel(&substr);

    embExit();

    return 0;
}
Beispiel #4
0
int main(int argc, char **argv)
{
    /*         
    ** All pointers set to NULL for safety. 
    ** Variables names and initialisation values aligned for clarity.
    */

    AjBool    boo   = ajFalse;
    ajint     n1    = 0;
    ajint     n2    = 0;
    ajlong    l1    = 0;     /* long int */
    float     f1    = 0.0;
    double    d1    = 0.0;   /* there is no long double */
    size_t    size  = 100;   /* Reserved memory size.  Could be any value you know in advance. */


    embInit("demostringnew", argc, argv);
    
    demostringnew_msg("/* Starting string values */");
    

    /* Functions with the prefix ajStr are for manipulating EMBOSS strings.
       Functions with the prefix ajChar are for manipulating C-type (char*) string 
       See filesection and datasection sections in ajstr.c */


    /*
    ** String constructor functions 
    ** See "@section constructors" in ajstr.c
    */

    /* Construct a new string with no starting value or reserved size.
       There is no equivlent function for C-type (char*) strings */
    str0  = ajStrNew ();

    /* Construct a new string with a reserved size but no starting value */
    txt1 = ajCharNewRes(size);  
    str1 = ajStrNewRes (size);    

    /* Construct a new C-type (char*) string with a starting value ... */
    txt2 = ajCharNewC ("Starting value");   /* ... copied from a C-type (char*) string */
    str2 = ajStrNewC  (txt2);                /* ... copied from a C-type (char*) string */
    txt3 = ajCharNewS (str2);                /* ... copied from a string */
    str3 = ajStrNewS  (str2);                /* ... copied from a string */

    /* Construct a new string with a reserved size and starting value ... */
    txt4 = ajCharNewResC("Starting value, reserved size", size);  /* ... copied from a C-type (char*) string)*/
    str4 = ajStrNewResC (txt4, size);                              /* ... copied from a C-type (char*) string */
    /* or str4 = ajStrNewResLenC(txt4, size, strlen(txt4)); to specify string length */
    txt5 = ajCharNewResS(str4, size);                              /* ... copied from a string */
    str5 = ajStrNewResS (str4, size);                              /* ... copied from a string */

    demostringnew_msg("/* After string constructor functions */");    





    /*
    ** String destructor functions 
    ** See "@section destructors" in ajstr.c) 
    */
    
    /* Destruct a string */
    ajCharDel(&txt1);
    ajCharDel(&txt2);
    ajCharDel(&txt3);
    ajCharDel(&txt4);
    ajCharDel(&txt5);
    ajStrDel (&str0);
    ajStrDel (&str1);
    ajStrDel (&str3);
    ajStrDel (&str5);

    /* str2 & str4 still in memory */
    demostringnew_msg("/* After string destructor functions */");    





    /*
    ** String (de)referencing functions 
    ** See "@section destructors" in ajstr.c)
    */

    str0 = ajStrNewRef(str2);
    /* or ajStrAssignRef(&str0, str2); */
    demostringnew_msg("/* After string reference */");    

    ajStrDelStatic(&str0);
    demostringnew_msg("/* After string dereference */");    





    /*
    ** String assignment functions 
    ** See "@section assignment" in ajstr.c)
    */

    /* Still only str2 & str4 in memory */

    /* Assign a string value using ... */
    ajStrAssignC(&str1, "Assigned value");     /* ... a C-type (char*) string */
    /*    or ajStrAssignLenC(&str1, "Assigned value", strlen("Assigned value")); to specify string length. */
    ajStrAssignS(&str3, str1);                 /* ... a string                */
    ajStrAssignK(&str5, 'A');                  /* ... a character             */
    demostringnew_msg("/* After string assignment 1 */");    

    ajStrAssignSubC(&str1, "Assigned value", 0, 11);
    ajStrAssignSubS(&str3, str1, 0, 9);
    demostringnew_msg("/* After string assignment 2 */");    

    /* The assignment functions allocate memory if necessary so str1, str3 and str5 will be created for you.  It's bad practice to use this mechanism however because it's not obvious the string has been allocated (and needs freeing).  Much cleaner to call the construct (ajStrNew) explicitly. */



    /* Assign a string with a reserved size and value using ... */
    ajStrAssignResC(&str1, size, "Assigned value, reserved size");  /* ... a C-type (char*) string */
    ajStrAssignResS(&str3, size, str1);                             /* ... a string                */
    demostringnew_msg("/* After string assignment 3 */");    



    /* Assign a string value only if the string is empty using ... */
    str0 = ajStrNew();
    ajStrAssignEmptyC(&str0, "New value if string was empty");    /* ... a C-type (char*) string */ 
    ajStrAssignEmptyS(&str1, str0);                               /* ... a string                */
    demostringnew_msg("/* After string assignment 4 */");    

    /* Now str0-5 in memory.  The above code is for illustrative purposes: it's much cleaner to put all the constructors / destructors at the top / bottom of the code where possible. */


    /* Assign all strings intuitive values */
    txt0 = ajCharNewResC("TEXT 0", 100);   
    txt1 = ajCharNewResC("TEXT 1", 100);   
    txt2 = ajCharNewResC("Text 2", 100);   
    txt3 = ajCharNewResC("Text 3", 100);   
    txt4 = ajCharNewResC("Text 4", 100);   
    txt5 = ajCharNewResC("Text 5", 100);
    ajStrAssignC(&str0, "STRING 0");   
    ajStrAssignC(&str1, "STRING 1");   
    ajStrAssignC(&str2, "String 2");   
    ajStrAssignC(&str3, "String 3");   
    ajStrAssignC(&str4, "String 4 WITHSOMETEXTINABLOCK");   
    ajStrAssignC(&str5, "String 5 WITHSOMETEXTINABLOCK");   
    demostringnew_msg("/* After string assignment 5 */");    





    /*
    ** String formatting functions 
    ** See "@section formatting" in ajstr.c
    */
    ajCharFmtLower(txt0);
    ajCharFmtLower(txt1);
    ajStrFmtLower(&str0);
    ajStrFmtLowerSub(&str1, 0, 2);

    ajCharFmtUpper(txt2);
    ajCharFmtUpper(txt3);
    ajStrFmtUpper(&str2);
    ajStrFmtUpperSub(&str3, 0, 2);
    demostringnew_msg("/* After string formatting 1 */");    


    ajStrFmtTitle(&str0);
    ajStrFmtQuote(&str1);
    ajStrFmtBlock(&str4, 3);
    demostringnew_msg("/* After string formatting 2 */");    


    /* See also ajStrFmtWrap, ajStrFmtWrapLeft
       ... these need checking. */





    /*
    ** String conversion functions 
    ** See "@section datatype to string conversion" in ajstr.c
    */
    n1 = n2 = l1 = 1;
    f1 = d1 = 0.5;
    ajStrFromBool( &str0, boo);
    ajStrFromInt(&str1, n1);
    ajStrFromLong(&str2, l1);
    ajStrFromFloat(&str3, f1, 5);
    ajStrFromDouble(&str4, d1, 5);
    ajStrFromDoubleExp(&str5, d1, 5);
    demostringnew_msg("/* After datatype to string conversion */");    


    /*
    ** String conversion functions 
    ** See "@section string to datatype conversion" in ajstr.c
    */
    ajStrToBool(str0, &boo);
    ajStrToInt(str1, &n1);
    ajStrToLong(str2, &l1);
    ajStrToDouble(str4, &d1);
    ajUser("/* After string to datatype conversion */\n"
	   "boo (from str0): %B\nn1 (from str1): %d\nl1 (from str2): %d", 
	   boo, n1, l1);
    ajFmtPrint("f1 (from str3): %f\nd1 (from str4): %f\n", f1, d1);
    /* Check ajUser ... doesn't support %f */
    /* See also  ajStrToHex */





    /* Assign all strings new values */
    strcpy(txt0, "Text String");
    strcpy(txt1, "TEXT STRING");
    strcpy(txt2, "Text*");
    strcpy(txt3, "Text");
    strcpy(txt4, "Text String 4");
    strcpy(txt5, "Text String 5");

    ajStrAssignC(&str0, "String");   
    ajStrAssignC(&str1, "STRING");   
    ajStrAssignC(&str2, "String*");   
    ajStrAssignC(&str3, "*String");   
    ajStrAssignC(&str4, "String 4");   
    ajStrAssignC(&str5, "String 5");   
    demostringnew_msg("/* After resetting strings */");    





    /*
    ** String comparison functions 
    ** See "@section comparison" in ajstr.c
    */
    ajUserDumpC("/* String comparison functions */");
    boo = ajCharMatchC(txt0, txt1);
    ajUser("ajCharMatchC(txt0 txt1); == %B", boo);
    boo = ajCharMatchCaseC(txt0, txt1);
    ajUser("ajCharMatchCaseC(txt0 txt1); == %B", boo);
    boo = ajCharMatchC(txt0, txt2);
    ajUser("ajCharMatchC(txt0,txt2); == %B", boo);
    boo = ajCharMatchWildC(txt0, txt2);
    ajUser("ajCharMatchWildC(txt0,txt2); == %B", boo);
    boo = ajCharMatchWildS(txt0, str2);
    ajUser("ajCharMatchWildS(txt0,str2); == %B", boo);
    /* See also ajCharMatchWildNextC, ajCharMatchWildWordC
       ... these need checking & documentation updated. */

    boo = ajCharPrefixC(txt0, txt3);
    ajUser("ajCharPrefixC(txt0, txt3); == %B", boo);
    boo = ajCharPrefixS(txt0, str0);
    ajUser("ajCharPrefixS(txt0, str0); == %B", boo);
    boo = ajCharPrefixCaseC(txt5, txt1);
    ajUser("ajCharPrefixCaseC(txt5, txt1); == %B", boo);
    boo = ajCharPrefixCaseC(txt1, txt5);
    ajUser("ajCharPrefixCaseC(txt1, txt5); == %B", boo);
    boo = ajCharPrefixCaseS(txt0, str0);
    ajUser("ajCharPrefixCaseS(txt0, str0); == %B", boo);
    boo = ajCharSuffixC(txt0, txt3);
    ajUser("ajCharSuffixC(txt0, txt3); === %B", boo);
    boo = ajCharSuffixS(txt0, str0);
    ajUser("ajCharSuffixS(txt0, str0); == %B", boo);

    /* See also ajCharSuffixCaseC, ajCharSuffixCaseC, ajCharSuffixCaseS, ajCharSuffixCaseS
       ... these need checking. */
    boo =  ajStrMatchC (str0, txt0);
    ajUser("ajStrMatchC (str0, txt0); == %B", boo);
    boo = ajStrMatchS(str0, str1);
    ajUser("ajStrMatchS(str0, str1); == %B", boo);
    boo = ajStrMatchCaseC(str0, txt0);
    ajUser("ajStrMatchCaseC(str0, txt0); == %B", boo);
    boo = ajStrMatchCaseS(str0, str0);
    ajUser("ajStrMatchCaseS(str0, str0); == %B", boo);


    /*
    ajUser("== %B", boo);

    boo = ajStrMatchWildC(str2, const char* text);
     ajStrMatchWildS  (const AjPStr thys, const AjPStr wild);
     ajStrMatchWildWordC (const AjPStr str, const char* text);
     ajStrMatchWildWordS (const AjPStr str, const AjPStr text);
     ajStrPrefixC(const AjPStr str, const char* txt2);
     ajStrPrefixS(const AjPStr str, const AjPStr str2);
     ajStrPrefixCaseC (const AjPStr str, const char* pref);
     ajStrPrefixCaseS (const AjPStr str, const AjPStr pref);
     ajStrSuffixC (const AjPStr thys, const char* suff);
     ajStrSuffixS (const AjPStr thys, const AjPStr suff);
    */





    /**************************************************************************/
    /* String substitution functions (See "@section substitution" in ajstr.c) */
    /**************************************************************************/

    /*
AjBool     ajStrExchangeCC(AjPStr* Pstr, const char* txt, const char* txtnew);
AjBool     ajStrExchangeCS(AjPStr* Pstr, const char* txt,
                           const AjPStr strnew);
AjBool     ajStrExchangeKK(AjPStr* Pstr, char chr, char chrnew);
AjBool     ajStrExchangeSC(AjPStr* Pstr, const AjPStr str,
                           const char* txtnew);
AjBool     ajStrExchangeSS(AjPStr* Pstr, const AjPStr str,
                           const AjPStr strnew);
AjBool     ajStrExchangeSetCC(AjPStr* Pstr, const char* oldc,
                              const char* newc);
AjBool     ajStrExchangeSetSS(AjPStr* Pstr, const AjPStr str,
                            const AjPStr strnew);
AjBool     ajStrRandom(AjPStr *s);
AjBool     ajStrReverse(AjPStr* Pstr);
    */


    embExit();
    return 0;
}
Beispiel #5
0
AjPStr embScopToPdbid(const AjPStr scop, AjPStr *pdb)
{
    ajStrAssignSubS(pdb, scop, 1, 4);

    return *pdb;
}
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
			  const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = ajListNew();
    AjPStr seqstr  = ajStrNew();
    AjPStr seqname = ajStrNew();
    AjPStr tmp     = ajStrNew();
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
			begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
			begin-1,adj-1);

    ajStrFmtUpper(&seqstr);
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
	   "'%S'\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
	   seqstr);*/

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
	    pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
	ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

 	if (reverse)
	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
	else
        {
	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }
        
	if(isreversed)
	    ajFeatReverse(sf, seqlen);

	/*
	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
	       sf->Start, sf->End, sf->Strand);
	*/

	ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
static void restover_printHits(const AjPSeq seq, const AjPStr seqcmp,
			       AjPFile outf,
			       AjPList l, const AjPStr name, ajint hits,
			       ajint begin, ajint end,
			       ajint mincut, ajint maxcut, AjBool plasmid,
			       ajint sitelen,
			       AjBool limit, const AjPTable table,
			       AjBool alpha, AjBool frags,
			       AjBool html)
{
    EmbPMatMatch m = NULL;
    AjPStr ps = NULL;
    ajint *fa = NULL;
    ajint *fx = NULL;
    ajint fc = 0;
    ajint fn = 0;
    ajint fb = 0;
    ajint last = 0;
    AjPStr overhead = NULL;

    const AjPStr value = NULL;

    ajint i;
    ajint c = 0;

    ajint hang1;
    ajint hang2;


    ps = ajStrNew();
    fn = 0;

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Restrict of %S from %d to %d\n",name,begin,end);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"#\n");

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum cuts per enzyme: %d\n",mincut);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Maximum cuts per enzyme: %d\n",maxcut);

    if(html)
	ajFmtPrintF(outf,"<BR>");
    ajFmtPrintF(outf,"# Minimum length of recognition site: %d\n",
		sitelen);
    if(html)
	ajFmtPrintF(outf,"<BR>");

    hits = embPatRestrictRestrict(l,hits,!limit,alpha);

    if(frags)
    {
	fa = AJALLOC(hits*2*sizeof(ajint));
	fx = AJALLOC(hits*2*sizeof(ajint));
    }


    ajFmtPrintF(outf,"# Number of hits with any overlap: %d\n",hits);

    if(html)
	ajFmtPrintF(outf,"<BR>");

    if(html)
	ajFmtPrintF(outf,"</p><table  border cellpadding=4 "
		    "bgcolor=\"#FFFFF0\">\n");
    if(html)
	ajFmtPrintF(outf,
		    "<th>Base Number</th><th>Enzyme</th><th>Site</th>"
		    "<th>5'</th><th>3'</th><th>[5'</th><th>3']</th>\n");
    else
	ajFmtPrintF(outf,"# Base Number\tEnzyme\t\tSite\t\t5'\t3'\t"
		    "[5'\t3']\n");

    for(i=0;i<hits;++i)
    {
	ajListPop(l,(void **)&m);
	ajDebug("hit %d start:%d cut1:%d cut2:%d\n",
		i, m->start, m->cut1, m->cut2);

	hang1 = (ajint)m->cut1 - (ajint)m->start;
	hang2 = (ajint)m->cut2 - (ajint)m->start;

	if(!plasmid && (hang1>100 || hang2>100))
	{
	    embMatMatchDel(&m);
	    continue;
	}

	if(limit)
	{
	    value=ajTableFetchS(table,m->cod);
	    if(value)
		ajStrAssignS(&m->cod,value);
	}

	if(m->cut2 >= m->cut1)
	    ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut1, m->cut2-1);
	else
	{
	    ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq), m->cut2, m->cut1-1);
	    ajStrReverse(&overhead);
	}

	ajDebug("overhead:%S seqcmp:%S\n", overhead, seqcmp);

	/* Print out only those who have the same overhang. */
	if(ajStrMatchCaseS(overhead, seqcmp))
	{
	    if(html)
	    {
		ajFmtPrintF(outf,
			    "<tr><td>%-d</td><td>%-16s</td><td>%-16s"
			    "</td><td>%d</td><td>%d</td></tr>\n",
			    m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
			    m->cut1,m->cut2);
	    }
	    else
		ajFmtPrintF(outf,"\t%-d\t%-16s%-16s%d\t%d\t\n",
			    m->start,ajStrGetPtr(m->cod),ajStrGetPtr(m->pat),
			    m->cut1,m->cut2);
	}

	if(frags)
	    fa[fn++] = m->cut1;

	if(m->cut3 || m->cut4)
	{
	    if(m->cut4 >= m->cut3)
		ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
				m->cut3, m->cut4-1);
	    else
	    {
		ajStrAssignSubS(&overhead, ajSeqGetSeqS( seq),
				m->cut4, m->cut3-1);
		ajStrReverse(&overhead);
	    }

	    if(ajStrMatchCaseS(overhead, seqcmp))
	    {
		if(html)
		    ajFmtPrintF(outf,
				"<tr><td>%-d</td><td>%-16s</td><td>%-16s"
				"</td><td></td><td></td><td>%d</td><td>%d"
				"</td></tr>\n",
				m->start,ajStrGetPtr(m->cod),
				ajStrGetPtr(m->pat),
				m->cut1,m->cut2);
		else
		    ajFmtPrintF(outf,"\t%-d\t%-16s%-16s\t\t%d\t%d\t\n",
				m->start,ajStrGetPtr(m->cod),
				ajStrGetPtr(m->pat),
				m->cut1,m->cut2);
	    }
	}

	/* I am not sure what fragments are doing so I left it in ...*/
	/* used in the report tail in restrict - restover does much the same */
	if(m->cut3 || m->cut4)
	{
	    if(frags)
		fa[fn++] = m->cut3;
	    /*	       ajFmtPrintF(*outf,"%d\t%d",m->cut3,m->cut4);*/
	}
	ajStrDel(&overhead);

	embMatMatchDel(&m);
    }



    if(frags)
    {
	ajSortIntInc(fa,fn);
	ajFmtPrintF(outf,"\n\nFragment lengths:\n");
	if(!fn || (fn==1 && plasmid))
	    ajFmtPrintF(outf,"    %d\n",end-begin+1);
	else
	{
	    last = -1;
	    fb = 0;
	    for(i=0;i<fn;++i)
	    {
		if((c=fa[i])!=last)
		    fa[fb++]=c;
		last = c;
	    }
	    fn = fb;
	    /* Calc lengths */

	    for(i=0;i<fn-1;++i)
		fx[fc++] = fa[i+1]-fa[i];
	    if(!plasmid)
	    {
		fx[fc++] = fa[0]-begin+1;
		fx[fc++] = end-fa[fn-1];
	    }
	    else
		fx[fc++] = (fa[0]-begin+1)+(end-fa[fn-1]);

	    ajSortIntDec(fx,fc);
	    for(i=0;i<fc;++i)
		ajFmtPrintF(outf,"    %d\n",fx[i]);
	}
	AJFREE(fa);
	AJFREE(fx);
    }


    ajStrDel(&ps);

    if(html)
	ajFmtPrintF(outf,"</table>\n");

    return;
}
int main(int argc, char **argv)
{
    AjPSeq seq;
    AjPGraph graph = 0;
    AjPFile   outf = NULL;
    AjPFile file   = NULL;
    AjPStr buffer  = NULL;
    float twist[4][4][4];
    float roll[4][4][4];
    float tilt[4][4][4];
    float rbend;
    float rcurve;
    float bendscale;
    float curvescale;
    float twistsum = (float) 0.0;
    float pi       = (float) 3.14159;
    float pifac    = (pi/(float) 180.0);
    float pi2      = pi/(float) 2.0;
    ajint *iseq    = NULL;
    float *x;
    float *y;
    float *xave;
    float *yave;
    float *curve;
    float *bend;
    const char *ptr;
    ajint i;
    ajint ii;
    ajint k;
    ajint j;
    char residue[2];
    float maxbend;
    float minbend;
    float bendfactor;
    float maxcurve;
    float mincurve;
    float curvefactor;

    float fxp;
    float fyp;
    float yincr;
    float yy1;
    ajint ixlen;
    ajint iylen;
    ajint ixoff;
    ajint iyoff;
    float ystart;
    float defheight;
    float currentheight;
    ajint count;
    ajint portrait = 0;
    ajint title    = 0;
    ajint numres;
    ajint ibeg;
    ajint iend;
    ajint ilen;
    AjPStr sstr = NULL;
    ajint ipos;
    float dx;
    float dy;
    float rxsum;
    float rysum;
    float yp1;
    float yp2;
    double td;
    
    embInit("banana", argc, argv);
    seq    = ajAcdGetSeq("sequence");
    file   = ajAcdGetDatafile("anglesfile");
    outf   = ajAcdGetOutfile("outfile");
    graph  = ajAcdGetGraph("graph");
    numres = ajAcdGetInt("residuesperline");

    ibeg = ajSeqGetBegin(seq);
    iend = ajSeqGetEnd(seq);

    ajStrAssignSubS(&sstr, ajSeqGetSeqS(seq), ibeg-1, iend-1);
    ilen = ajStrGetLen(sstr);

    AJCNEW0(iseq,ilen+1);
    AJCNEW0(x,ilen+1);
    AJCNEW0(y,ilen+1);
    AJCNEW0(xave,ilen+1);
    AJCNEW0(yave,ilen+1);
    AJCNEW0(curve,ilen+1);
    AJCNEW0(bend,ilen+1);

    ptr= ajStrGetPtr(sstr);

    for(ii=0;ii<ilen;ii++)
    {
	if(*ptr=='A' || *ptr=='a')
	    iseq[ii+1] = 0;
	else if(*ptr=='T' || *ptr=='t')
	    iseq[ii+1] = 1;
	else if(*ptr=='G' || *ptr=='g')
	    iseq[ii+1] = 2;
	else if(*ptr=='C' || *ptr=='c')
	    iseq[ii+1] = 3;
	else
	    ajErr("%c is not an ATCG hence not valid",*ptr);

	ptr++;
    }


    if(!file)
	ajErr("Banana failed to open angle file");

    ajReadline(file,&buffer);		/* 3 junk lines */
    ajReadline(file,&buffer);
    ajReadline(file,&buffer);

    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	{
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",
		       &twist[ii][0][k],&twist[ii][1][k],&twist[ii][2][k],
		       &twist[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");

	    for(j=0;j<4;j++)
		twist[ii][j][k] *= pifac;
	}


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
	    {
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&roll[ii][0][k],
		       &roll[ii][1][k],&roll[ii][2][k],&roll[ii][3][k]);
	    }
	    else
		ajErr("Error reading angle file");


    for(k=0;k<4;k++)
	for(ii=0;ii<4;ii++)
	    if(ajReadline(file,&buffer))
		sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&tilt[ii][0][k],
		       &tilt[ii][1][k],&tilt[ii][2][k],&tilt[ii][3][k]);
	    else
		ajErr("Error reading angle file");


    if(ajReadline(file,&buffer))
	sscanf(ajStrGetPtr(buffer),"%f,%f,%f,%f",&rbend,&rcurve,
	       &bendscale,&curvescale);
    else
	ajErr("Error reading angle file");

    ajFileClose(&file);
    ajStrDel(&buffer);


    for(ii=1;ii<ilen-1;ii++)
    {
	twistsum += twist[iseq[ii]][iseq[ii+1]][iseq[ii+2]];
	dx = (roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum)) +
	    (tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*sinfban(twistsum-pi2));
	dy = roll[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum) +
	    tilt[iseq[ii]][iseq[ii+1]][iseq[ii+2]]*cosfban(twistsum-pi2);

	x[ii+1] = x[ii]+dx;
	y[ii+1] = y[ii]+dy;

    }

    for(ii=6;ii<ilen-6;ii++)
    {
	rxsum = 0.0;
	rysum = 0.0;
	for(k=-4;k<=4;k++)
	{
	    rxsum+=x[ii+k];
	    rysum+=y[ii+k];
	}
	rxsum+=(x[ii+5]*(float)0.5);
	rysum+=(y[ii+5]*(float)0.5);
	rxsum+=(x[ii-5]*(float)0.5);
	rysum+=(y[ii-5]*(float)0.5);

	xave[ii] = rxsum*(float)0.1;
	yave[ii] = rysum*(float)0.1;
    }

    for(i=(ajint)rbend+1;i<=ilen-(ajint)rbend-1;i++)
    {
	td = sqrt(((x[i+(ajint)rbend]-x[i-(ajint)rbend])*
		   (x[i+(ajint)rbend]-x[i-(ajint)rbend])) +
		  ((y[i+(ajint)rbend]-y[i-(ajint)rbend])*
		   (y[i+(ajint)rbend]-y[i-(ajint)rbend])));
	bend[i] = (float) td;
	bend[i]*=bendscale;
    }

    for(i=(ajint)rcurve+6;i<=ilen-(ajint)rcurve-6;i++)
    {
	td = sqrt(((xave[i+(ajint)rcurve]-
		    xave[i-(ajint)rcurve])*(xave[i+(ajint)rcurve]-
					    xave[i-(ajint)rcurve]))+
		  ((yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])*
		   (yave[i+(ajint)rcurve]-yave[i-(ajint)rcurve])));
	curve[i] = (float) td;
    }
    

    if(outf)
    {
	ajFmtPrintF(outf,"Base   Bend      Curve\n");
	ptr = ajStrGetPtr(sstr);
	for(ii=1;ii<=ilen;ii++)
	{
	    ajFmtPrintF(outf,"%c    %6.1f   %6.1f\n",
			*ptr, bend[ii], curve[ii]);
	    ptr++;
	}
	ajFileClose(&outf);
    }

    if(graph)
    {
	maxbend  = minbend  = 0.0;
	maxcurve = mincurve = 0.0;
	for(ii=1;ii<=ilen;ii++)
	{
	    if(bend[ii] > maxbend)
		maxbend = bend[ii];
	    if(bend[ii] < minbend)
		minbend = bend[ii];
	    if(curve[ii] > maxcurve)
		maxcurve = curve[ii];
	    if(curve[ii] < mincurve)
		mincurve = curve[ii];
	}

	ystart = 75.0;

	ajGraphAppendTitleS(graph, ajSeqGetUsaS(seq));

        ajGraphicsSetPagesize(960, 768);

	ajGraphOpenWin(graph,(float)-1.0, (float)numres+(float)10.0,
		       (float)0.0, ystart+(float)5.0);

	ajGraphicsGetParamsPage(&fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff);

	if(portrait)
        {
            ixlen = 768;
            iylen = 960;
        }
        else
        {
            ixlen = 960;
            iylen = 768;
        }

	ajGraphicsGetCharsize(&defheight,&currentheight);
	if(!currentheight)
	{
	    defheight = currentheight = (float) 4.440072;
	    currentheight = defheight *
		((float)ixlen/ ((float)(numres)*(currentheight+(float)1.0)))
		    /currentheight;
	}
	ajGraphicsSetCharscale(((float)ixlen/((float)(numres)*
					  (currentheight+(float)1.0)))/
			    currentheight);
	ajGraphicsGetCharsize(&defheight,&currentheight);

	yincr = (currentheight + (float)3.0)*(float)0.3;

	if(!title)
	    yy1 = ystart;
	else
	    yy1 = ystart-(float)5.0;

	count = 1;

	residue[1]='\0';

	bendfactor = (3*yincr)/maxbend;
	curvefactor = (3*yincr)/maxcurve;

	ptr = ajStrGetPtr(sstr);

	yy1 = yy1-(yincr*((float)5.0));
	for(ii=1;ii<=ilen;ii++)
	{
	    if(count > numres)
	    {
		yy1 = yy1-(yincr*((float)5.0));
		if(yy1<1.0)
		{
		    if(!title)
			yy1=ystart;
		    else
			yy1 = ystart-(float)5.0;

		    yy1 = yy1-(yincr*((float)5.0));
		    ajGraphNewpage(graph,AJFALSE);
		}
		count = 1;
	    }
	    residue[0] = *ptr;

	    ajGraphicsDrawposTextAtend((float)(count)+(float)2.0,yy1,residue);

	    if(ii>1 && ii < ilen)
	    {
		yp1 = yy1+yincr + (bend[ii]*bendfactor);
		yp2 = yy1+yincr + (bend[ii+1]*bendfactor);
		ajGraphicsDrawposLine((float)count+(float)1.5,yp1,
			    (float)(count)+(float)2.5,yp2);
	    }

	    ipos = ilen-(ajint)rcurve-7;
	    if(ipos < 0)
		ipos = 0;

	    if(ii>rcurve+5 && ii<ipos)
	    {
		yp1 = yy1+yincr + (curve[ii]*curvefactor);
		yp2 = yy1+yincr + (curve[ii+1]*curvefactor);
		ajGraphicsDrawposLine((float)count+(float)1.7,yp1,
			    (float)(count)+(float)2.3,yp2);
	    }

	    ajGraphicsDrawposLine((float)count+(float)1.5,yy1+yincr,
			(float)(count)+(float)2.5,yy1+yincr);

	    count++;
	    ptr++;
	}

	ajGraphicsClose();
    }

    AJFREE(iseq);
    AJFREE(x);
    AJFREE(y);
    AJFREE(xave);
    AJFREE(yave);
    AJFREE(curve);
    AJFREE(bend);

    ajStrDel(&sstr);

    ajSeqDel(&seq);
    ajFileClose(&file);
    ajFileClose(&outf);
    ajGraphxyDel(&graph);

    embExit();

    return 0;
}
Beispiel #9
0
int main(int argc, char **argv)
{
    AjPFile outf = NULL;

    AjPSeq sequence = NULL;
    AjPStr substr   = NULL;
    AjPStr seqstr = NULL;
    AjPStr revstr = NULL;

    AjPStr p1;
    AjPStr p2;

    PPrimer eric = NULL;
    PPrimer fred = NULL;

    PPrimer f;
    PPrimer r;

    PPair pair;

    AjPList forlist  = NULL;
    AjPList revlist  = NULL;
    AjPList pairlist = NULL;

    AjBool targetrange;
    AjBool isDNA  = ajTrue;
    AjBool dolist = ajFalse;

    ajint primerlen    = 0;
    ajint minprimerlen = 0;
    ajint maxprimerlen = 0;
    ajint minprodlen   = 0;
    ajint maxprodlen   = 0;
    ajint prodlen      = 0;

    ajint seqlen = 0;
    ajint stepping_value = 1;

    ajint targetstart = 0;
    ajint targetend   = 0;

    ajint limit    = 0;
    ajint limit2   = 0;
    ajint lastpos  = 0;
    ajint startpos = 0;
    ajint endpos   = 0;

    ajint begin;
    ajint end;
    ajint v1;
    ajint v2;

    ajint overlap;

    float minpmGCcont   = 0.;
    float maxpmGCcont   = 0.;
    float minprodGCcont = 0.;
    float maxprodGCcont = 0.;
    float prodTm;
    float prodGC;

    ajint i;
    ajint j;

    ajint neric=0;
    ajint nfred=0;
    ajint npair=0;

    float minprimerTm = 0.0;
    float maxprimerTm = 0.0;

    float saltconc = 0.0;
    float dnaconc  = 0.0;

    embInit ("prima", argc, argv);

    substr = ajStrNew();

    forlist  = ajListNew();
    revlist  = ajListNew();
    pairlist = ajListNew();

    p1 = ajStrNew();
    p2 = ajStrNew();


    sequence = ajAcdGetSeq("sequence");
    outf     = ajAcdGetOutfile("outfile");

    minprimerlen = ajAcdGetInt("minprimerlen");
    maxprimerlen = ajAcdGetInt("maxprimerlen");
    minpmGCcont  = ajAcdGetFloat("minpmGCcont");
    maxpmGCcont  = ajAcdGetFloat("maxpmGCcont");
    minprimerTm  = ajAcdGetFloat("mintmprimer");
    maxprimerTm  = ajAcdGetFloat("maxtmprimer");

    minprodlen    = ajAcdGetInt("minplen");
    maxprodlen    = ajAcdGetInt("maxplen");
    minprodGCcont = ajAcdGetFloat("minpgccont");
    maxprodGCcont = ajAcdGetFloat("maxpgccont");

    saltconc = ajAcdGetFloat("saltconc");
    dnaconc  = ajAcdGetFloat("dnaconc");

    targetrange = ajAcdGetToggle("targetrange");
    targetstart = ajAcdGetInt("targetstart");
    targetend   = ajAcdGetInt("targetend");

    overlap = ajAcdGetInt("overlap");
    dolist  = ajAcdGetBoolean("list");

    seqstr = ajSeqGetSeqCopyS(sequence);
    ajStrFmtUpper(&seqstr);

    begin  = ajSeqGetBegin(sequence);
    end    = ajSeqGetEnd(sequence);
    seqlen = end-begin+1;

    ajStrAssignSubC(&substr,ajStrGetPtr(seqstr),begin-1,end-1);
    revstr = ajStrNewC(ajStrGetPtr(substr));
    ajSeqstrReverse(&revstr);

    AJCNEW0(entropy, seqlen);
    AJCNEW0(enthalpy, seqlen);
    AJCNEW0(energy, seqlen);

    /* Initialise Tm calculation arrays */
    ajMeltTempSave(ajStrGetPtr(substr),0,seqlen,saltconc,dnaconc,1,
	  &entropy, &enthalpy, &energy);


    ajFmtPrintF(outf, "\n\nINPUT SUMMARY\n");
    ajFmtPrintF(outf, "*************\n\n");

    if(targetrange)
	ajFmtPrintF
	    (outf, "Prima of %s from positions %d to %d bps\n",
	     ajSeqGetNameC(sequence),targetstart, targetend);
    else
	ajFmtPrintF(outf, "Prima of %s\n", ajSeqGetNameC(sequence));

    ajFmtPrintF(outf, "PRIMER CONSTRAINTS:\n");
    ajFmtPrintF
	(outf, "PRIMA DOES NOT ALLOW PRIMER SEQUENCE AMBIGUITY OR ");
    ajFmtPrintF(outf,"DUPLICATE PRIMER ENDPOINTS\n");
    ajFmtPrintF(outf,
		"Primer size range is %d-%d\n",minprimerlen,maxprimerlen);
    ajFmtPrintF(outf,
		"Primer GC content range is %.2f-%.2f\n",minpmGCcont,
		maxpmGCcont);
    ajFmtPrintF(outf,"Primer melting Temp range is %.2f - %.2f C\n",
		minprimerTm, maxprimerTm);

    ajFmtPrintF (outf, "PRODUCT CONSTRAINTS:\n");

    ajFmtPrintF(outf,"Product GC content range is %.2f-%.2f\n",
		minprodGCcont, maxprodGCcont);

    ajFmtPrintF(outf, "Salt concentration is %.2f (mM)\n", saltconc);
    ajFmtPrintF(outf, "DNA concentration is %.2f (nM)\n", dnaconc);



    if(targetrange)
	ajFmtPrintF(outf, "Targeted range to amplify is from %d to %d\n",
		    targetstart,targetend);
    else
    {
	ajFmtPrintF(outf,"Considering all suitable Primer pairs with ");
	ajFmtPrintF(outf,"Product length ranges %d to %d\n\n\n", minprodlen,
		    maxprodlen);
    }


    ajFmtPrintF(outf, "\n\nPRIMER/PRODUCT PAIR CALCULATIONS & OUTPUT\n");
    ajFmtPrintF(outf, "*****************************************\n\n");


    if(seqlen-minprimerlen < 0)
	ajFatal("Sequence too short");

    if(targetrange)
    {
	ajStrAssignSubC(&p1,ajStrGetPtr(substr),targetstart-begin,targetend-begin);

	prodGC = ajMeltGC(substr,seqlen);
	prodTm = ajMeltTempProd(prodGC,saltconc,seqlen);

	if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
	{
	    ajFmtPrintF(outf,
			"Product GC content [%.2f] outside acceptable range\n",
			prodGC);
	    embExitBad();
	    return 0;
	}

	prima_testtarget(substr, revstr, targetstart-begin, targetend-begin,
			 minprimerlen, maxprimerlen,
			 seqlen, minprimerTm, maxprimerTm, minpmGCcont,
			 maxpmGCcont, minprodGCcont, maxprodGCcont, saltconc,
			 dnaconc, pairlist, &npair);
    }



    if(!targetrange)
    {

    limit   = seqlen-minprimerlen-minprodlen+1;
    lastpos = seqlen-minprodlen;
    limit2  = maxprodlen-minprodlen;

    /* Outer loop selects all possible product start points */
    for(i=minprimerlen; i<limit; ++i)
    {
	startpos = i;
	ajDebug("Position in sequence %d\n",startpos);
	endpos = i+minprodlen-1;
	/* Inner loop selects all possible product lengths  */
	for(j=0; j<limit2; ++j, ++endpos)
	{
	    if(endpos>lastpos)
		break;

	    v1 = endpos-startpos+1;
	    ajStrAssignSubC(&p1,ajStrGetPtr(substr),startpos,endpos);
	    prodGC = ajMeltGC(p1,v1);
	    prodTm = ajMeltTempProd(prodGC,saltconc,v1);

	    if(prodGC<minprodGCcont || prodGC>maxprodGCcont)
		continue;

	    /* Only accept primers with acceptable Tm and GC */
	    neric = 0;
	    nfred = 0;
	    prima_testproduct(substr, startpos, endpos, primerlen,
			      minprimerlen, maxprimerlen,minpmGCcont,
			      maxpmGCcont, minprimerTm, maxprimerTm,
			      minprodlen, maxprodlen, prodTm, prodGC, seqlen,
			      &eric,&fred,forlist,revlist,&neric,&nfred,
			      stepping_value, saltconc,dnaconc, isDNA, begin);
	    if(!neric)
		continue;



	    /* Now reject those primers with self-complementarity */

	    prima_reject_self(forlist,revlist,&neric,&nfred);
	    if(!neric)
		continue;

	    /* Reject any primers that could bind elsewhere in the
               sequence */
	    prima_test_multi(forlist,revlist,&neric,&nfred,substr,revstr,
			     seqlen);



	    /* Now select the least complementary pair (if any) */
	    prima_best_primer(forlist, revlist, &neric, &nfred);
	    if(!neric)
		continue;

	    AJNEW(pair);
	    ajListPop(forlist,(void **)&f);
	    ajListPop(revlist,(void **)&r);
	    pair->f = f;
	    pair->r = r;
	    ++npair;
	    ajListPush(pairlist,(void *)pair);
	}
     }

  }


    if(!targetrange)
    {
	/* Get rid of primer pairs nearby the top scoring ones */
	prima_TwoSortscorepos(&pairlist);
	prima_prune_nearby(pairlist, &npair, maxprimerlen-1);
	ajListSort(pairlist,prima_PosCompare);
	prima_check_overlap(pairlist,&npair,overlap);
    }



    if(npair)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"%d pairs found\n\n",npair);
	else
	    ajFmtPrintF(outf,
			"Closest primer pair to specified product is:\n\n");

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	    ajFmtPrintF(outf,"\n\t\tForward\t\t\t\t\tReverse\n\n");
    }



    for(i=0;i<npair;++i)
    {
	if(!targetrange)
	    ajFmtPrintF(outf,"[%d]\n",i+1);

	ajListPop(pairlist,(void **)&pair);


	prodlen = pair->r->start - (pair->f->start + pair->f->primerlen);

	if((maxprimerlen<26 && seqlen<999999 && !dolist))
	{
	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;

	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"%6d %-25.25s %d\t", v1+begin, ajStrGetPtr(p1),
			v2+begin);


	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajFmtPrintF(outf,
			"%6d %-25.25s %d\n", v1+begin, ajStrGetPtr(p2), v2+begin);


	    ajFmtPrintF(outf,"       Tm  %.2f C  (GC %.2f%%)\t\t       ",
			pair->f->primerTm,pair->f->primGCcont*100.);
	    ajFmtPrintF(outf,"Tm  %.2f C  (GC %.2f%%)\n",
			pair->r->primerTm,pair->r->primGCcont*100.);

	    ajFmtPrintF(outf,"             Length: %-32dLength: %d\n",
			pair->f->primerlen,pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:    %.2f C\t\t\t",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));
	    ajFmtPrintF(outf,"     Tma:    %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));


	    ajFmtPrintF(outf,"       Product GC: %.2f%%\n",
			pair->f->prodGC * 100.0);
	    ajFmtPrintF(outf,"       Product Tm: %.2f C\n",
			pair->f->prodTm);
	    ajFmtPrintF(outf,"       Length:     %d\n\n\n",prodlen);
	}
	else
	{
	    ajFmtPrintF(outf,"    Product from %d to %d\n",pair->f->start+
			pair->f->primerlen+begin,pair->r->start-1+begin);
	    ajFmtPrintF(outf,"                 Tm: %.2f C   GC: %.2f%%\n",
			pair->f->prodTm,pair->f->prodGC*(float)100.);
	    ajFmtPrintF(outf,"                 Length: %d\n\n\n",prodlen);


	    v1 = pair->f->start;
	    v2 = v1 + pair->f->primerlen -1;
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Forward: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->f->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->f->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->f->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->f->primerTm,pair->f->prodTm));

	    v1 = pair->r->start;
	    v2 = v1 + pair->r->primerlen -1;
	    ajStrAssignSubS(&p2,substr,v1,v2);
	    ajSeqstrReverse(&p2);
	    ajStrAssignSubS(&p1,substr,v1,v2);
	    ajFmtPrintF(outf,"    Reverse: 5' %s 3'\n",ajStrGetPtr(p1));
	    ajFmtPrintF(outf,"             Start: %d\n",v1+begin);
	    ajFmtPrintF(outf,"             End:   %d\n",v2+begin);
	    ajFmtPrintF(outf,"             Tm:    %.2f C\n",
			pair->r->primerTm);
	    ajFmtPrintF(outf,"             GC:    %.2f%%\n",
			pair->r->primGCcont*(float)100.);
	    ajFmtPrintF(outf,"             Len:   %d\n",
			pair->r->primerlen);
	    ajFmtPrintF(outf,"             Tma:   %.2f C\n\n\n",
			ajAnneal(pair->r->primerTm,pair->f->prodTm));
	}

	prima_PrimerDel(&pair->f);
	prima_PrimerDel(&pair->r);
	AJFREE(pair);
    }



    ajStrDel(&seqstr);
    ajStrDel(&revstr);
    ajStrDel(&substr);
    ajStrDel(&p1);
    ajStrDel(&p2);

    ajListFree(&forlist);
    ajListFree(&revlist);
    ajListFree(&pairlist);

    ajFileClose(&outf);
    ajSeqDel(&sequence);

    AJFREE(entropy);
    AJFREE(enthalpy);
    AJFREE(energy);

    embExit();

    return 0;
}
Beispiel #10
0
static void prima_reject_self(AjPList forlist,AjPList revlist, ajint *neric,
			      ajint *nfred)
{
    ajint count;
    ajint j;
    ajint i;
    PPrimer tmp;

    ajint len;
    ajint cut;
    AjPStr str1;
    AjPStr str2;
    ajint x;


    str1 = ajStrNew();
    str2 = ajStrNew();

    /* deal with forwards */
    count = *neric;
    for(i=0;i<*neric;++i)
    {
	ajListPop(forlist,(void **)&tmp);
	len = tmp->primerlen;
	cut = (len/2)-1;
	ajStrAssignSubS(&str1,tmp->substr,0,cut);
	ajStrAssignSubS(&str2,tmp->substr,cut+1,len-1);
	x = prima_primalign(str1,str2);
	if(x<SIMLIMIT)
	    ajListPushAppend(forlist,(void *)tmp);
	else
	{
	    prima_PrimerDel(&tmp);
	    --count;
	}
    }
    *neric = count;

    if (!*neric)
    {
	ajStrDel(&str1);
	ajStrDel(&str2);
	while(ajListPop(revlist,(void**)&tmp))
	    prima_PrimerDel(&tmp);
	*nfred=0;
	return;
    }



    /****** reverses ********/

    count = *nfred;

    for(j=0; j<*nfred; ++j)
    {
	ajListPop(revlist,(void **)&tmp);
	len = tmp ->primerlen;
	cut = (len/2)-1;
	ajStrAssignSubS(&str1,tmp->substr,0,cut);
	ajStrAssignSubS(&str2,tmp->substr,cut+1,len-1);
	x = prima_primalign(str1,str2);

	if(x<SIMLIMIT)
	    ajListPushAppend(revlist,(void *)tmp);
	else
	{
	    --count;
	    prima_PrimerDel(&tmp);
	}
    }
    *nfred = count;

    if(!*nfred)
    {
	while(ajListPop(forlist,(void**)&tmp))
	    prima_PrimerDel(&tmp);
	*neric=0;
    }
    ajStrDel(&str1);
    ajStrDel(&str2);

    return;
}
Beispiel #11
0
static void prima_testtarget(const AjPStr seqstr, const AjPStr revstr,
			     ajint targetstart,
			     ajint targetend, ajint minprimerlen,
			     ajint maxprimerlen, ajint seqlen,
			     float minprimerTm, float maxprimerTm,
			     float minpmGCcont, float maxpmGCcont,
			     float minprodGCcont, float maxprodGCcont,
			     float saltconc, float dnaconc,
			     AjPList pairlist, ajint *npair)
{


    AjPStr fstr;
    AjPStr rstr;

    AjPStr str1;
    AjPStr str2;
    PPrimer f;
    PPrimer r;

    PPair ppair;

    ajint i;
    ajint j;
    ajint forstart = 0;
    ajint forend;
    ajint revstart = 0;
    ajint revend;
    ajint Limit;
    ajint tnum;
    ajint thisplen;
    ajint cut;

    float primerTm = 0.0;
    float primGCcont = 0.0;
    float prodgc = 0.0;

    AjBool found = ajFalse;
    AjBool revfound = ajFalse;
    AjBool isDNA = ajTrue;

    ajint flen = 0;
    ajint rlen = 0;

    float ftm = 0.0;
    float rtm = 0.0;
    float fgc = 0.0;
    float rgc = 0.0;
    ajint fsc = 0;
    ajint rsc = 0;

    const char *s;
    const char *s2;
    const char *p;
    ajint  pv;
    ajint  plimit;
    ajint  pcount;
    ajint  k;

    (void) minprodGCcont;
    (void) maxprodGCcont;

    fstr = ajStrNew();
    rstr = ajStrNew();
    str1 = ajStrNew();
    str2 = ajStrNew();



    tnum=maxprimerlen-minprimerlen+1;

    /******FORWARDS  *******/

    for(i=targetstart-minprimerlen; i>-1; --i)
    {
	forstart = i;
	forend = i+minprimerlen-1;


	for(j=0; j<tnum; ++j,++forend)
	{
	    if(forend==targetstart)
		break;

	    ajStrAssignSubC(&fstr, ajStrGetPtr(seqstr), forstart, forend);

	    thisplen = ajStrGetLen(fstr);
	    primerTm = ajMeltTempSave("",forstart,thisplen,
                                      saltconc, dnaconc, isDNA,
                                      &entropy, &enthalpy, &energy);

	    if(primerTm <minprimerTm || primerTm>maxprimerTm)
		continue;

	    primGCcont= ajMeltGC(fstr, thisplen);
	    if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont)
		continue;


	    /*instead of calling the self-reject function */
	    cut = (thisplen/2)-1;

	    ajStrAssignSubS(&str1, fstr, 0, cut);
	    ajStrAssignSubS(&str2, fstr, cut+1, thisplen-1);

	    if((fsc=prima_primalign(str1, str2)) > SIMLIMIT)
		continue;

	    /* Test for match with rest of sequence */
	    s  = ajStrGetPtr(seqstr);
	    s2 = ajStrGetPtr(revstr);
	    p  = ajStrGetPtr(fstr);
	    pv = thisplen;
	    pcount = 0;
	    plimit = seqlen-pv+1;
	    for(k=0;k<plimit && pcount<2;++k)
	    {
		if(prima_seq_align(s+k,p,pv)>SIMLIMIT2)
		    ++pcount;
		if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2)
		    ++pcount;
	    }

	    if(pcount<2)
	    {
		found = ajTrue;
		flen  = thisplen;
		ftm   = primerTm;
		fgc   = primGCcont;
		break;
	    }
	}

	if(found)
	    break;
    }



    /******* REVERSES IN TARGETRANGE *****/


    Limit = seqlen-minprimerlen;

    if(found)
	for(i=targetend+1; i<Limit; ++i)
	{
	    revstart = i;
	    revend = i+minprimerlen-1;

	    for(j=0; j<tnum; ++j,++revend)
	    {
		if(revend==seqlen)
		    break;

		ajStrAssignSubC(&rstr, ajStrGetPtr(seqstr), revstart, revend);
		ajSeqstrReverse(&rstr);

		thisplen = ajStrGetLen(rstr);
		primerTm = ajMeltTempSave("", revstart, thisplen,
                                          saltconc, dnaconc, 1,
                                          &entropy, &enthalpy, &energy);

		if(primerTm <minprimerTm || primerTm>maxprimerTm)
		    continue;

		primGCcont= ajMeltGC(rstr, thisplen);
		if(primGCcont< minpmGCcont || primGCcont >maxpmGCcont)
		    continue;

		/*instead of calling the self-reject function */
		cut = (thisplen/2)-1;

		ajStrAssignSubS(&str1, rstr, 0, cut);
		ajStrAssignSubS(&str2, rstr, cut+1, thisplen-1);

		if((rsc=prima_primalign(str1, str2)) < SIMLIMIT)
		    continue;

		/* Test for match with rest of sequence */
		s  = ajStrGetPtr(seqstr);
		s2 = ajStrGetPtr(revstr);
		p  = ajStrGetPtr(rstr);
		pv = thisplen;
		pcount = 0;
		plimit = seqlen-pv+1;
		for(k=0;k<plimit && pcount<2;++k)
		{
		    if(prima_seq_align(s+k,p,pv)>SIMLIMIT2)
			++pcount;

		    if(prima_seq_align(s2+k,p,pv)>SIMLIMIT2)
			++pcount;
		}

		if(pcount<2)
		{
		    revfound = ajTrue;
		    rlen     = thisplen;
		    rtm      = primerTm;
		    rgc      = primGCcont;
		    break;
		}
	    }

	    if(revfound)
		break;
	}


    if(found && !revfound)
    {
	found = ajFalse;
	ajWarn("No reverse primers found in targetrange");
	*npair = 0;
	return;
    }



    if(!found)
    {
	ajWarn("No forward primers found in targetrange");
	*npair = 0;
	return;
    }

    ajStrAssignSubC(&str1,ajStrGetPtr(seqstr),forstart+flen,revstart-1);
    prodgc = ajMeltGC(str1,revstart-(forstart+flen));



    AJNEW0(f);
    f->substr     = ajStrNewC(ajStrGetPtr(fstr));
    f->start      = forstart;
    f->primerlen  = flen;
    f->primerTm   = ftm;
    f->primGCcont = fgc;
    f->score      = fsc;
    f->prodGC     = prodgc;
    f->prodTm     = ajMeltTempProd(prodgc,saltconc,revstart-(forstart+flen));


    AJNEW0(r);
    r->substr     = ajStrNewC(ajStrGetPtr(rstr));
    r->start      = revstart;
    r->primerlen  = rlen;
    r->primerTm   = rtm;
    r->primGCcont = rgc;
    r->score      = rsc;


    AJNEW0(ppair);
    ppair->f = f;
    ppair->r = r;
    ajListPush(pairlist,(void *)ppair);
    *npair = 1;

    return;
}
Beispiel #12
0
static void domainalign_ProcessStampFile(AjPStr in, 
					 AjPStr out,
					 AjPDomain domain, 
					 ajint noden, 
					 AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer.          */
    AjPFile   inf = NULL;  /* Input file pointer.           */
    AjPStr  temp1 = NULL;  /* Temporary string.             */
    AjPStr  temp2 = NULL;  /* Temporary string.             */
    AjPStr  temp3 = NULL;  /* Temporary string.             */
    AjPStr   line = NULL;  /* Line of text from input file. */
    ajint     blk = 1;     /* Count of the current block in the input file.
			      Block 1 is the numbering and protein sequences, 
			      Block 2 is the secondary structure, 
			      Block 3 is the Very/Less/Post similar records*/
    AjBool     ok = ajFalse;
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1    = ajStrNew();
    temp2    = ajStrNew();
    temp3    = ajStrNew();


    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
	ajFatal("Could not open input file in domainalign_ProcessStampFile");
    



    /* Start of code for reading input file. 
       Ignore everything up to first line beginning with 'Number'. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* ajFileReadLine will trim the tailing \n. */
	if((ajStrGetCharPos(line, 1)=='\0'))
	{
	    ok = ajTrue;
	    break;
	}
    }
    
    
    
    /* Read rest of input file. */
    if(ok)
    {
	/* Write DOMAIN classification records to file. */
	if(!(outf=ajFileNewOutNameS(out)))
	 ajFatal("Could not open output file in domainalign_ProcessStampFile");

	
	if((domain->Type == ajSCOP))
	{
	    ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	    ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",
			    75," \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	else
	{
	    ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	    ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	    ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",
			    75," \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75,
			    " \t\n\r");
	    ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75,
			    " \t\n\r");
	    ajFmtPrintF(outf,"# XX\n");
	}
	if((domain->Type == ajSCOP))
	{
	    if(noden==1) 
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Class);
	    else if(noden==2)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Scop->Sunid_Fold);
	    else if(noden==3)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Superfamily);
	    else if(noden==4) 	
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Scop->Sunid_Family);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}
	else
	{
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX",
			    domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
	}   



	while((ajReadlineTrim(inf,&line)))
	{
	    /* Increment counter for block of file. */
	    if((ajStrGetCharPos(line, 1)=='\0'))
	    {
		blk++;
		if(blk==4)
		    blk=1;
	    
		continue;
	    }



	    /* Block of numbering line and protein sequences. */
	    if(blk==1)
	    {
		/* Print the number line out as it is. */
		if(ajStrPrefixC(line,"Number"))
		    ajFmtPrintF(outf,"\n# %7s %S\n"," ", line);
		else
		{
		    /* Read only the 7 characters
		       of the domain identifier
		       code in. */
		    ajFmtScanS(line, "%S", &temp1);
		    ajStrAssignSubS(&temp2, temp1, 0, 6);


		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);
		    ajStrExchangeSetCC(&temp3, " ", "X");
		    ajFmtPrintF(logf, "Replaced ' ' in STAMP alignment "
				"with 'X'\n");
		    ajStrFmtUpper(&temp3);
		

		    /* Write domain id code and sequence out. */
		    ajFmtPrintF(outf,"%-15S%7d %S%7d\n",
				temp2, 0, temp3, 0);
		}
	    }
	    /* Secondary structure filled with '????' (unwanted). */
	    else if(blk==2)
	    {
		continue;
	    }
	    /* Similarity lines. */
	    else
	    {
		if(ajStrPrefixC(line,"Post"))
		{
		    /* Read the sequence. */
		    ajStrAssignSubS(&temp3, line, 13, 69);

		    /* Write post similar line out. */
		    ajFmtPrintF(outf,"%-15s%7s %S\n","# Post_similar", " ",
				temp3);
		}
		/* Ignore Very and Less similar lines. */
		else continue;
	    }
	}
    }
    else /* ok == ajFalse. */
    {
	ajWarn("\n***********************************************\n"
	       "* STAMP was called but output file was EMPTY! *\n"
	       "*   NO OUTPUT FILE GENERATED FOR THIS NODE.   *\n"
	       "***********************************************\n");
	ajFmtPrintF(logf, "STAMP called but output file empty.  "
		    "No output file for this node!");
    }
    


    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
Beispiel #13
0
/* @funcstatic domainalign_ProcessTcoffeeFile *********************************
**
** Parses tcoffee output.
**
** @param [r] in [AjPStr] Name of TCOFFEE input file
** @param [r] align [AjPStr] Name of sequence alignment file for output
** @param [r] domain [AjPDomain] Domain being aligned
** @param [r] noden [ajint] Node-level of alignment** 
** @param [r] logf [AjPFile] Log file
**
** @return [void] True on success
** @@
****************************************************************************/
static void domainalign_ProcessTcoffeeFile(AjPStr in, 
					   AjPStr align, 
					   AjPDomain domain,
					   ajint noden, 
					   AjPFile logf)
{
    AjPFile  outf = NULL;  /* Output file pointer. */
    AjPFile   inf = NULL;  /* Input file pointer. */
    AjPStr  temp1 = NULL;  /* Temporary string. */
    AjPStr  temp2 = NULL;  /* Temporary string. */
    AjPStr  temp3 = NULL;  /* Temporary string. */
    AjPStr   line = NULL;  /* Line of text from input file. */
    
    
    /* Initialise strings. */
    line    = ajStrNew();
    temp1   = ajStrNew();
    temp2   = ajStrNew();
    temp3   = ajStrNew();



    /* Open input and output files. */
    if(!(inf=ajFileNewInNameS(in)))
        ajFatal("Could not open input file in domainalign_ProcessTcoffeeFile");
    if(!(outf=ajFileNewOutNameS(align)))
        ajFatal("Could not open output file in domainalign_ProcessTcoffeeFile");
    

    /*Write DOMAIN classification records to file*/
    if((domain->Type == ajSCOP))
    {
	ajFmtPrintF(outf,"# TY   SCOP\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Scop->Class);
	ajFmtPrintSplit(outf,domain->Scop->Fold,"\n# XX\n# FO   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Scop->Family,"# XX\n# FA   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    else
    {
	ajFmtPrintF(outf,"# TY   CATH\n# XX\n");
	ajFmtPrintF(outf,"# CL   %S",domain->Cath->Class);
	ajFmtPrintSplit(outf,domain->Cath->Architecture,"\n# XX\n# AR   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Topology,"# XX\n# TP   ",75," \t\n\r");
	ajFmtPrintSplit(outf,domain->Cath->Superfamily,"# XX\n# SF   ",75," \t\n\r");
	ajFmtPrintF(outf,"# XX\n");
    }
    
    if((domain->Type == ajSCOP))
    {
	if(noden==1) 
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Class);
	else if(noden==2)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Fold);
	else if(noden==3)
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Scop->Sunid_Superfamily);
	else if(noden==4) 	
	    ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Scop->Sunid_Family);
	else
	    ajFatal("Node number error in domainalign_ProcessStampFile");
    }	
    else
    {
	    if(noden==5) 
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Class_Id);
	    else if(noden==6)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n", domain->Cath->Arch_Id);
	    else if(noden==7)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Topology_Id);
	    else if(noden==8)
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Superfamily_Id);
	    else if(noden==9)  
		ajFmtPrintF(outf,"# SI   %d\n# XX\n",domain->Cath->Family_Id);
	    else
		ajFatal("Node number error in domainalign_ProcessStampFile");
    }   


    
    /* Start of code for reading input file. */
    /*Ignore everything up to first line beginning with 'Number'*/
    while((ajReadlineTrim(inf,&line)))
        /* ajFileReadLine will trim the tailing \n. */
        if((ajStrGetCharPos(line, 1)=='\0'))
            break;

    
    /* Read rest of input file. */
    while((ajReadlineTrim(inf,&line)))
    {
      if((ajStrGetCharPos(line, 1)=='\0'))
        continue; 
        
       /* Print the number line out as it is. */
            else if(ajStrPrefixC(line,"CLUSTAL"))
              continue;
	    else if(ajStrPrefixC(line," "))
                ajFmtPrintF(outf,"\n");
        /* write out a block of protein sequences. */
        else
        {
               /* Read only the 7 characters of the domain identifier code in. */
               ajFmtScanS(line, "%S %S", &temp1,&temp3);
                 ajStrAssignSubS(&temp2, temp1, 0, 6);
  
  
         /* Read the sequence
                   ajStrAssignSubS(&temp3, line, 13, 69);
                   ajStrExchangeSetCC(&temp3, " ", "X");
                   ajStrFmtUpper(&temp3);*/
                
  
                   /* Write domain id code and sequence out. */
                   ajFmtPrintF(outf,"%-13S%S\n",temp2, temp3);              
        }
    }
    

    /* Clean up and close input and output files. */
    ajFileClose(&outf);
    ajFileClose(&inf);
    ajStrDel(&line);
    ajStrDel(&temp1);
    ajStrDel(&temp2);
    ajStrDel(&temp3);
    

    /* All done. */
    return;
}
Beispiel #14
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Beispiel #15
0
static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
					AjPAssemContig const * contigs,
					ajint ncontigs)
{
    AjPAssemTag    t = NULL;
    AjIList l = NULL;
    AjPStr qualstr = NULL;
    AjPStr tmp  = NULL;
    ajint  POS  = 0;
    AjPStr CIGAR = NULL;
    const char* RNEXT = NULL;
    AjPStr SEQ  = NULL;
    AjPStr QUAL = NULL;
    AjPStr SEQunpadded  = NULL;
    AjPStr QUALunpadded = NULL;
    AjPStr consensus = NULL;
    AjBool rc= ajFalse;
    AjBool ret = ajTrue;
    const char* refseq = NULL;
    const AjPAssemContig contig = NULL;

    ajuint k = 0;

    if(r->Reference>=ncontigs)
	ajDie("assemoutWriteSamAlignment: reference sequence number"
		" '%d' is larger than or equal to known number of reference"
		" sequences '%d'. Problem while processing read '%S'.",
		r->Reference,
		ncontigs,
		r->Name);

    contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);

    ajStrAssignRef(&SEQ, r->Seq);
    consensus = contig==NULL? NULL : contig->Consensus;

    if (r->Rnext==-1)
	RNEXT= "*";
    else if(r->Rnext==r->Reference)
	RNEXT = "=";
    else
	RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);

    if (r->Flag & BAM_FREVERSE)
    {
	rc = ajTrue;
	qualstr = ajStrNewS(r->SeqQ);

	if(!r->Reversed)
	{
	    ajStrReverse(&qualstr);
	    ajSeqstrReverse(&SEQ);
	}

	QUAL = qualstr;
	POS = r->y1;
	ajStrAssignSubS(&tmp, SEQ,
		ajStrGetLen(r->Seq) - r->y2,
		ajStrGetLen(r->Seq) - r->x2
	);

    }
    else
    {
	rc= ajFalse;
	POS = r->x1;
	QUAL = r->SeqQ;
	ajStrAssignSubS(&tmp, SEQ,
		r->x2-1,
		r->y2-1
	);
    }

    if(r->Cigar==NULL && consensus)
    {
	refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);

	CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));

	SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
	QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));

	for(k=0; k< ajStrGetLen(SEQ); k++)
	{
	    if (ajStrGetCharPos(SEQ, k) == '*')
		continue;

	    ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
	    ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
	}

	ajDebug("cigar: %S\n", CIGAR);

	ajStrAssignS(&tmp, CIGAR);

	if(rc)
	{
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintS(&CIGAR, "%dS%S",
		            ajStrGetLen(SEQ) - r->y2, tmp);
	    if(r->x2 > 1)
		ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
	}
	else
	{
	    if(r->x2 > 1)
		ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintAppS(&CIGAR, "%dS",
		               ajStrGetLen(SEQ) - r->y2);
	}
	ajStrDel(&tmp);
    }
    else if(r->Cigar==NULL)
    {
	ajErr("both CIGAR string and consensus sequence not available");
	ret = ajFalse;
	ajStrAssignK(&CIGAR, '*');
    }
    else if(!ajStrGetLen(r->Cigar))
	ajStrAssignK(&CIGAR, '*');
    else if(ajStrGetLen(r->Cigar))
    {
	if(!ajStrGetLen(SEQ))
	    ajStrAssignK(&SEQ, '*');

	if(!ajStrGetLen(QUAL))
	    ajStrAssignK(&QUAL, '*');
    }

    ajStrDel(&tmp);

    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
	    r->Name,
	    r->Flag,
	    (contig==NULL ? "*" : ajStrGetPtr(contig->Name)),
	    POS,
	    r->MapQ,
	    (CIGAR ? CIGAR : r->Cigar),
	    RNEXT,
	    r->Pnext,
	    r->Tlen,
	    (r->Cigar ? SEQ  : SEQunpadded),
	    (r->Cigar ? QUAL : QUALunpadded));

    l = ajListIterNewread(r->Tags);
    while (!ajListIterDone(l))
    {
	t = ajListIterGet(l);

	/* TODO: array type, 'B' */

	/* In SAM, all single integer types are mapped to int32_t [SAM spec] */
	ajFmtPrintF(outf, "\t%S:%c:",
		t->Name,
		(t->type == 'c' || t->type == 'C' ||
		 t->type == 's' || t->type == 'S'
				|| t->type == 'I') ? 'i' : t->type
	);

	if(t->x1 || t->y1)
	    ajFmtPrintF(outf, " %u %u", t->x1, t->y1);

	if(t->Comment && ajStrGetLen(t->Comment)>0)
	    ajFmtPrintF(outf, "%S", t->Comment);

    }
    ajListIterDel(&l);

    ajFmtPrintF(outf, "\n");

    if(qualstr)
	ajStrDel(&qualstr);

    ajStrDel(&SEQ);
    ajStrDel(&CIGAR);
    ajStrDel(&SEQunpadded);
    ajStrDel(&QUALunpadded);

    return ret;
}
Beispiel #16
0
int main(int argc, char **argv)
{

    AjPFile inf	 = NULL;
    AjPFile inf2 = NULL;
    AjPFeattable tab = NULL;
    AjPReport report = NULL;

    AjPSeq sequence = NULL;

    AjPStr redatanew = NULL;
    AjPStr str	     = NULL;
    AjPStr regexp    = NULL;
    AjPStr temp	     = NULL;
    AjPStr text      = NULL;
    AjPStr docdata   = NULL;
    AjPStr data      = NULL;
    AjPStr accession = NULL;
    AjPStr name      = NULL;
    EmbPPatMatch match = NULL;
    AjPStr savereg = NULL;
    AjPStr fthit   = NULL;

    AjBool full;
    AjBool prune;

    ajint i;
    ajint number;
    ajint start;
    ajint end;
    ajint length;
    ajint zstart;
    ajint zend;
    const char *p;
    ajint seqlength;
    AjPStr tmpstr  = NULL;
    AjPStr tailstr = NULL;
    AjPFeature gf;

    embInit("patmatmotifs", argc, argv);

    ajStrAssignC(&fthit, "SO:0001067");

    savereg   = ajStrNew();
    str       = ajStrNew();
    regexp    = ajStrNew();
    temp      = ajStrNew();
    data      = ajStrNew();
    accession = ajStrNew();
    text      = ajStrNew();
    name      = ajStrNew();

    sequence = ajAcdGetSeq("sequence");
    report   = ajAcdGetReport("outfile");
    full     = ajAcdGetBoolean("full");
    prune    = ajAcdGetBoolean("prune");

    ajSeqFmtUpper(sequence);		/* prosite regexs are all upper case */
    tab = ajFeattableNewSeq(sequence);
    ajStrAssignC(&tailstr, "");

    seqlength = ajStrGetLen(str);
    str       = ajSeqGetSeqCopyS(sequence);

    redatanew = ajStrNewC("PROSITE/prosite.lines");
    docdata   = ajStrNewC("PROSITE/");

    inf = ajDatafileNewInNameS(redatanew);
    if(!inf)
	ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");

    ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
    ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
    ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
    ajReportSetHeaderS(report, tmpstr);

    while(ajReadlineTrim(inf, &regexp))
    {
	p=ajStrGetPtr(regexp);
	if(*p && *p!=' ' && *p!='^')
	{
	    p=ajSysFuncStrtok(p," ");
	    ajStrAssignC(&name,p);
	    if(prune)
		if(ajStrMatchCaseC(name,"myristyl") ||
		   ajStrMatchCaseC(name,"asn_glycosylation") ||
		   ajStrMatchCaseC(name,"camp_phospho_site") ||
		   ajStrMatchCaseC(name,"pkc_phospho_site") ||
		   ajStrMatchCaseC(name,"ck2_phospho_site") ||
		   ajStrMatchCaseC(name,"tyr_phospho_site"))
		{
		    for(i=0;i<4;++i)
			ajReadlineTrim(inf, &regexp);
		    continue;
		}
	    p=ajSysFuncStrtok(NULL," ");
	    ajStrAssignC(&accession,p);
	}

	if(ajStrPrefixC(regexp, "^"))
	{
	    p = ajStrGetPtr(regexp);

	    ajStrAssignC(&temp,p+1);
	    ajStrAssignC(&savereg,p+1);

	    match = embPatMatchFind(temp, str, ajFalse, ajFalse);
	    number = embPatMatchGetNumber(match);

	    for(i=0; i<number; i++)
	    {
		seqlength = ajStrGetLen(str);

		start = 1+embPatMatchGetStart(match, i);

		end = 1+embPatMatchGetEnd(match, i);

		length = embPatMatchGetLen(match, i);

		gf = ajFeatNew(tab, NULL, fthit, start, end,
			       (float) length, ' ', 0);

		ajFmtPrintS(&tmpstr, "*motif %S", name);
		ajFeatTagAddSS(gf, NULL, tmpstr);

		if(start-5<0)
		    zstart = 0;
		else
		    zstart = start-5;

		if(end+5> seqlength)
		    zend = end;
		else
		    zend = end+5;


		ajStrAssignSubS(&temp, str, zstart, zend);
	    }


	    if(full && number)
	    {
		ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
		ajStrAppendC(&redatanew,ajStrGetPtr(accession));
		inf2 = ajDatafileNewInNameS(redatanew);
		if(!inf2)
		    continue;

		/*
		** Insert Prosite documentation from files made by
		** prosextract.c
		*/
		ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
		ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
		while(ajReadlineTrim(inf2, &text))
		    ajFmtPrintAppS(&tailstr, "%S\n", text);

		ajFmtPrintAppS(&tailstr, "\n***************\n\n");
		ajFileClose(&inf2);

	    }
	    embPatMatchDel(&match);
	}
    }

    ajReportSetTailS(report,tailstr);
    ajReportWrite(report, tab, sequence);

    ajReportDel(&report);
    ajFeattableDel(&tab);

    ajStrDel(&temp);
    ajStrDel(&regexp);
    ajStrDel(&savereg);
    ajStrDel(&str);
    ajStrDel(&data);
    ajStrDel(&docdata);
    ajStrDel(&text);
    ajStrDel(&redatanew);
    ajStrDel(&accession);
    ajSeqDel(&sequence);
    ajStrDel(&tailstr);
    ajStrDel(&fthit);
    ajStrDel(&name);
    ajStrDel(&tmpstr);

    ajFeattableDel(&tab);
    ajFileClose(&inf);

    embExit();

    return 0;
}
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
Beispiel #18
0
/* @funcstatic seqwords_keysearch  ********************************************
**
** Search swissprot with terms structure and writes a hitlist structure
**
** @param [r] inf   [AjPFile]     File pointer to swissprot database
** @param [r] terms [AjPTerms]    Terms object pointer
** @param [w] hits  [EmbPHitlist*] Hitlist object pointer
**
** @return [AjBool] True on success
** @@
******************************************************************************/
static AjBool seqwords_keysearch(AjPFile inf, 
				 AjPTerms terms,
				 EmbPHitlist *hits)
{
    AjPStr   line           =NULL;	/* Line of text. */
    AjPStr   id             =NULL;	/* Line of text. */
    AjPStr   temp           =NULL;
    ajint    s              =0;         /* Temp. start of hit value. */
    ajint    e              =0;         /* Temp. end of hit value. */
    AjPInt   start          =NULL;      /* Array of start of hit(s). */
    AjPInt   end            =NULL;      /* Array of end of hit(s). */
    ajint    nhits          =0;         /* Number of hits. */
    ajint    x              =0;         
    AjBool   foundkw        =ajFalse;
    AjBool   foundft        =ajFalse;


    /* Check for valid args. */
    if(!inf)
	return ajFalse;
    

    

    /* Allocate strings and arrays. */
    line       = ajStrNew();
    id         = ajStrNew();
    start      = ajIntNew();
    end        = ajIntNew();



    /* Start of main loop. */
    while((ajReadlineTrim(inf,&line)))
    {
	/* Parse the AC line. */
	if(ajStrPrefixC(line,"AC"))
	{
	    /* Copy accesion number and remove the ';' from the end. */
	    ajFmtScanS(line, "%*s %S", &id);
	    ajStrExchangeCC(&id, ";", "\0");
	    
	    
	    /* Reset flags & no. hits. */
	    foundkw=ajFalse;
	    foundft=ajFalse;
	    nhits=0;
	}
	
	
	/* Search the description and keyword lines with search terms. */
	else if((ajStrPrefixC(line,"DE") || (ajStrPrefixC(line,"KW"))))
	{
	    /* 
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a DE or KW line begins with a search 
	     ** term, we must add a leading and trailing space to line.
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    ajStrInsertC(&line, 0, " ");


	    for (x = 0; x < terms->N; x++)
		/* Search term is found. */
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{	
		    foundkw=ajTrue;
		    break;
		}
	}

	
	/* Search the feature table line with search terms. */
	else if((ajStrPrefixC(line,"FT   DOMAIN")))
	{
	    /*	
	     ** Search terms have a leading and trailing space to prevent 
	     ** them being found as substrings within other words.  To 
	     ** catch cases where a FT line ends with a search 
	     ** term, we must add a  trailing space to line 
	     ** We must first remove punctation from the line to be parsed.
	     */
	    ajStrExchangeSetCC(&line, ".,;:", "    ");
	    ajStrAppendK(&line, ' ');
	    

	    for (x = 0; x < terms->N; x++)
		if((ajStrFindCaseS(line, terms->Keywords[x])!=-1))
		{
		    /* Search term is found. */
		    foundft = ajTrue;
		    nhits++;
		    
		    /* Assign start and end of hit. */
		    ajFmtScanS(line, "%*s %*s %d %d", &s, &e);


		    ajIntPut(&start, nhits-1, s);
		    ajIntPut(&end, nhits-1, e);
		    break;
		}
	}
	

	/* Parse the sequence. */
	else if((ajStrPrefixC(line,"SQ") && ((foundkw == ajTrue) ||
					     (foundft == ajTrue))))
	{
	    /* Allocate memory for temp. sequence. */
	    temp       = ajStrNew();


	    /* Read the sequence into hitlist structure. */
	    while((ajReadlineTrim(inf,&line)) && !ajStrPrefixC(line,"//"))
		/* Read sequence line into temp. */
		ajStrAppendC(&temp,ajStrGetPtr(line)+3);
 

	    /* Clean up temp. sequence. */
	    ajStrRemoveWhite(&temp);


	    /*Priority is given to domain (rather than full length) sequence.*/
	    if(foundft)
	    {
		for(x=0;x<nhits;x++)
		{
		    /* Increment counter of hits for subsequent hits*/
		    (*hits)->N++;

		    
		    /* Reallocate memory for array of hits in hitlist
                       structure. */
		    AJCRESIZE((*hits)->hits, (*hits)->N);
		    (*hits)->hits[(*hits)->N-1]=embHitNew();
		    ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model,
				 "KEYWORD");
		    

		    /* Assign start and end of hit. */
		    (*hits)->hits[(*hits)->N-1]->Start = ajIntGet(start, x);
		    (*hits)->hits[(*hits)->N-1]->End = ajIntGet(end, x);
				

		    /* Extract sequence within specified range */
		    ajStrAssignSubS(&(*hits)->hits[(*hits)->N - 1]->Seq, temp, 
				(*hits)->hits[(*hits)->N - 1]->Start - 1, 
				(*hits)->hits[(*hits)->N - 1]->End - 1);
		    

		    /* Put id into structure */
		    ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
		}
	    }
	    else
	    {
		/* Increment counter of hits */
		(*hits)->N++;

		    
		/* Reallocate memory for array of hits in hitlist structure */
		AJCRESIZE((*hits)->hits, (*hits)->N);
		(*hits)->hits[(*hits)->N-1]=embHitNew();
		ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model, "KEYWORD");

		/* Extract whole sequence */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Seq, temp); 
		(*hits)->hits[(*hits)->N - 1]->Start = 1; 
		(*hits)->hits[(*hits)->N - 1]->End =
		    ajStrGetLen((*hits)->hits[(*hits)->N - 1]->Seq); 


		/* Put id into structure */
		ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id);
	    }

	    /* Free temp. sequence */
	    ajStrDel(&temp);
	}
    }


    /* Clean up */
    ajStrDel(&line);
    ajStrDel(&id);
    ajIntDel(&start);
    ajIntDel(&end);

    return ajTrue;
}
Beispiel #19
0
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i=0;
    const AjPStr fmt=NULL;
    const char *p=NULL;
    char  c='\0';
    /*
    char *q=NULL;
    AjPSelexseq   sqdata=NULL;
    AjPSelexdata sdata=NULL;
    */
    ajint cnt=0;
    info->name = NULL;
    info->rf=NULL;
    info->cs=NULL;
    info->desc=NULL;
    info->acc=NULL;
    info->au=NULL;
    info->flags=0;

    AjPStr tmpstr = NULL;

    ajSeqsetFill(seqset);

    fmt = ajSeqsetGetFormat(seqset);
    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa =NULL;
    }

    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);
        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }
        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /*
        info->rf = ajCharNewS(seq);

    	len = ajStrGetLen(seq->Selexdata->name);
    	info->name = ajCharNewRes(len+1);
    	strcpy(info->name,ajStrGetPtr(seq->Selexdata->name));
    	len = ajStrGetLen(seq->Selexdata->de);
    	info->desc = ajCharNewRes(len+1);

    	sdata = seq->Selexdata;
    	strcpy(info->desc,ajStrGetPtr(sdata->de));
    	len = ajStrGetLen(sdata->ac);
    	info->acc = ajCharNewRes(len+1);
    	strcpy(info->acc,ajStrGetPtr(sdata->ac));
    	len = ajStrGetLen(sdata->au);
    	info->au = ajCharNewRes(len+1);
    	strcpy(info->au,ajStrGetPtr(sdata->au));
    	if(sdata->tc[0] || sdata->tc[1])
    	{
    	    info->flags |= AINFO_TC;
    	    info->tc1 = sdata->tc[0];
    	    info->tc2 = sdata->tc[1];
    	}
    	if(sdata->nc[0] || sdata->nc[1])
    	{
    	    info->flags |= AINFO_NC;
    	    info->nc1 = sdata->nc[0];
    	    info->nc2 = sdata->nc[1];
    	}
    	if(sdata->ga[0] || sdata->ga[1])
    	{
    	    info->flags |= AINFO_GA;
    	    info->ga1 = sdata->ga[0];
    	    info->ga2 = sdata->ga[1];
    	}

    	for(i=0;i<n;++i)
    	{
    	    seq = ajSeqsetGetseqSeq(seqset,i);
    	    sqdata = seq->Selexdata->sq;
    	    if((len=ajStrGetLen(sqdata->name)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name));
    		else
    		    strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63);
    		info->sqinfo[i].name[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_NAME;
    	    }
    / *
    	    if((len=ajStrGetLen(sqdata->id)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id));
    		else
    		    strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63);
    		info->sqinfo[i].id[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ID;
    	    }
    * /

    	    strcpy(info->sqinfo[i].id,info->sqinfo[i].name);
    	    info->sqinfo[i].flags |= SQINFO_ID;
    	    if((len=ajStrGetLen(sqdata->ac)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac));
    		else
    		    strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63);
    		info->sqinfo[i].acc[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ACC;
    	    }
    	    if((len=ajStrGetLen(sqdata->de)))
    	    {
    		if(len<127)
    		    strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de));
    		else
    		    strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127);
    		info->sqinfo[i].desc[127]='\0';
    		info->sqinfo[i].flags |= SQINFO_DESC;
    	    }
    	    if(sqdata->start || sqdata->stop || sqdata ->len)
    	    {
    		info->sqinfo[i].start = sqdata->start;
    		info->sqinfo[i].stop  = sqdata->stop;
    		info->sqinfo[i].olen  = sqdata->len;
    		info->sqinfo[i].flags |= SQINFO_START;
    		info->sqinfo[i].flags |= SQINFO_STOP;
    		info->sqinfo[i].flags |= SQINFO_OLEN;
    	    }

    	    if(ajStrGetLen(seq->Selexdata->ss))
    	    {

    		info->sqinfo[i].ss = ajCharNewRes(maxlen+1);
    		p = ajStrGetPtr(seq->Selexdata->ss);
    		q = info->sqinfo[i].ss;
    		while((c==*p))
    		{
    		    if(c=='.' || c==' ' || c=='_' || c=='-')
    			*q++ = c;
    		    ++p;
    		}
    		*q = '\0';
    		info->sqinfo[i].flags |= SQINFO_SS;
    	    }
    	}
        }
    / *
        }
    */


    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;
    ajStrDel(&tmpstr);

    return;
}