char *newSpDisplayId(char *oldSpDisplayId)
/* Convert from old Swiss-Prot display ID to new display ID.
 * Looks up oldSpDisplayId in the oldDisplayId column of the spOldNew table
 * in PROTEOME_DB_NAME and returns whatever sqlGetField() delivers for the
 * newDisplayId column (presumably NULL when there is no match -- callers
 * of sqlGetField() elsewhere test for NULL).
 * Returns NULL right away when oldSpDisplayId is NULL. */
{
static struct sqlConnection *conn=NULL;
char condStr[255];
char *newId;    /* renamed: the old local shadowed this function's own name */

/* A NULL ID can not match anything, and handing NULL to "%s" is undefined. */
if (oldSpDisplayId == NULL)
    return NULL;

/* The connection is opened on the first call and kept for later calls. */
if (conn==NULL)
    {
    conn = sqlConnect(PROTEOME_DB_NAME);
    if (conn == NULL) return NULL;
    }

safef(condStr, sizeof(condStr), "oldDisplayId='%s'", oldSpDisplayId);
newId = sqlGetField(PROTEOME_DB_NAME, "spOldNew", "newDisplayId", condStr);

return(newId);
}
Example #2
0
static void writeAliasList(FILE *o1, FILE *o2, char *kgID, char *symbol, char *aliases)
/* Split the comma separated aliases string IN PLACE and write one line per
 * alias: "kgID<TAB>symbol<TAB>alias" to o1 and "kgID<TAB>alias" to o2.
 * Leading blanks of each alias are skipped.  An alias that starts with a
 * double quote loses its first and last character and is echoed to stdout. */
{
char *token = aliases;

while (token != NULL)
    {
    char *comma;
    char *next;

    while (*token == ' ') token++;

    /* cut the current alias off at the next comma, if there is one */
    comma = strstr(token, ",");
    if (comma == NULL)
        {
        next = NULL;
        }
    else
        {
        *comma = '\0';
        next = comma + 1;
        }

    /* get rid of quote character in some aliases */
    if (*token == '"')
        {
        *(token + strlen(token) - 1) = '\0';
        token++;
        printf("%s\n", token);fflush(stdout);
        }

    fprintf(o1, "%s\t%s\t%s\n", kgID, symbol, token);
    fprintf(o2, "%s\t%s\n", kgID, token);

    token = next;
    }
}

int main(int argc, char *argv[])
/* argv[1] is the genome database, argv[2] the protein database holding the
 * hgnc table.  The hgnc table is scanned twice: first for (symbol, aliases),
 * then for (symbol, prvSymbols).  For every symbol that has a kgID in
 * kgXref the symbol and each of its aliases are written to scratch files,
 * which are then sorted/uniq'ed into geneAlias.tab and kgAliasM.tab.
 * Returns 1 if a scratch file can not be created. */
{
struct sqlConnection *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
FILE *o1, *o2;
char cond_str[256];
char *database;
char *proteinDB;
int pass;

if (argc != 3) usage();
database  = argv[1];
proteinDB = argv[2];

/* Only one connection is needed; the second one used to sit idle. */
conn2= hAllocConn(database);

o1 = fopen("j.dat", "w");
if (o1 == NULL)
    {
    fprintf(stderr, "Can't open file %s.\n", "j.dat");
    return(1);
    }
o2 = fopen("jj.dat", "w");
if (o2 == NULL)
    {
    fprintf(stderr, "Can't open file %s.\n", "jj.dat");
    return(1);
    }

/* pass 0 handles the aliases column, pass 1 the prvSymbols column */
for (pass = 0; pass < 2; pass++)
    {
    if (pass == 0)
        {
        sqlSafef(query2, sizeof query2, "select symbol, aliases from %s.hgnc;", proteinDB);
        }
    else
        {
        sqlSafef(query2, sizeof query2, "select symbol, prvSymbols from %s.hgnc;", proteinDB);
        }

    sr2 = sqlMustGetResult(conn2, query2);
    while ((row2 = sqlNextRow(sr2)) != NULL)
        {
        char *symbol  = row2[0];
        char *aliases = row2[1];
        char *kgID;

        if ( (symbol == NULL) || (strlen(symbol) == 0) )
            continue;

        sqlSafefFrag(cond_str, sizeof cond_str, "geneSymbol = '%s'", symbol);
        /* The lookup result is used directly for this row only, so the
         * two strdup() copies that were never freed are gone. */
        kgID = sqlGetField(database, "kgXref", "kgID", cond_str);
        if (kgID == NULL)
            continue;

        fprintf(o2, "%s\t%s\n", kgID, symbol);
        if ( (aliases != NULL) && (strlen(aliases) != 0) )
            writeAliasList(o1, o2, kgID, symbol, aliases);
        }
    sqlFreeResult(&sr2);
    }
fclose(o1);
fclose(o2);

/* geneAlias.tab has 3 columns, the 2nd is HUGO.symbol 
   and 3rd contains aliases and withdraws */

mustSystem("cat  j.dat|sort|uniq  >geneAlias.tab");

/*  kgAliasM.tab has 2 columns, all entries from HUGO.symbol, HUGO.aliases, 
    and HUGO.withdraws are listed in the 2nd column. */
mustSystem("cat jj.dat|sort|uniq  >kgAliasM.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
    
return(0);
}
Example #3
0
void doMiddle(struct cart *theCart)
/* Print the body of an html file.
 * Resolves the "proteinID" cart variable to a protein accession, fills in
 * the related globals (database, organism, protDbName, proteinID,
 * protDisplayID, mrnaID, description, prevGB*), and then hands over to
 * handlePostscript() or doTrackForm().  When several proteins match the
 * query a selection page is presented instead and the function returns. */
{
char cond_str[255];
struct sqlConnection *conn;
char *proteinAC;
char *chp, *chp1, *chp9;
char *debugTmp = NULL;
char *chromStr, *cdsStartStr, *cdsEndStr, posStr[255];

char *supportedGenomeDatabase;

char *answer;
char *queryID;

/* Initialize layout and database. */
cart = theCart;

/* Uncomment this to see parameters for debugging. */
/* Be careful though, it breaks if custom track
 * is more than 4k */
/*
{ struct dyString *state = cgiUrlString();
  hPrintf("State: %s\n", state->string);
}
*/

queryID = cartOptionalString(cart, "proteinID");
/* The variable is optional, so a missing value has to be treated like an
 * empty one instead of being handed to sameString(). */
if (queryID == NULL || sameString(queryID, ""))
    {
    hUserAbort("Please go back and enter a gene symbol or a Swiss-Prot/TrEMBL protein ID.\n");
    }

if (cgiVarExists("db"))
    {
    /* if db is known, get key variables set */
    proteinInSupportedGenome = TRUE;
    database = cgiOptionalString("db");
    organism = hDbOrganism(database);
    protDbName = hPdbFromGdb(database);
    proteinID  = strdup(queryID);
    }
else
    {
    protCntInSwissByGene = searchProteinsInSwissProtByGene(queryID);
    /* no CGI 'db' variable means it did not come in from GB but from pbGateway */
    /* search existing GB databases to see if this protein can be found */
    protCntInSupportedGenomeDb =
        searchProteinsInSupportedGenomes(queryID, &supportedGenomeDatabase);
    if ((protCntInSupportedGenomeDb > 1) || protCntInSwissByGene >= 1)
        {
        /* more than 1 proteins match the query ID, present selection web page */
        proteinInSupportedGenome = 1;
        presentProteinSelections(queryID, protCntInSwissByGene, protCntInSupportedGenomeDb);
        return;
        }
    else
        {
        if (protCntInSupportedGenomeDb == 1)
            {
            /* one and only one protein found in a genome DB that support KG and PB */
            proteinInSupportedGenome = TRUE;
            database = strdup(supportedGenomeDatabase);
            organism = hDbOrganism(database);
            protDbName = hPdbFromGdb(database);
            proteinID=strdup(queryID);
            }
        else
            {
            /* not found in genome DBs that support KG/PB */
            /* now search PROTEOME_DB_NAMES to see if this protein is there. */

            answer = uniProtFindPrimAcc(queryID);
            if (answer == NULL)
                {
                hUserAbort("'%s' does not seem to be a valid UniProtKB protein ID or a gene "
                           "symbol.<br><br>Click <A HREF=\"../cgi-bin/pbGateway\">here</A> "
                           "to start another query.", queryID);
                }

            proteinInSupportedGenome = FALSE;
            database = strdup(GLOBAL_PB_DB);
            organism = strdup("");
            protDbName = strdup(PROTEOME_DB_NAME);
            proteinID = strdup(answer);
            }
        }

    if (proteinInSupportedGenome)
        {
        /* NOTE(review): the lookups below feed each result into the next
         * condition without checking for NULL -- confirm the tables always
         * hold a matching row on this path. */
        spConn = sqlConnect(database);
        sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s'", queryID);
        proteinID = sqlGetField(database, "kgSpAlias", "spID", cond_str);

        sqlSafefFrag(cond_str, sizeof(cond_str), "spID='%s'", proteinID);
        answer = sqlGetField(database, "kgXref", "spDisplayID", cond_str);

        sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s'", answer);
        chromStr    = sqlGetField(database, "knownGene", "chrom", cond_str);
        if (chromStr)
            {
            cdsStartStr = sqlGetField(database, "knownGene", "cdsStart", cond_str);
            cdsEndStr   = sqlGetField( database, "knownGene", "cdsEnd", cond_str);
            safef(posStr, sizeof(posStr), "%s:%s-%s", chromStr, cdsStartStr, cdsEndStr);
            positionStr = strdup(posStr);
            cartSetString(cart, "position", positionStr);
            cartSetString(cart, "organism", organism);
            }
        }
    }
/* print out key variables for debugging */
/* printf("<br>before enter main section: <br>proteinInSupportedGenome=%d<br>proteinID=%s <br>database=%s <br>organism=%s <br>protDbName=%s\n",
proteinInSupportedGenome, proteinID, database, organism, protDbName);fflush(stdout);
*/

if (hTableExists(database, "kgProtMap2"))
    {
    kgVersion = KG_III;
    strcpy(kgProtMapTableName, "kgProtMap2");
    }

debugTmp = cartUsualString(cart, "hgDebug", "off");
if(sameString(debugTmp, "on"))
    hgDebug = TRUE;
else
    hgDebug = FALSE;
conn  = hAllocConn(database);
hgsid     = cartOptionalString(cart, "hgsid");
if (hgsid != NULL)
    {
    safef(hgsidStr, sizeof(hgsidStr), "&hgsid=%s", hgsid);
    }
else
    {
    strcpy(hgsidStr, "");
    }

/* check proteinID to see if it is a valid SWISS-PROT/TrEMBL accession or display ID */
/* then assign the accession number to global variable proteinID */
sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
proteinAC = sqlGetField(protDbName, "spXref3", "accession", cond_str);
if (proteinAC == NULL)
    {
    sqlSafefFrag(cond_str, sizeof(cond_str), "displayID='%s'", proteinID);
    proteinAC = sqlGetField(protDbName, "spXref3", "accession", cond_str);
    if (proteinAC == NULL)
        {
        hUserAbort("'%s' does not seem to be a valid Swiss-Prot/TrEMBL protein ID or gene symbol.<br><br>Click <A HREF=\"../cgi-bin/pbGateway\">here</A> to start another query."
        , proteinID);
        }
    else
        {
        /* the query was a display ID: keep it and switch to the accession */
        protDisplayID = proteinID;
        proteinID = proteinAC;
        }
    }
else
    {
    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
    protDisplayID = sqlGetField(protDbName, "spXref3", "displayID", cond_str);
    }

if (proteinInSupportedGenome)
    {
    if (kgVersion == KG_III)
        {
        sqlSafefFrag(cond_str, sizeof(cond_str), "spId='%s'", proteinID);
        mrnaID = sqlGetField(database, "kgXref", "kgId", cond_str);
        }
    else
        {
        sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s'", protDisplayID);
        mrnaID = sqlGetField(database, "knownGene", "name", cond_str);
        }
    }
else
    {
    mrnaID = NULL;
    positionStr = NULL;
    }

sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
description = sqlGetField(protDbName, "spXref3", "description", cond_str);

/* Split positionStr ("chrom:start-end") into the prevGB* globals.  The
 * string is cut in place at ':' and '-'.  A position that lacks either
 * separator is treated like a missing position instead of dereferencing
 * a NULL search result. */
prevGBChrom    = NULL;
prevGBStartPos = -1;
prevGBEndPos   = -1;
if (positionStr != NULL)
    {
    chp  = strstr(positionStr, ":");
    chp9 = (chp != NULL) ? strstr(chp + 1, "-") : NULL;
    if (chp != NULL && chp9 != NULL)
        {
        *chp = '\0';
        prevGBChrom = cloneString(positionStr);

        chp1 = chp + 1;
        *chp9 = '\0';
        prevGBStartPos = atoi(chp1);
        chp1 = chp9 + 1;
        prevGBEndPos   = atoi(chp1);
        }
    }

/* Do main display. */
if (cgiVarExists("pbt.psOutput"))
    handlePostscript();
else
    {
    doTrackForm(NULL, NULL);
    }
}
static void synonymPrint(struct section *section, 
	struct sqlConnection *conn, char *id)
/* Print out the identifiers known for gene id: aliases (when a kgAlias
 * table exists), gene symbol / UCSC / WormBase / ENSEMBL ID depending on
 * the genome, RefSeq accession or representative RNA, the UniProt protein
 * link(s) with display IDs, and finally the CCDS information.
 * RGD genes are handed off to rgdGene2SynonymPrint() instead. */
{
char *protAcc = getSwissProtAcc(conn, spConn, id);
char *spDisplayId;
char *refSeqAcc = "";
char *mrnaAcc = "";
char *oldDisplayId;
char condStr[255];
char *kgProteinID;
char *parAcc; /* parent accession of a variant splice protein */
char *chp;

if (isRgdGene(conn))
    {
    rgdGene2SynonymPrint(section,conn, id);
    return;
    }
if (sqlTablesExist(conn, "kgAlias"))
    printAlias(id, conn);
if (sameWord(genome, "Zebrafish"))
    {
    char *xrefTable = "ensXRefZfish";
    char *geneIdCol = "ensGeneId";
    /* get Gene Symbol and RefSeq accession from Zebrafish-specific */
    /* cross-reference table */
    printGeneSymbol(id, xrefTable, geneIdCol, conn);
    refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn);
    hPrintf("<B>ENSEMBL ID:</B> %s", id);
    }
else
    {
    char query[256];
    char *toRefTable = genomeOptionalSetting("knownToRef");
    if (toRefTable != NULL && sqlTableExists(conn, toRefTable))
        {
        safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable,
                id);
        refSeqAcc = emptyForNull(sqlQuickString(conn, query));
        }
    if (sqlTableExists(conn, "kgXref"))
        {
        safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id);
        mrnaAcc = emptyForNull(sqlQuickString(conn, query));
        }
    if (sameWord(genome, "C. elegans"))
        hPrintf("<B>WormBase ID:</B> %s<BR>", id);
    else
        hPrintf("<B>UCSC ID:</B> %s<BR>", id);
    }
    
if (refSeqAcc[0] != 0)
    {
    hPrintf("<B>RefSeq Accession: </B> <A HREF=\"");
    printOurRefseqUrl(stdout, refSeqAcc);
    hPrintf("\">%s</A><BR>\n", refSeqAcc);
    }
else if (mrnaAcc[0] != 0)
    {
    safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc);
    if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL)
        {
        hPrintf("<B>Representative RNA: </B> <A HREF=\"");
        printOurMrnaUrl(stdout, mrnaAcc);
        hPrintf("\">%s</A><BR>\n", mrnaAcc);
        }
    else
    /* do not show URL link if it is not found in gbCdnaInfo */
        {
        hPrintf("<B>Representative RNA: %s </B>", mrnaAcc);
        }
    }
if (protAcc != NULL)
    {
    /* Stays "" unless knownGene yields a protein ID for the exact
     * transcript position recorded in the cart. */
    kgProteinID = "";
    if (hTableExists(sqlGetDatabase(conn), "knownGene")
        && (isNotEmpty(cartOptionalString(cart, hggChrom)) &&
	      differentWord(cartOptionalString(cart, hggChrom),"none")))
        {
        char *startStr = cartOptionalString(cart, hggStart);
        char *endStr   = cartOptionalString(cart, hggEnd);

        /* Without both coordinates the condition below would be malformed.
         * NOTE(review): the cart values go into the SQL condition
         * unescaped (start/end even unquoted) -- confirm they are
         * validated upstream. */
        if (isNotEmpty(startStr) && isNotEmpty(endStr))
            {
            char *found;
            safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", 
                    id, cartOptionalString(cart, hggChrom), 
                    startStr, 
                    endStr);
            found = sqlGetField(database, "knownGene", "proteinID", condStr);
            /* keep the empty default when no row matches, so the strstr()
             * below never sees NULL */
            if (found != NULL)
                kgProteinID = found;
            }
        }

    hPrintf("<B>Protein: ");
    if (strstr(kgProteinID, "-") != NULL)
        {
        parAcc = cloneString(kgProteinID);
        chp = strstr(parAcc, "-");
        *chp = '\0';
	
        /* show variant splice protein and the UniProt link here */
        /* (the '/' between "uniprot" and the accession was missing) */
        hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
            "TARGET=_blank>%s</A></B>, splice isoform of ",
            kgProteinID, kgProteinID);
        hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
            "TARGET=_blank>%s</A></B>\n",
            parAcc, parAcc);
        }
    else
        {
        hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
            "TARGET=_blank>%s</A></B>\n",
            protAcc, protAcc);
        }
    /* show SWISS-PROT display ID if it is different than the accession ID */
    /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */
    spDisplayId = spAnyAccToId(spConn, protAcc);
    if (spDisplayId == NULL) 
        {
        errAbort("<br>%s seems to no longer be a valid protein ID in our latest UniProtKB DB.", protAcc);
        }
	
    if (strstr(spDisplayId, protAcc) == NULL)
        {
        hPrintf(" (aka %s", spDisplayId);
        /* show once if the new and old displayId are the same */
        oldDisplayId = oldSpDisplayId(spDisplayId);
        if (oldDisplayId != NULL)
            {
            if (!sameWord(spDisplayId, oldDisplayId)
                && !sameWord(protAcc, oldDisplayId))
                {
                hPrintf(" or %s", oldDisplayId);
                }
            }
        hPrintf(")<BR>\n");
        }
    }
printCcds(id, conn);

}
Example #5
0
int getSuperfamilies(char *proteinID)
/* preserved here for previous older genomes.
   Newer genomes should be using getSuperfamilies2(). 6/16/04 Fan
 *
 * Maps proteinID to an Ensembl peptide name (via ensemblXref3 when that
 * table exists, otherwise via ensGeneXref/ensTranscript using the global
 * protDisplayID), stores it in the global ensPepName, then reads the
 * sfAssign rows for that peptide with evalue <= 0.02.  Every "start-end"
 * segment of a row's region column adds one entry to the global arrays
 * sfId[], sfStart[], sfEnd[] and superfam_name[].
 * Returns the number of entries stored, 0 when nothing could be looked up.
 * NOTE(review): the capacity of those arrays is not visible here and the
 * entry count is not bounded -- confirm against their declarations. */
{
struct sqlConnection *conn;
char query[MAXNAMELEN];
struct sqlResult *sr;
char **row;

char cond_str[255];

char *sfID, *sfDesc;
char *region;

char *ensPep;
char *transcriptName;

char *chp, *dash, *comma;
int  ii = 0;
int  int_start, int_end;

if (!hTableExists(database, "sfAssign")) return(0);

/* The peptide lookups only need the database name, so the connection is
 * allocated after them; the early returns therefore hold nothing that
 * would have to be released. */
if (hTableExists(database, "ensemblXref3")) 
    {	
    /* use ensemblXref3 for Ensembl data release after ensembl34d */
    sqlSafefFrag(cond_str, sizeof(cond_str), "tremblAcc='%s'", proteinID);
    ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str);
    if (ensPep == NULL)
        {
        sqlSafefFrag(cond_str, sizeof(cond_str), "swissAcc='%s'", proteinID);
        ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str);
        if (ensPep == NULL) return(0);
        }
    }
else
    {
    if (! (hTableExists(database, "ensemblXref") || hTableExists(database, "ensTranscript") ) )
       return(0);
    
    /* two steps query needed because the recent Ensembl gene_xref 11/2003 table does not have 
       valid translation_name */
    sqlSafefFrag(cond_str, sizeof(cond_str), "external_name='%s'", protDisplayID);
    transcriptName = sqlGetField(database, "ensGeneXref", "transcript_name", cond_str);
    if (transcriptName == NULL)
        return(0); 

    sqlSafefFrag(cond_str, sizeof(cond_str), "transcript_name='%s';", transcriptName);
    ensPep = sqlGetField(database, "ensTranscript", "translation_name", cond_str);
    if (ensPep == NULL) 
        return(0); 
    }

ensPepName = ensPep;

conn = hAllocConn(database);
sqlSafef(query, sizeof(query), "select * from %s.sfAssign where seqID='%s' and evalue <= 0.02;", database, ensPep);
sr = sqlMustGetResult(conn, query);

while ((row = sqlNextRow(sr)) != NULL)
    {      
    /* columns used: 3 = region list, 5 = superfamily id */
    region   = row[3];
    sfID     = row[5];
    /* sfDesc   = row[6]; */
    /* !!! the recent Superfamily sfAssign table does not have valid sf description */
    sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID);
    sfDesc = sqlGetField(database, "sfDes", "description", cond_str);
    if (sfDesc == NULL)
        sfDesc = "";    /* no description row: store an empty name */

    /* region is a comma separated list of "start-end" segments; it is cut
     * up in place.  Parsing stops at the first segment without a '-', and
     * a segment whose numbers do not parse is skipped. */
    chp = region;
    while (chp != NULL)
        {
        dash = strstr(chp, "-");
        if (dash == NULL)
            break;
        *dash = '\0';

        comma = strstr(dash + 1, ",");
        if (comma != NULL) 
            *comma = '\0';

        if (sscanf(chp, "%d", &int_start) == 1
            && sscanf(dash + 1, "%d", &int_end) == 1)
            {
            sfId[ii]    = atoi(sfID);
            sfStart[ii] = int_start;
            sfEnd[ii]   = int_end;
            strncpy(superfam_name[ii], sfDesc, MAXNAMELEN-1);
            ii++;
            }

        chp = (comma != NULL) ? comma + 1 : NULL;
        }
    }

sqlFreeResult(&sr);
hFreeConn(&conn);
  
return(ii);
}
Example #6
0
void doAnomalies(char *aa, int len, int *yOffp)
/* draw the AA Anomalies track
 *
 * aa    - residue characters of the current protein (len of them are read)
 * len   - number of residues
 * yOffp - in: y offset of this track; out: advanced by 15
 *
 * For each of the 20 residue types the pctLow/pctHi limits are read from
 * the pbAnomLimit table and compared (via chkAnomaly()) with the residue's
 * frequency in this protein.  A positive result draws a box above the
 * baseline, a negative one below it.  Residue types whose limits are
 * missing from the table are never marked. */
{
char res;
int index;

char cond_str[255];
char *answer;
    
int xx, yy;
int j;
	
int aaResCnt[20];
double aaResFreqDouble[20];
int haveLimits[20];     /* 0 when pctLow or pctHi could not be read */
int abnormal;
int ia = -1;
double pctLow[20], pctHi[20];

for (j=0; j<20; j++) 
    {
    aaResCnt[j] = 0;
    haveLimits[j] = 1;
       
    /* get cutoff threshold value pairs */
    sqlSafefFrag(cond_str, sizeof(cond_str), "AA='%c'", aaAlphabet[j]);

    /* a lookup that yields NULL must not reach atof() */
    answer = sqlGetField(database, "pbAnomLimit", "pctLow", cond_str);
    if (answer != NULL)
        pctLow[j] = (double)(atof(answer));
    else
        {
        pctLow[j] = 0.0;
        haveLimits[j] = 0;
        }

    answer = sqlGetField(database, "pbAnomLimit", "pctHi", cond_str);
    if (answer != NULL)
        pctHi[j] = (double)(atof(answer));
    else
        {
        pctHi[j] = 0.0;
        haveLimits[j] = 0;
        }
    }

/* count frequency for each residue for current protein */
for (index=0; index<len; index++)
    {
    for (j=0; j<20; j++)
        {
        if (aa[index] == aaChar[j])
            {
            aaResCnt[j] ++;
            break;
            }
        }
    }

for (j=0; j<20; j++)
    {
    /* an empty protein has no frequencies; avoid dividing by zero */
    aaResFreqDouble[j] = (len > 0) ? ((double)aaResCnt[j])/((double)len) : 0.0;
    }

currentYoffset = *yOffp;
    
for (index=0; index < len; index++)
    {
    res = aa[index];
    
    ia = -1;
    for (j=0; j<20; j++)
        {
        if (res == aaChar[j])
            {
            ia = j;
            break;
            }
        }

    /* skip non-standard AA alphabets */
    if (ia == -1) continue;

    calxy(index, *yOffp, &xx, &yy);

    abnormalColor = pbRed;
    abnormal = haveLimits[ia]
        ? chkAnomaly(aaResFreqDouble[ia], pctLow[ia], pctHi[ia])
        : 0;
    if (abnormal > 0)
        {
        vgBox(g_vg, xx, yy-5, 1*pbScale, 5, abnormalColor);
        }
    else if (abnormal < 0)
        {
        vgBox(g_vg, xx, yy, 1*pbScale, 5, abnormalColor);
        }
    /* baseline tick for every standard residue */
    vgBox(g_vg, xx, yy, 1*pbScale, 1, MG_BLACK);
    }

calxy0(0, *yOffp, &xx, &yy);
vgBox(g_vg, 0, yy-10, xx, 20, bkgColor);

trackTitle = cloneString("AA Anomalies");
vgTextRight(g_vg, xx-25, yy-4, 10, 10, MG_BLACK, g_font, trackTitle);
trackTitleLen = strlen(trackTitle);
mapBoxTrackTitle(xx-25-trackTitleLen*6, yy-6, trackTitleLen*6+12, 14, trackTitle, "pepAnom");

/* update y offset */
*yOffp = *yOffp + 15;
}
Example #7
0
int main(int argc, char *argv[])
/* argv[1] is the genome database, argv[2] a Swiss-Prot style flat file,
 * argv[3] the output file.  Each record of the flat file has to start with
 * an "ID   " line and ends at a "//" line.  The ID is looked up as
 * knownGene.proteinID; when a gene name is found, every alias on the
 * record's GN lines (aliases are separated by " OR " / " AND ") is
 * written as "kgID<TAB>alias". */
{
struct sqlConnection *conn;
    
FILE *inf;
FILE *o1;

char cond_str[256];
char *database;
char *proteinFileName;
char *outputFileName;
char *alias;

char *id;
char *tok, *sepOr, *sepAnd, *sep, *chp;
size_t lineLen;

char *kgID;
char line[2000];

if (argc != 4) usage();
    
database         = argv[1];
proteinFileName  = argv[2];
outputFileName   = argv[3];

conn = hAllocConn(database);

o1 = mustOpen(outputFileName, "w");
    
if ((inf = mustOpen(proteinFileName, "r")) == NULL)
    {		
    fprintf(stderr, "Can't open file %s.\n", proteinFileName);
    exit(8);
    }
	
while (fgets(line, sizeof line, inf) != NULL)
    {
    if (strstr(line, "ID   ") != line)
        {
        fprintf(stderr, "expected ID line, but got: %s\n", line);
        exit(1);
        } 
    id = line + strlen("ID   ");
    chp = strstr(id, " ");
    if (chp == NULL)
        {
        /* no blank after the ID: report it instead of writing through NULL */
        fprintf(stderr, "expected ID line, but got: %s\n", line);
        exit(1);
        }
    *chp = '\0';
        
    /* The ID is copied into cond_str here, and the lookup result is only
     * used until the next record, so no private copies are made. */
    sqlSafefFrag(cond_str, sizeof cond_str, "proteinID = '%s'", id);
    kgID = sqlGetField(database, "knownGene", "name", cond_str);

    if (fgets(line, sizeof line, inf) == NULL) 
        {
        break;
        }
    do 
        {
        /* "//" signal end of a record */		
        if ((line[0] == '/') && (line[1] == '/')) break;

        // work on GN (Gene Name) line only
        if (strstr(line, "GN   ") == NULL)
            continue;

        /* drop the line end, then the period that closes a GN line */
        lineLen = strlen(line);
        if (lineLen > 0 && line[lineLen-1] == '\n')
            line[--lineLen] = '\0';
        if (lineLen > 0 && line[lineLen-1] == '.')
            line[--lineLen] = '\0';
	    		
        tok = line + 5;
        while (tok != NULL)
            {
            while (*tok == ' ') tok++;

            /* the next alias ends at whichever separator comes first */
            sepOr  = strstr(tok, " OR ");
            sepAnd = strstr(tok, " AND ");
            if (sepOr == NULL)
                sep = sepAnd;
            else if (sepAnd == NULL)
                sep = sepOr;
            else
                sep = (sepOr < sepAnd) ? sepOr : sepAnd;

            alias = tok;
            if (sep == NULL)
                {
                tok = NULL;
                }
            else 
                {
                *sep = '\0';
                /* 4 skips " OR " entirely; for " AND " it lands on the
                 * trailing blank, which the loop above steps over */
                tok = sep+4;
                }

            if (kgID != NULL)
                {
                // clean up "(XXXX" or "XXXX)"
                if (*alias == '(') alias++;
                chp = strstr(alias, ")");
                if (chp != NULL) *chp = '\0';

                fprintf(o1, "%s\t%s\n", kgID, alias);
                }
            }
        } while (fgets(line, sizeof line, inf) != NULL);
    }
fclose(inf);
fclose(o1);
hFreeConn(&conn);
return(0);
}
static void gadPrint(struct section *section, 
	struct sqlConnection *conn, char *geneId)
/* Print out GAD section.
 * Looks up the gene symbol that geneId shares between kgXref and gadAll,
 * then prints links to the Genetic Association Database and the CDC HuGE
 * navigator, the list of positively associated diseases, and the related
 * studies.  Only the first three studies are listed unless the CGI
 * variable showAllRef is "Y"/"y"; when further studies exist a link to
 * the complete list is added.  Prints nothing when no symbol is found. */
{
int refPrinted = 0;
boolean showCompleteGadList = FALSE;
boolean firstDisease = TRUE;
boolean listOpen = FALSE;   /* TRUE once <OL> has been written */
boolean moreRefs = FALSE;   /* TRUE when studies remain beyond those shown */

char condStr[256];
char query[256];
struct sqlResult *sr;
char **row;
struct dyString *currentCgiUrl;
char *upperDisease;
char *showAllRef;

char *url = 
"http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=gene=";
char *itemName;

safef(condStr, sizeof(condStr), 
"k.kgId='%s' and k.geneSymbol = g.geneSymbol", geneId);
itemName = sqlGetField(database, "kgXref k, gadAll g", "k.geneSymbol", condStr);
/* Without a gene symbol there is nothing to link to or to query for. */
if (itemName == NULL)
    return;

showAllRef = cgiOptionalString("showAllRef");
if (showAllRef != NULL
    && (sameWord(showAllRef, "Y") || sameWord(showAllRef, "y")))
    {
    showCompleteGadList = TRUE;
    }
currentCgiUrl = cgiUrlString();
   
printf("<B>Genetic Association Database: ");
printf("<A HREF=\"%s'%s'\" target=_blank>", url, itemName);
printf("%s</B></A>\n", itemName);

printf("<BR><B>CDC HuGE Published Literature:  ");
printf("<A HREF=\"%s%s%s\" target=_blank>", 
       "http://hugenavigator.net/HuGENavigator/searchSummary.do?firstQuery=",
       itemName, 
       "&publitSearchType=now&whichContinue=firststart&check=n&dbType=publit&Mysubmit=go");
printf("%s</B></A>\n", itemName);

/* List diseases associated with the gene */
safef(query, sizeof(query),
"select distinct broadPhen from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen",
itemName);
sr = sqlMustGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    upperDisease = replaceChars(row[0], "'", "''");
    touppers(upperDisease);
    /* heading before the first disease, comma before every later one */
    if (firstDisease)
        {
        printf("<BR><B>Positive Disease Associations:  </B>");
        firstDisease = FALSE;
        }
    else
        {
        printf(", ");
        }
    printf("<A HREF=\"%s%s%s%s%s\" target=_blank>",
    "http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=upper(DISEASE)%20like%20'%25",
    cgiEncode(upperDisease), "%25'%20AND%20upper(GENE)%20%20like%20'%25", itemName, "%25'");
    printf("%s</B></A>\n", row[0]);
    }
sqlFreeResult(&sr);

safef(query, sizeof(query), 
   "select broadPhen,reference,title,journal, pubMed, conclusion from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen",
   itemName);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);

if (row != NULL)
    {
    printf("<BR><B>Related Studies: </B><OL>");
    listOpen = TRUE;
    }
while (row != NULL)
    {
    printf("<LI><B>%s </B>", row[0]);

    printf("<br>%s, %s, %s.\n", row[1], row[2], row[3]);
    if (row[4] != NULL && !sameWord(row[4], ""))
        {
        printf(" [PubMed ");
        /* the HREF used to end in a stray apostrophe */
        printf("<A HREF=\"%s%s%s\" target=_blank>",
        "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=Abstract&list_uids=",
        row[4],"&query_hl=1&itool=genome.ucsc.edu");
        printf("%s</B></A>]\n", row[4]);
        }
    printf("<br><i>%s</i>\n", row[5]);
	
    printf("</LI>\n");
    refPrinted++;

    /* Fetch the next row before deciding to stop, so that "more ..." is
     * offered only when another study really exists, and so that the row
     * pointer is never looked at after the result has been freed. */
    row = sqlNextRow(sr);
    if ((!showCompleteGadList) && (refPrinted >= 3))
        {
        moreRefs = (row != NULL);
        break;
        }
    }
sqlFreeResult(&sr);
if (listOpen)
    printf("</OL>");
    
if (moreRefs)
    {
    printf("<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; more ...  </B>");
    printf(
          "<A HREF=\"%s?showAllRef=Y&%s&#35;gad\">click here to view the complete list</A> ", 
          "hgGene", currentCgiUrl->string);
    }
}
Example #9
0
int main(int argc, char *argv[])
/* argv[1] is the genome database, argv[2] the output file.
 * For every record of the omimAvPos table, each protein that spXref2
 * links to the record's OMIM id is fetched, and the amino acid at the
 * record's position is compared with the record's base and substitution
 * amino acids.  Matches are written as "id<TAB>subId<TAB>accession<TAB>pos";
 * everything else (no sequence, position outside the sequence, neither
 * amino acid matching) is counted as an error.  Counters go to stderr.
 * Returns 1 if the output file can not be created. */
{
struct sqlConnection *conn2, *conn3;
 
char query2[256], query3[256];
struct sqlResult *sr2, *sr3;
char **row2, **row3;

char *accession;
char condStr[255];

char *id, *subId, *pos;
char *baseAAStr, *subsAAStr;
char baseAA, subsAA;

char *genomeDb;
char *aaSeq;
char ch;

int  aaPos, aaLen;
int nTotal = 0;
int nOK    = 0;
int nBase  = 0;
int nErr   = 0;
int nSubs  = 0;

FILE   *outf;

if (argc != 3) usage();
genomeDb = argv[1];
   
outf = fopen(argv[2], "w");
if (outf == NULL)
    {
    fprintf(stderr, "Can't open file %s.\n", argv[2]);
    return(1);
    }
/* a third connection used to be allocated here but was never used */
conn2= hAllocConn();
conn3= hAllocConn();
	
/* loop thru all records in the omimAvPos table */
sqlSafef(query2, sizeof query2, "select * from %s.omimAvPos", genomeDb);
sr2 = sqlMustGetResult(conn2, query2);
while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    id 		= row2[0];
    subId 	= row2[1];
    pos 	= row2[3];
    baseAAStr	= row2[4];
    subsAAStr	= row2[5]; 
    
    /* A missing column becomes NUL / 0, which can never match a residue
     * or be a valid position. */
    baseAA	= (baseAAStr != NULL) ? *baseAAStr : '\0';
    subsAA	= (subsAAStr != NULL) ? *subsAAStr : '\0';
    aaPos 	= (pos != NULL) ? atoi(pos) : 0;
    
    /* find corresponding protein for each OMIM record */
    sqlSafef(query3, sizeof query3,  
        "select distinct accession, extDB, extAC from %s.spXref2 where extAC='%s' and extDB='MIM';",
    	    PROTEOME_DB_NAME, id);

    sr3 = sqlMustGetResult(conn3, query3);
    while ((row3 = sqlNextRow(sr3)) != NULL)
        {
        accession = row3[0];

        nTotal++;
	
        /* get protein sequence */
        sqlSafefFrag(condStr, sizeof condStr, "acc='%s'", accession);
        aaSeq = sqlGetField(UNIPROT_DB_NAME, "protein", "val", condStr);
        aaLen = (aaSeq != NULL) ? (int)strlen(aaSeq) : 0;

        /* check AA (both base and substitution) of the AV entry against 
           AA in the protein sequence; positions are 1-based, so anything
           below 1 or beyond the sequence is an error */
        if (aaPos < 1 || aaPos > aaLen)
            {
            nErr++;
            continue;
            }

        ch = *(aaSeq+aaPos-1);
        if (ch == baseAA)
            {
            nOK++;
            nBase++;
            }
        else if (ch == subsAA)
            {
            nOK++;
            nSubs++;
            }
        else
            {
            nErr++;
            continue;
            }

        fprintf(outf, "%s\t%s\t%s\t%s\n", id, subId, accession, pos);
        }
    sqlFreeResult(&sr3);
    }
sqlFreeResult(&sr2);

hFreeConn(&conn2);
hFreeConn(&conn3);
fclose(outf);

fprintf(stderr, "nTotal\t= %6d\n", nTotal);
fprintf(stderr, "nOk\t= %6d\n", nOK);
fprintf(stderr, "nBase\t= %6d\n", nBase);
fprintf(stderr, "nSub\t= %6d\n", nSubs);
fprintf(stderr, "nErr\t= %6d\n", nErr);

return(0);
}
Example #10
0
int main(int argc, char *argv[])
/* For every RefSeq row in <argv[1]>Temp.locus2Ref0 that has a 'g' type
 * sequence in locus2Acc0, for which checkMrna() reports no KG mRNA, and that
 * is present in refGene, look up a Swiss-Prot display ID and write one gene
 * record per matching refGene row to dnaGene.tab.  Link records
 * (name, 'g', protein accession) go to j.dat, which is sorted/uniq'ed into
 * dnaLink.tab and removed at the end.
 *
 * argv[1] - database name; locus2Ref0 and locus2Acc0 are read from "<argv[1]>Temp"
 * argv[2] - protein database holding the hugo and spXref2 tables
 * argv[3] - genome database holding refGene (also passed to hSetDb) */
{
    struct sqlConnection *conn, *conn2, *conn3, *conn5;
    char query2[256], query3[256], query5[256];
    struct sqlResult *sr2, *sr3, *sr5;
    char **row2, **row3, **row5;
    char cond_str[512];

    char *chp;
    FILE *o1, *o2;

    boolean hasKGmRNA;

    char *proteinDisplayID;
    char *gbAC;

    char *locusID;		/* LocusLink ID */
    char *refAC;		/* Refseq accession.version */
    char *proteinAC2;	/* protein accession.version */

    char *protDbName;

    char *name, *chrom, *strand, *txStart, *txEnd, *cdsStart, *cdsEnd,
         *exonCount, *exonStarts, *exonEnds;

    char *gseq, *hseq, *swissprot;
    int alignmentID=0;

    if (argc != 4) usage();

    dbName = argv[1];
    protDbName = argv[2];
    genomeReadOnly = argv[3];

    sprintf(tempDbName, "%sTemp", dbName);

    hSetDb(genomeReadOnly);

    conn = hAllocConn();
    conn2= hAllocConn();
    conn3= hAllocConn();
    conn5= hAllocConn();

    o1 = fopen("dnaGene.tab", "w");
    o2 = fopen("j.dat", "w");
    /* fprintf() on a NULL stream would crash much later, far from the cause */
    if (o1 == NULL || o2 == NULL)
    {
        fprintf(stderr, "Can't open dnaGene.tab or j.dat for writing.\n");
        return(1);
    }

    /* scan all RefSeq entries */
    sqlSafef(query2, sizeof query2, "select * from %s.locus2Ref0;", tempDbName);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    while (row2 != NULL)
    {
        /* columns 2, 3 and 5 (NCBI gi, review status, tax id) are not needed here */
        locusID 	= row2[0];
        refAC 	= row2[1];
        proteinAC2 	= row2[4];

        /* strip the ".version" suffix from the RefSeq accession, in place */
        chp = strstr(refAC, ".");
        if (chp != NULL) *chp = '\0';

        proteinDisplayID = NULL;

        /* check if the locusID of this RefSeq points to a KG mRNA */
        hasKGmRNA = checkMrna(locusID);

        /* check if this RefSeq has 'g' type sequence(s) referenced */
        sqlSafefFrag(cond_str, sizeof cond_str, "locusID=%s and seqType='g';", locusID);
        gseq = sqlGetField(tempDbName, "locus2Acc0", "gbac", cond_str);

        /* process only 'g' type record which does not have corresponding KG entry */
        if ((!hasKGmRNA) && (gseq != NULL))
        {
            sqlSafefFrag(cond_str, sizeof cond_str, "name='%s'", refAC);
            hseq = sqlGetField(genomeReadOnly, "refGene", "name", cond_str);
            if (hseq != NULL)
            {
                sqlSafefFrag(cond_str, sizeof cond_str, "refseq='%s';", refAC);
                swissprot = sqlGetField(protDbName, "hugo", "swissprot", cond_str);

                /* a non-NULL but empty swissprot field is treated as "no entry" */
                if ((swissprot != NULL) && (strlen(swissprot) > 0))
                {
                    /* HUGO has an entry with swissprot ID, get display ID */
                    sqlSafefFrag(cond_str, sizeof cond_str, "accession='%s';", swissprot);
                    proteinDisplayID = sqlGetField(protDbName,
                                                   "spXref2", "displayID", cond_str);
                    if (proteinDisplayID == NULL)
                    {
                        fprintf(stderr, "%s: a HUGO.swissprot, ", swissprot);
                        fprintf(stderr, "but not a SP Primary AC.\n");
                        fflush(stderr);
                    }
                }

                /* not finding it in HUGO does not mean not a valid one for sure */
                if (proteinDisplayID == NULL)
                {
                    /* get gbAC and check if spXref2 actually has it */
                    sqlSafef(query3, sizeof query3, "select gbAC from %s.locus2Acc0 where locusID=%s;",
                             tempDbName, locusID);
                    sr3 = sqlMustGetResult(conn3, query3);
                    row3 = sqlNextRow(sr3);

                    while (row3 != NULL)
                    {
                        gbAC = row3[0];
                        chp = strstr(gbAC, ".");
                        if (chp != NULL) *chp = '\0';
                        sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", gbAC);
                        proteinDisplayID = sqlGetField(protDbName,
                                                       "spXref2", "displayID", cond_str);
                        /* the first accession known to spXref2 wins */
                        if (proteinDisplayID != NULL) break;
                        row3 = sqlNextRow(sr3);
                    }
                    sqlFreeResult(&sr3);
                }

                if (proteinDisplayID != NULL)
                {
                    /* generate KG entry */
                    sqlSafef(query5, sizeof query5, "select * from %s.refGene where name='%s';", genomeReadOnly, refAC);
                    sr5 = sqlMustGetResult(conn5, query5);
                    row5 = sqlNextRow(sr5);
                    while (row5 != NULL)
                    {
                        name 	= row5[0];
                        chrom 	= row5[1];
                        strand	= row5[2];
                        txStart 	= row5[3];
                        txEnd   	= row5[4];
                        cdsStart	= row5[5];
                        cdsEnd	= row5[6];
                        exonCount   = row5[7];
                        exonStarts  = row5[8];
                        exonEnds    = row5[9];

                        fprintf(o1, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tdna%d\n",
                                name,
                                chrom,
                                strand,
                                txStart,
                                txEnd,
                                cdsStart,
                                cdsEnd,
                                exonCount,
                                exonStarts,
                                exonEnds,

                                proteinDisplayID,
                                alignmentID);
                        alignmentID++;

                        fprintf(o2, "%s\t%c\t%s\n", name, 'g', proteinAC2);
                        row5 = sqlNextRow(sr5);
                    }
                    sqlFreeResult(&sr5);
                }
            }
        }
        row2 = sqlNextRow(sr2);
    }

    fclose(o1);
    fclose(o2);
    sqlFreeResult(&sr2);
    hFreeConn(&conn);
    hFreeConn(&conn2);
    hFreeConn(&conn3);
    hFreeConn(&conn5);
    system("sort j.dat|uniq >dnaLink.tab");
    system("rm j.dat");
    return(0);
}
void doStamps(char *proteinID, char *mrnaID, char *aa, struct vGfx *vg, int *yOffp)
/* draw proteome browser stamps
 *   proteinID - accession used to query pepPi, pepMwAa and swInterPro
 *   mrnaID    - used as qName for the exon count lookup when kgVersion is KG_III
 *   aa        - amino acid sequence; residue counts and frequencies come from it
 *   vg        - not referenced in this function (the global g_vg is used instead)
 *   yOffp     - y offset; the first row of stamps is placed 135 below *yOffp */
{
int i,j,l;

char cond_str[200];
char *valStr;
char valStr2[50];
char *answer;
double pI=0.0;
double exonCount;
char *chp;
int len;
int cCnt;

int xPosition;
int yPosition;
int stampWidth, stampHeight;

int aaResCnt[30];
double aaResFreqDouble[30];
int totalResCnt;

double molWeight=0.0;
double hydroSum;
struct pbStamp *stampDataPtr;

/* count how often each of the 23 codes in aaAlphabet occurs in aa */
for (j=0; j<23; j++)
    {
    aaResCnt[j] = 0;
    }

l=len = strlen(aa);
chp = aa;
for (i=0; i<l; i++)
    {
    for (j=0; j<23; j++)
    	{
        if (*chp == aaAlphabet[j])
            {
            aaResCnt[j] ++;
            }
        }
    chp++;
    }

totalResCnt = 0;
for (i=0; i<23; i++)
    {
    totalResCnt = totalResCnt + aaResCnt[i];
    }

/* frequencies are only computed for the first 20 codes */
for (i=0; i<20; i++)
    {
    aaResFreqDouble[i] = ((double)aaResCnt[i])/((double)totalResCnt);
    }

AllocVar(stampPictPtr);

stampWidth  = 75*(1+pbScale/3);
stampHeight = 60*(1+pbScale/3);
xPosition   = 15;
yPosition   = *yOffp + 135;
if (pbScale >= 6) yPosition = yPosition + 20;

boundaryColor = vgFindColorIx(g_vg, 170, 170, 170);

/* draw pI stamp */

safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
answer = sqlGetField(database, "pepPi", "count(*)", cond_str);

/* either 0 or multiple rows are not valid; a failed lookup (NULL) is treated the same */
if ((answer != NULL) && (strcmp(answer, "1") == 0))
    {
    answer = sqlGetField(database, "pepPi", "pI", cond_str);
    if (answer != NULL) pI = (double)atof(answer);
    stampDataPtr = getStampData("pepPi");
    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    drawPbStamp(stampDataPtr, stampPictPtr);
    drawXScaleNonInt(stampDataPtr, stampPictPtr, 2);
    safef(valStr2, sizeof(valStr2), "%.1f", pI);
    markStamp(stampDataPtr, stampPictPtr, pI, valStr2, tx, ty);
    pbStampFree(&stampDataPtr);
    }
else
    {
    stampDataPtr = getStampData("pepPi");
    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    drawPbStamp(stampDataPtr, stampPictPtr);
    drawXScale(stampDataPtr, stampPictPtr, 2);
    safef(valStr2, sizeof(valStr2), "N/A");
    markStamp0(stampDataPtr, stampPictPtr, pI, valStr2, tx, ty);
    pbStampFree(&stampDataPtr);
    }

/* skip Mol Wt, if it is GSID */
if (!hIsGsidServer())
    {
    /* draw Mol Wt stamp */
    safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
    answer = sqlGetField(database, "pepMwAa", "MolWeight", cond_str);
    if (answer != NULL)
    	{
    	safef(valStr2, sizeof(valStr2), "%s Da", answer);
    	molWeight  = (double)atof(answer);
    	stampDataPtr = getStampData("pepMolWt");
    	xPosition = xPosition + stampWidth + stampWidth/8;
    	setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	drawXScaleMW(stampDataPtr, stampPictPtr, 50000);
    	markStamp(stampDataPtr, stampPictPtr, molWeight, valStr2, tx, ty);
    	pbStampFree(&stampDataPtr);
    	}
    else
    	{
    	safef(valStr2, sizeof(valStr2), "N/A");
    	stampDataPtr = getStampData("pepMolWt");
    	xPosition = xPosition + stampWidth + stampWidth/8;
    	setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	drawXScaleMW(stampDataPtr, stampPictPtr, 50000);
    	markStamp0(stampDataPtr, stampPictPtr, molWeight, valStr2, tx, ty);
    	pbStampFree(&stampDataPtr);
    	}
    }
    
if (!proteinInSupportedGenome)
	{
	/* leave the exon count slot empty but keep the following stamps aligned */
	if (!hIsGsidServer())
	    xPosition = xPosition + stampWidth + stampWidth/8;
	goto skip_exon;
	}
	
/* draw exon count stamp */
if (kgVersion == KG_III)
    {
    safef(cond_str, sizeof(cond_str), "qName='%s'", mrnaID);
    }
else
    {
    safef(cond_str, sizeof(cond_str), "qName='%s'", proteinID);
    }
answer = sqlGetField(database, kgProtMapTableName, "blockCount", cond_str);
if (answer != NULL)
    {
    valStr       = cloneString(answer);
    exonCount    = (double)atoi(answer);
    stampDataPtr = getStampData("exonCnt");
    xPosition = xPosition + stampWidth + stampWidth/8;
    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    drawPbStamp(stampDataPtr, stampPictPtr);
    drawXScale(stampDataPtr, stampPictPtr, 5);
    markStamp(stampDataPtr, stampPictPtr, exonCount, valStr, tx, ty);
    pbStampFree(&stampDataPtr);
    }
skip_exon:

/* NOTE(review): the "answer != NULL" tests below reuse whatever the most recent
   sqlGetField() call above returned; confirm that this coupling is intended. */
if (!hIsGsidServer())
    {
    /* draw AA residual anomolies stamp */
    if (answer != NULL)
    	{
    	stampDataPtr = getStampData("pepRes");
    	xPosition = xPosition + stampWidth + stampWidth/8;
    	setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, 
		       3*stampWidth/2, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	for (i=0; i<20; i++)
	    {
            markResStamp(aaAlphabet[i], stampDataPtr, stampPictPtr, i, aaResFreqDouble[i], 
			tx, ty, avg, stddev);
	    }
    	pbStampFree(&stampDataPtr);
    	}

    /* start the second row of stamps */
    xPosition = 15;
    yPosition = yPosition + 170;
    }

/* skip swInterPro if it is GSID */
if (!hIsGsidServer())
    {

    /* draw family size stamp */
    safef(cond_str, sizeof(cond_str), "accession='%s'", proteinID);
    answer = sqlGetField(protDbName, "swInterPro", "count(*)", cond_str);
    if (answer != NULL)
    	{
    	valStr       = cloneString(answer);
    	stampDataPtr = getStampData("intPCnt");
    	setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	drawXScale(stampDataPtr, stampPictPtr, 1);
    	markStamp(stampDataPtr, stampPictPtr, (double)(atoi(answer)), valStr, tx, ty);
    	pbStampFree(&stampDataPtr);
    	}
    else
    	{
    	valStr       = cloneString("N/A");
    	stampDataPtr = getStampData("intPCnt");
    	setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	drawXScale(stampDataPtr, stampPictPtr, 1);
    	/* answer is NULL here, so it must not be handed to atoi(); use 0 like the other N/A stamps */
    	markStamp0(stampDataPtr, stampPictPtr, 0.0, valStr, tx, ty);
    	pbStampFree(&stampDataPtr);
    	}
    }

/* draw hydrophobicity stamp */
chp      = protSeq;
hydroSum = 0;
for (i=0; i<protSeqLen; i++)
    {
    /* go through unsigned char so that a byte >= 0x80 cannot become a negative index */
    hydroSum = hydroSum + aa_hydro[(unsigned char)(*chp)];
    chp++;
    }
stampDataPtr = getStampData("hydro");
xPosition = xPosition + stampWidth + stampWidth/8;
setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
drawPbStamp(stampDataPtr, stampPictPtr);
drawXScaleHydro(stampDataPtr, stampPictPtr, 1.0);
safef(valStr2, sizeof(valStr2), "%.1f", hydroSum/(double)len);
markStamp(stampDataPtr, stampPictPtr, hydroSum/(double)len, valStr2, tx, ty);
pbStampFree(&stampDataPtr);

/* draw Cystein Count stamp */
chp  = protSeq;
cCnt = 0;
for (i=0; i<len; i++)
    {
    if (*chp == 'C') cCnt ++;
    chp++;
    }
stampDataPtr = getStampData("cCnt");
xPosition = xPosition + stampWidth + stampWidth/8;
setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
drawPbStamp(stampDataPtr, stampPictPtr);
drawXScale(stampDataPtr, stampPictPtr, 10);
safef(valStr2, sizeof(valStr2), "%d", cCnt);
markStamp(stampDataPtr, stampPictPtr, (double)cCnt, valStr2, tx, ty);
pbStampFree(&stampDataPtr);

/* if it is GSID, draw AA residual anomolies here */
if (hIsGsidServer())
    {
    xPosition = 15;
    yPosition = yPosition + 170;

    /* draw AA residual anomolies stamp */
    if (answer != NULL)
    	{
    	stampDataPtr = getStampData("pepRes");
        setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, 
	3*stampWidth/2, stampHeight);
    	drawPbStamp(stampDataPtr, stampPictPtr);
    	for (i=0; i<20; i++)
            {
            markResStamp(aaAlphabet[i], stampDataPtr, stampPictPtr, i, aaResFreqDouble[i],
                         tx, ty, avg, stddev);
            }
    	pbStampFree(&stampDataPtr);
    	}
    }

/* draw AA residual anomolies stddev stamp */
if (answer != NULL)
    {
    stampDataPtr = getStampData("pepRes");
    if (hIsGsidServer())
        {
        xPosition = xPosition + stampWidth*1.62 + stampWidth/8;
        }
    else
        {
        xPosition = xPosition + stampWidth + stampWidth/8;
        }

    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, 3*stampWidth/2, stampHeight);
    
    stampDataPtr->ymin = -4.0;
    stampDataPtr->ymax =  4.0;
    for (i=0; i<20; i++)
	{
        markResStdvStamp(stampDataPtr, stampPictPtr, i, aaResFreqDouble[i], tx, ty, avg, stddev);
	}

    /* release the copy with the forced y range before fetching a fresh one;
       it used to be overwritten without being freed */
    pbStampFree(&stampDataPtr);

    /* draw background after bars drawn so that "... stddev" labels do not get covered by bars */
    stampDataPtr = getStampData("pepRes");
    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, 3*stampWidth/2, stampHeight);
    drawPbStampB(stampDataPtr, stampPictPtr);

    pbStampFree(&stampDataPtr);
    }

/* The following section was used to plot the frequency distribution for each AA so that we can view them
   to decide on whether +/- 2 stddev is applicable and what cutoff thresholds to use.  Keep it here for
   possible future reuse. */
/*
vertLabel = cloneString("Frequency");
for (i=strlen(vertLabel)-1; i>=0; i--)
    {
    vertLabel[i+1] = '\0';
    vgTextCentered(g_vg, 3, 45+i*10, 10, 10, MG_BLACK, g_font, vertLabel+i);
    vgTextCentered(g_vg, 3, 215+i*10, 10, 10, MG_BLACK, g_font, vertLabel+i);
    }

xPosition = xPosition + 80;
for (j=0; j<20; j++)
    {
    safef(tempStr, sizeof(tempStr), "%c", aaAlphabet[j]);
    stampDataPtr = getStampData(tempStr);

    xPosition = xPosition + stampWidth + stampWidth/8;
    setPbStampPict(stampPictPtr, stampDataPtr, xPosition, yPosition, stampWidth, stampHeight);
    drawPbStamp(stampDataPtr, stampPictPtr);
    drawXScale(stampDataPtr, stampPictPtr, 10);
    safef(valStr2, sizeof(valStr2), "%c", aaAlphabet[j]);
    markStamp(stampDataPtr, stampPictPtr, 0.0, valStr2, tx, ty);
    pbStampFree(&stampDataPtr);
    }
*/
}
void markResStdvStamp(struct pbStamp *pbStampPtr, struct pbStampPict *stampPictPtr,
 		  int iTarget, double yValueIn, double tx[], double ty[], 
		  double avg[], double stddev[])
/* mark the AA residual stddev stamp
 *   pbStampPtr   - stamp data; only xmin and xmax are read here
 *   stampPictPtr - stamp picture, passed on to calStampXY()
 *   iTarget      - index of the residue in aaAlphabet, tx[], avg[] and stddev[]
 *   yValueIn     - observed frequency of that residue
 *   ty           - not referenced in this function
 * The bar height is (yValueIn - avg)/stddev clamped to [-4, 4].  It is drawn
 * with abnormalColor when yValueIn is above pctHi or not above pctLow from the
 * pbAnomLimit table, otherwise with normalColor.  Nothing is drawn when
 * pbAnomLimit has no limits for the residue. */
{
double txmin, tymin, txmax, tymax;
double yValue, yPlotValue;
int xx,  yy;
double pctLow, pctHi;
char cond_str[255];
char *lowStr, *hiStr;
char aaChar;
   
txmin	= pbStampPtr->xmin;
txmax	= pbStampPtr->xmax;

/* force fit for the stddev stamp plot */
tymin	= -4.0;
tymax	=  4.0;
   
aaChar = aaAlphabet[iTarget];
safef(cond_str, sizeof(cond_str), "AA='%c'", aaChar);
lowStr = sqlGetField(database, "pbAnomLimit", "pctLow", cond_str);
hiStr  = sqlGetField(database, "pbAnomLimit", "pctHi", cond_str);

/* atof(NULL) is undefined; without limits the residue cannot be classified */
if ((lowStr == NULL) || (hiStr == NULL)) return;
pctLow = (double)(atof(lowStr));
pctHi  = (double)(atof(hiStr));

yScale = (double)(120)/8.0;
calStampXY(stampPictPtr, (txmax-txmin)/2.0, tymax, &xx, &yy);
  
/* deviation from the average in units of stddev, clamped to the plot range */
yValue = (yValueIn - avg[iTarget])/stddev[iTarget];
if (yValue > tymax)
    {
    yPlotValue = tymax;
    }
else if (yValue < tymin)
    {
    yPlotValue = tymin;
    }
else
    {
    yPlotValue = yValue;
    }
    
if (yValueIn > pctHi)
    {
    vLine(tx[iTarget]+0.4, 0.0, yPlotValue, 3, abnormalColor);
    }
else if (yValueIn <= pctLow)
    {
    vLine(tx[iTarget]+0.4, 0.0+yPlotValue, -yPlotValue, 3, abnormalColor);
    }
else if ((yValueIn - avg[iTarget]) >= 0.0)
    {
    /* normal range, at or above the average */
    vLine(tx[iTarget]+0.4, 0.0, yPlotValue, 2, normalColor);
    }
else
    {
    /* normal range, below the average */
    vLine(tx[iTarget]+0.4, 0.0+yPlotValue, -yPlotValue, 2, normalColor);
    }
}
int main(int argc, char *argv[])
{
char *skippedKgId;
char *lastValidKgId;
    
struct sqlConnection *conn2, *conn3;
struct sqlResult *sr2;
char query2[256];
char **row2;
    
char *proteinID;
FILE   *o3, *o7;
char *name, *chrom, *strand, *txStart, *txEnd, *cdsStart, *cdsEnd,
     *exonCount, *exonStarts, *exonEnds;

char *alignID;

char *chp;
int  i, j;

int  isDuplicate;
    
char *genomeDBname;
char *proteinDataDate;
char proteinsDB[40];
char spDB[40];
char *acc;

#define MAX_EXON 1000
int exStart[MAX_EXON], exEnd[MAX_EXON];
int exCount;

int aaStart[MAX_EXON], aaEnd[MAX_EXON];
    
char *sp, *ep;

int  aalen;
int  cdsS, cdsE;
int  eS, eE;
 
if (argc != 3) usage();
    
proteinDataDate = argv[1];
genomeDBname    = argv[2];
  
safef(spDB, sizeof(spDB), "sp%s", proteinDataDate);
safef(proteinsDB, sizeof(proteinsDB), "proteins%s", proteinDataDate);
 
o3 = fopen("j.dat", "w");
o7 = fopen("jj.dat", "w");

conn2= hAllocConn(genomeDBname);
conn3= hAllocConn(genomeDBname);
    
inf  = mustOpen("sorted.lis", "r");

strcpy(oldInfo, "");

skippedKgId   = cloneString("");
lastValidKgId = cloneString("");

isDuplicate   = 0;
oldMrnaStr    = cloneString("");
oldAlignStr   = cloneString("");
oldProteinStr = cloneString("");

mrnaStr       = cloneString("");
proteinStr    = cloneString("");
alignStr      = cloneString("");

while (fgets(line_in, 10000, inf) != NULL)
    {
    strcpy(line, line_in);

    chp = strstr(line, "\t");	/* chrom */
    chp ++;

    chp = strstr(chp, "\t");	/* cds block start position */
    chp ++;

    chp = strstr(chp, "\t");	/* cds block end   position */
    *chp = '\0';
    chp++;
    strcpy(newInfo, line);

    if (sameString(oldInfo, newInfo))
	{
	isDuplicate = 1;
	}
    else
	{
	/* remember previous record as old only if it is not a duplicate */
	if (!isDuplicate)
	    {
	    oldMrnaStr 	  = mrnaStr;
	    oldProteinStr = proteinStr;
	    oldAlignStr	  = alignStr;
	    }
	strcpy(oldInfo, newInfo);
	isDuplicate = 0;
	}

    chp = strstr(chp, "\t");	/* priority score */
    chp ++;
		
    chp = strstr(chp, "\t");	/* mRNA transcription length */ 
    chp ++;
		
    chp = strstr(chp, "\t");	/* mRNA date */
    chp ++;
	
    mrnaStr = chp;	
    chp = strstr(chp, "\t");	/* mRNA ID */
    *chp = '\0';
    chp ++;
    mrnaStr = cloneString(mrnaStr);

    proteinStr = chp;	
    chp = strstr(chp, "\t");	/* protein ID */
    *chp = '\0';
    chp ++;
    proteinStr = cloneString(proteinStr);

    alignID = chp;

    /* get rid of "end-of-line" character at the end of the string */
    alignStr = trimSpaces(alignID);

    if (isDuplicate)
	{
	/* only put out records for valid KG entries */
	if (!sameString(oldMrnaStr, skippedKgId) || sameString(oldMrnaStr, lastValidKgId))
	    {
	    fprintf(o7, "%s\t%s\t%s\t%s\n", oldMrnaStr, oldProteinStr, mrnaStr, proteinStr);
	    }
	}
    else
	{
	safef(query2, sizeof(query2), "select * from %sTemp.knownGene0 where alignID='%s';", genomeDBname, alignID);
	sr2 = sqlMustGetResult(conn2, query2);
    	row2 = sqlNextRow(sr2);
    	while (row2 != NULL)
	    {
 	    name 	= row2[0];
	    chrom 	= row2[1];
	    strand	= row2[2];
 	    txStart 	= row2[3];
	    txEnd       = row2[4];
	    cdsStart    = row2[5]; 
	    cdsEnd	= row2[6];
	    exonCount   = row2[7]; 
	    exonStarts  = row2[8]; 
	    exonEnds    = row2[9];	

	    proteinID = row2[10];
	    alignID   = row2[11];

	    sscanf(exonCount, "%d", &exCount);
	    sp = cloneString(exonStarts);
	    ep = cloneString(exonEnds);
	
            sscanf(cdsStart, "%d", &cdsS);
            sscanf(cdsEnd, "%d", &cdsE);

	    aalen = 0;
	    j=0;
	    for (i=0; i<exCount; i++)
		{
		chp = strstr(sp, ",");
		*chp = '\0';
		sscanf(sp, "%d", &(exStart[i]));
		chp++;
		sp = chp;

		chp = strstr(ep, ",");
		*chp = '\0';
		sscanf(ep, "%d", &(exEnd[i]));
	
		eS = exStart[i];
		eE = exEnd[i];
		
		if (cdsS > eS)
		    {
		    eS = cdsS;
		    }
		if (cdsE < eE)
		    {
		    eE = cdsE;
		    }
		if (eS > eE) 
		    {
		    eS = 0;
		    eE = 0;
		    }
	        if (eS != eE)
		    {
		    aaStart[j] = aalen;
		    aaEnd[j] = aaStart[j] + (eE- eS +1)/3 -1;
		    aalen = aalen + (eE- eS +1)/3;
			
		    j++;
		    }
		
		chp++;
		ep = chp;
		}
		
	    cdsLen = aalen;

            safef(cond_str, sizeof(cond_str), "val='%s'", proteinID);
            acc = sqlGetField(spDB, "displayId", "acc", cond_str);

            safef(cond_str, sizeof(cond_str), "acc='%s'", acc);
            aaStr=sqlGetField(spDB, "protein", "val", cond_str);
    	    aaLen = strlen(aaStr);

            if ((cdsLen >  50) || ((cdsLen * 100)/aaLen > 50))
		{
		fprintf(o3,"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
 		    	name,
			chrom,
		        strand,
 	    		txStart,
	    		txEnd,
	 	   	cdsStart,
	    		cdsEnd,
		    	exonCount,
		    	exonStarts,
	    		exonEnds,
			
			proteinID,
			alignID);
		lastValidKgId = cloneString(name);
		}
	    else
		{
		printf("skipping %s %d \n", name, cdsLen);
		skippedKgId = cloneString(name);
		} 
	    row2 = sqlNextRow(sr2);
	    }
	sqlFreeResult(&sr2);
	}
    }
hFreeConn(&conn2);
hFreeConn(&conn3);
fclose(o3);
fclose(o7);
    
mustSystem("cat j.dat|sort|uniq  >knownGene.tab");
mustSystem("cat jj.dat|sort|uniq >duplicate.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}
Example #14
0
void doSamT02(char *proteinId, char *database)
/* display the UCSC SAM-T02 Protein Structure Analysis and Prediction section
 *   proteinId - protein to report on; it is mapped to an SGD ID with getSgdId()
 *   database  - genome database; nothing is printed unless it is "sacCer1" and
 *               has both the samSubdir and protHomolog tables
 * Also prints nothing when the protein has no SGD ID or no samSubdir entry. */ 
{
char *itemName = NULL;
char query2[256];
struct sqlResult *sr2;
char **row2;
struct sqlConnection *conn, *conn2;
char condStr[256];
char *chp;
int  tablesExist;

char *samSubDir;
char *samHttpStr0 = NULL; /* SAM server*/
char *samHttpStr  = NULL; /* UCSC GB site */
int  homologCount;

char *homologID;
char *SCOPdomain;
char *chain;
char *bestEValStr = NULL;
float eValue;

char goodSCOPdomain[40];
int  first = 1;
int  show;

/* return if this genome does not have SAM protein analysis results */
/* defensive logic to guard against the situation that the binary program is pushed, but the data tables are not */
conn = sqlConnect(database);
tablesExist = (sqlTableExists(conn, "samSubdir") && sqlTableExists(conn, "protHomolog"));
/* disconnect before deciding, so that the early return does not leak the connection */
sqlDisconnect(&conn);
if (!tablesExist)
    {
    return;
    }
if (!sameWord(database, "sacCer1"))
    {
    return;
    }
    
/* only sacCer1 gets this far */
samHttpStr0 = "http://www.soe.ucsc.edu/research/compbio/yeast-protein-predictions";
samHttpStr  = "../goldenPath/sacCer1/sam";
    
/* SAM analysis of SGD proteins uses SGD ID, not Swiss-Prot AC */
itemName = getSgdId(proteinId, database);
if (itemName == NULL) return;

sqlSafefFrag(condStr, sizeof condStr, "proteinId='%s'", itemName);
samSubDir = sqlGetField(database, "samSubdir", "subdir", condStr);
if (samSubDir == NULL) return;

hPrintf("<B>UCSC ");
hPrintf("<A HREF=\"http://www.soe.ucsc.edu/research/compbio/SAM_T02/sam-t02-faq.html\"");
hPrintf(" TARGET=_blank>SAM-T02</A>\n");
hPrintf(" Protein Structure Analysis and Prediction on %s", proteinId);
if (!sameWord(proteinId, itemName)) hPrintf(" (aka %s)", itemName);
hPrintf("</B><BR>\n");

hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>Multiple Alignment (sequence logo):</B> \n");
hPrintf("<A HREF=\"%s/%s/%s/%s.t2k.w0.5-logo.pdf\"", samHttpStr, samSubDir, itemName, itemName);
hPrintf(" TARGET=_blank>%s</A> (pdf)<BR>\n", itemName);

hPrintf("<B>&nbsp;&nbsp;&nbsp;&nbsp;Secondary Structure Predictions:</B> \n");
hPrintf("<A HREF=\"%s/%s/%s/%s.t2k.dssp-ehl2-logo.pdf\"", samHttpStr, samSubDir, itemName, itemName);
hPrintf(" TARGET=_blank>%s</A> (pdf)<BR>\n", itemName);

hPrintf("<B>&nbsp;&nbsp;&nbsp;&nbsp;Close Homologs:</B> \n");

/* allocated only here; it used to be allocated at its declaration as well, which leaked one connection */
conn2= hAllocConn(database);
sqlSafef(query2, sizeof query2, 
    "select homologID,eValue,SCOPdomain,chain from %s.protHomolog where proteinID='%s' and evalue <= 0.01 order by evalue;",
    database, itemName);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);

homologCount = 0;
strcpy(goodSCOPdomain, "dummy");
while (row2 != NULL)
    {
    homologID = row2[0];
    sscanf(row2[1], "%e", &eValue);
    /* NOTE(review): this is refreshed on every row until the first homolog is
       shown, so it ends up holding that homolog's E value - confirm intent */
    if (first)
	{
	bestEValStr = strdup(row2[1]);
	}

    /* cut the domain name at its last '.'; names without a '.' are left as they are */
    SCOPdomain = row2[2];
    chp = strrchr(SCOPdomain, '.');
    if (chp != NULL) *chp = '\0';
    chain = row2[3];
    
    /* A very significant hit defines the "good" domain.  Weaker hits are only
       shown when they fall into that same domain. */
    if (eValue <= 1.0e-10) 
	{
	/* bounded copy: goodSCOPdomain only holds 40 bytes */
	snprintf(goodSCOPdomain, sizeof(goodSCOPdomain), "%s", SCOPdomain);
	show = 1;
	}
    else
	{
	show = ((strcmp(goodSCOPdomain,SCOPdomain) == 0) && !(eValue > 0.1));
	}

    if (show)
	{
    	if (first)
    	    {
	    first = 0;
	    }
    	else
            {
            hPrintf(", ");
	    }
					   
    	hPrintf("\n<A HREF=\"http://www.rcsb.org/pdb/cgi/explore.cgi?job=graphics&pdbId=%s", homologID);
    	if (strlen(chain) >= 1) 
	    {
	    hPrintf("\" TARGET=_blank>%s</A>(chain %s)\n", homologID, chain);
	    }
    	else
	    {
	    hPrintf("\" TARGET=_blank>%s</A>\n", homologID);
	    }
    	homologCount++;
	}
	
    row2 = sqlNextRow(sr2);
    }
sqlFreeResult(&sr2);
hFreeConn(&conn2);

if (homologCount == 0)
    {
    hPrintf("None\n");
    }

hPrintf("<BR>&nbsp;&nbsp;&nbsp;&nbsp;<B>More Details:</B> \n");
hPrintf("<A HREF=\"%s/%s/%s/summary.html\"", samHttpStr0, samSubDir, itemName);
hPrintf(" TARGET=_blank>%s</A><BR>\n", itemName);

if (homologCount > 0)
    {
    hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>3D Structure Prediction: </B> \n");
    hPrintf("<A HREF=\"%s/%s/%s/%s.t2k.undertaker-align.pdb.gz\"", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf(" TARGET=_blank>%s</A> (PDB format, gzipped)<BR>\n", itemName);

    hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>3D Pictures of the Best Model");
    hPrintf(" (E Value: %s):</B><BR>\n", bestEValStr);fflush(stdout);
    
    hPrintf("<TABLE><TR>\n");
    hPrintf("<TD>&nbsp;&nbsp;</TD>");
    hPrintf("<TD><IMG SRC=\"%s/%s/%s/%s.view1_200.jpg\"></TD>\n", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("<TD><IMG SRC=\"%s/%s/%s/%s.view2_200.jpg\"></TD>\n", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("<TD><IMG SRC=\"%s/%s/%s/%s.view3_200.jpg\"></TD>\n", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("</TR>\n");
    
    hPrintf("<TR>");
    hPrintf("<TD>&nbsp;&nbsp;</TD>");
    hPrintf("<TD ALIGN=CENTER>Front</TD>");
    hPrintf("<TD ALIGN=CENTER>Top</TD>");
    hPrintf("<TD ALIGN=CENTER>Side</TD>");
    hPrintf("</TR>\n");
    
    hPrintf("<TR>");
    hPrintf("<TD>&nbsp;&nbsp;</TD>");
    hPrintf("<TD ALIGN=CENTER><A HREF=\"%s/%s/%s/%s.view1_500.jpg\">500x500</A></TD>\n", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("<TD ALIGN=CENTER><A HREF=\"%s/%s/%s/%s.view2_500.jpg\">500x500</A></TD>\n",  
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("<TD ALIGN=CENTER><A HREF=\"%s/%s/%s/%s.view3_500.jpg\">500x500</A></TD>\n", 
    	    samHttpStr, samSubDir, itemName, itemName);
    hPrintf("</TR>\n");
    hPrintf("</TABLE>\n");
    }
else
    {
    hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>3D Structure Prediction: </B> \n");
    hPrintf("No models presented, because none has E-value <= 0.01.<BR>");
    }
hPrintf("<BR>");
}
int main(int argc, char *argv[])
/* Compute amino acid residue frequency statistics over the proteins listed in
 * <argv[3]>.knownGene.
 *
 * Proteins are skipped when they are not found in displayId, have no spXref2
 * entry, have a biodatabaseID that does not start with '1', or are shorter
 * than 100 residues.
 *
 * Files written in the current directory:
 *   pbResAvgStd.tab  - residue, average frequency, standard deviation
 *   pbAnomLimit.tab  - residue, low and high cutoff (2.5% and 97.5% positions
 *                      in the sorted, uniq'ed frequency list)
 *   <residue>.txt and <residue>.srt - raw and sorted per-protein frequencies
 * calDist() is also called once per residue with a "pbAaDist<residue>.tab"
 * file name.
 *
 * argv[1] - protein database holding the displayId and protein tables
 * argv[2] - taxon; accepted but not used here
 * argv[3] - genome database holding knownGene */
{
    struct sqlConnection *conn, *conn2;
    char query2[256];
    struct sqlResult *sr2;
    char **row2;
    char cond_str[255];
    char *proteinDatabaseName;
    FILE *o1, *o2, *o3;
    FILE *fh[23];
    char temp_str[1000];
    char *accession;
    char *aaSeq;
    char *chp;
    int i, j, len;
    int ihi, ilow;
    char *answer;
    char *protDisplayId;
    int aaResCnt[30];
    char aaAlphabet[30];
    int aaResFound;
    float fvalue1, fvalue2;
    float p1, p2;
    int icnt;
    char *database;
    int sortedCnt;

    if (argc != 4) usage();

    strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB");

    proteinDatabaseName = argv[1];
    database = argv[3];

    o2 = mustOpen("pbResAvgStd.tab", "w");

    /* one frequency file per residue, for the first 20 codes of the alphabet */
    for (i=0; i<20; i++)
    {
        safef(temp_str, sizeof(temp_str), "%c.txt", aaAlphabet[i]);
        fh[i] = mustOpen(temp_str, "w");
    }

    conn  = hAllocConn(hDefaultDb());
    conn2 = hAllocConn(hDefaultDb());

    safef(query2, sizeof(query2), "select proteinID from %s.knownGene;", database);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    icnt = 0;

    for (j=0; j<MAXRES; j++)
    {
        sumJ[j] = 0;
    }

    while (row2 != NULL)
    {
        protDisplayId = row2[0];
        safef(cond_str, sizeof(cond_str),  "val='%s'", protDisplayId);
        accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);

        if (accession == NULL)
        {
            /* the ID may already be an accession rather than a display ID */
            safef(cond_str, sizeof(cond_str),  "acc='%s'", protDisplayId);
            accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);
            if (accession == NULL)
            {
                verbose(2, "'%s' not found.\n", protDisplayId);
                goto skip;
            }
        }

        /* NOTE(review): the database name is hard-coded here instead of coming
           from the command line - confirm this is still the intended release */
        safef(cond_str, sizeof(cond_str),  "accession='%s'", accession);
        answer = sqlGetField("proteins040115", "spXref2", "biodatabaseID", cond_str);
        if (answer == NULL)
        {
            /* this protein might be a variant splice protein, and then it won't be in spXref2 */
            goto skip;
        }
        if (answer[0] != '1')
        {
            /* not in SWISS-PROT */
            goto skip;
        }

        safef(cond_str, sizeof(cond_str),  "acc='%s'", accession);
        aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str);
        if (aaSeq == NULL)
        {
            printf("Can't find peptide sequence for %s, exiting ...\n", protDisplayId);
            fflush(stdout);
            exit(1);
        }

        len  = strlen(aaSeq);
        if (len < 100) goto skip;

        lenDouble = (double)len;

        for (j=0; j<MAXRES; j++)
        {
            aaResCnt[j] = 0;
        }

        chp = aaSeq;
        for (i=0; i<len; i++)
        {
            aaResFound = 0;
            for (j=0; j<MAXRES; j++)
            {
                if (*chp == aaAlphabet[j])
                {
                    aaResFound = 1;
                    aaResCnt[j] ++;
                }
            }
            if (!aaResFound)
            {
                fprintf(stderr, "%c %d not a valid AA residue.\n", *chp, *chp);
            }
            chp++;
        }

        for (j=0; j<MAXRES; j++)
        {
            freq[icnt][j] = (double)aaResCnt[j]/lenDouble;
            sumJ[j] = sumJ[j] + freq[icnt][j];
        }

        for (j=0; j<20; j++)
        {
            fprintf(fh[j], "%15.7f\t%s\n", freq[icnt][j], accession);
            fflush(fh[j]);
        }
        icnt++;
        if (icnt >= MAXN)
            errAbort("Too many proteins - please set MAXN to be more than %d\n", MAXN);

skip:
        row2 = sqlNextRow(sr2);
    }

    recordCnt = icnt;
    recordCntDouble = (double)recordCnt;

    for (j=0; j<20; j++)
    {
        carefulClose(&(fh[j]));
    }

    sqlFreeResult(&sr2);
    hFreeConn(&conn);
    hFreeConn(&conn2);

    /* the average divides by recordCnt and the standard deviation by recordCnt-1 */
    if (recordCnt < 2)
        errAbort("Only %d usable protein(s) found - at least 2 are needed.\n", recordCnt);

    for (j=0; j<MAXRES; j++)
    {
        avg[j] = sumJ[j]/recordCntDouble;
    }

    for (j=0; j<20; j++)
    {
        sum = 0.0;
        for (i=0; i<recordCnt; i++)
        {
            sum = sum + (freq[i][j] - avg[j]) * (freq[i][j] - avg[j]);
        }
        sigma[j] = sqrt(sum/(double)(recordCnt-1));
        fprintf(o2, "%c\t%f\t%f\n", aaAlphabet[j], avg[j], sigma[j]);
    }

    carefulClose(&o2);

    o1 = mustOpen("pbAnomLimit.tab", "w");
    for (j=0; j<20; j++)
    {
        safef(temp_str, sizeof(temp_str), "cat %c.txt|sort|uniq > %c.srt",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);

        /* figure out how many unique entries: first number printed by wc */
        safef(temp_str, sizeof(temp_str), "wc %c.srt > %c.tmp",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);
        safef(temp_str, sizeof(temp_str), "%c.tmp",  aaAlphabet[j]);
        o3 = mustOpen(temp_str, "r");
        mustGetLine(o3, temp_str, 1000);
        chp = temp_str;
        while (*chp == ' ') chp++;
        while (*chp != ' ') chp++;
        *chp = '\0';
        sscanf(temp_str, "%d", &sortedCnt);
        safef(temp_str, sizeof(temp_str), "rm %c.tmp", aaAlphabet[j]);
        mustSystem(temp_str);

        /* cal hi and low cutoff threshold */
        ilow = (int)((float)sortedCnt * 0.025);
        ihi  = (int)((float)sortedCnt * 0.975);

        safef(temp_str, sizeof(temp_str), "%c.srt",  aaAlphabet[j]);
        o2 = mustOpen(temp_str, "r");
        fvalue1 = 0.0;
        fvalue2 = 0.0;

        /* low cutoff: midpoint between sorted lines ilow and ilow+1 */
        for (i=0; i<ilow; i++)
        {
            mustGetLine(o2, temp_str, 1000);
        }
        if (ilow > 0) sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, 1000);
        sscanf(temp_str, "%f", &fvalue2);
        /* with ilow == 0 no line precedes the first one (temp_str still held
           the file name), so the first value itself is the cutoff */
        if (ilow > 0)
            p1 = (fvalue1 + fvalue2)/2.0;
        else
            p1 = fvalue2;

        /* high cutoff: midpoint between sorted lines ihi and ihi+1 */
        for (i=ilow+1; i<ihi; i++)
        {
            mustGetLine(o2, temp_str, 1000);
        }
        sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, 1000);
        sscanf(temp_str, "%f", &fvalue2);
        p2 = (fvalue1 + fvalue2)/2.0;
        carefulClose(&o2);

        fprintf(o1, "%c\t%f\t%f\n", aaAlphabet[j], p1, p2);
        fflush(stdout);

        for (i=0; i<recordCnt; i++)
        {
            measure[i] = freq[i][j];
        }
        safef(temp_str, sizeof(temp_str), "pbAaDist%c.tab", aaAlphabet[j]);
        calDist(measure,  recordCnt,    51,     0.0, 0.005, temp_str);
    }

    carefulClose(&o1);

    return(0);
}
Example #16
0
int main(int argc, char *argv[])
/* Gather per-protein statistics for every accession of one taxon, then write
 * the residue frequency table pepResDist.tab and, through calDist(), the
 * histograms for molecular weight, pI, average hydrophobicity, cysteine count,
 * exon count and InterPro domain count.
 * Arguments: argv[1] Swiss-Prot database (example: sp031112),
 *            argv[2] proteins database (example: proteins031112),
 *            argv[3] taxon id, argv[4] genome database. */
{
enum { maxProteins = 100000 };	/* capacity of every per-protein array below */
struct sqlConnection *conn, *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
char cond_str[255];
char *proteinDatabaseName;	/* example: sp031112 */
char *protDbName;		/* example: proteins031112 */
FILE *o2;
char *accession;
char *aaSeq;
char *chp;
int i, j, len;
int cCnt;
char *answer, *answer2;
double hydroSum;
char *protDisplayId;
int aaResCnt[30];
double aaResCntDouble[30];
char aaAlphabet[30];
int aaResFound;
int totalResCnt;
int molWtCnt;
double molWt[maxProteins];
int pIcnt;
double pI[maxProteins];

/* Zero-filled so that residues without an entry below (X, Z, B are accepted
 * by aaAlphabet but never assigned) contribute 0 instead of stack garbage. */
double aa_hydro[256] = {0};
int icnt, jExon, pcnt, ipcnt = 0;
double aaLenDouble[maxProteins];
double avgHydro[maxProteins];
double cCountDouble[maxProteins];
double exonCountDouble[maxProteins];
double interProCountDouble[maxProteins];
char *taxon;
char *database;
int blockCount;
int interProCount = 0;		/* running total, was read uninitialized before */
char *kgId;

if (argc != 5) usage();

strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB");

/* Ala:  1.800  Arg: -4.500  Asn: -3.500  Asp: -3.500  Cys:  2.500  Gln: -3.500 */
aa_hydro['A'] =  1.800;
aa_hydro['R'] = -4.500;
aa_hydro['N'] = -3.500;
aa_hydro['D'] = -3.500;
aa_hydro['C'] =  2.500;
aa_hydro['Q'] = -3.500;

/* Glu: -3.500  Gly: -0.400  His: -3.200  Ile:  4.500  Leu:  3.800  Lys: -3.900 */
aa_hydro['E'] = -3.500;
aa_hydro['G'] = -0.400;
aa_hydro['H'] = -3.200;
aa_hydro['I'] =  4.500;
aa_hydro['L'] =  3.800;
aa_hydro['K'] = -3.900;

/* Met:  1.900  Phe:  2.800  Pro: -1.600  Ser: -0.800  Thr: -0.700  Trp: -0.900 */
aa_hydro['M'] =  1.900;
aa_hydro['F'] =  2.800;
aa_hydro['P'] = -1.600;
aa_hydro['S'] = -0.800;
aa_hydro['T'] = -0.700;
aa_hydro['W'] = -0.900;

/* Tyr: -1.300  Val:  4.200 */
/* NOTE(review): the original table comment also listed Asx -3.500, Glx -3.500
 * and Xaa -0.490, but no such entries were ever assigned; they stay 0 here.
 * Confirm whether B/Z/X were meant to carry those values. */
aa_hydro['Y'] = -1.300;
aa_hydro['V'] =  4.200;

proteinDatabaseName = argv[1];
protDbName 	    = argv[2];
taxon 	 	    = argv[3];
database 	    = argv[4];

o2 = mustOpen("pepResDist.tab", "w");

conn  = hAllocConn(database);
conn2 = hAllocConn(database);

for (j=0; j<23; j++)
    {
    aaResCnt[j] = 0;
    }

icnt = jExon = pcnt = 0;
pIcnt = 0;
molWtCnt = 0;

sqlSafef(query2, sizeof(query2), "select acc from %s.accToTaxon where taxon=%s;", proteinDatabaseName, taxon);
sr2  = sqlMustGetResult(conn2, query2);

while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    /* Every counter used as an array index grows by at most one per row and
     * icnt grows by exactly one, so this single check bounds all of them. */
    if (icnt >= maxProteins)
	{
	errAbort("more than %d proteins found for taxon %s, per-protein arrays are full\n",
		 maxProteins, taxon);
	}

    accession = row2[0];

    sqlSafefFrag(cond_str, sizeof(cond_str), "acc='%s'", accession);
    protDisplayId = sqlGetField(proteinDatabaseName, "displayId", "val", cond_str);

    sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s'", protDisplayId);
    answer = sqlGetField(database, "knownGene", "name", cond_str);

    /* count InterPro domains, only for proteins that have a known gene */
    if (answer != NULL)
	{
	sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
	answer2 = sqlGetField(protDbName, "swInterPro", "count(*)", cond_str);
	if (answer2 != NULL)
	    {
	    interProCount = interProCount + atoi(answer2);
	    interProCountDouble[ipcnt] = (double)(atoi(answer2));
	    ipcnt++;
	    }
	else
	    {
	    printf("%s is not in  InterPro DB.\n", accession);fflush(stdout);
	    }
	}

    /* count exons, using coding exons from kgProtMap2 (KG-III) table */
    sqlSafefFrag(cond_str, sizeof(cond_str), "spID='%s'", accession);
    kgId = sqlGetField(database, "kgXref", "kgID", cond_str);
    sqlSafefFrag(cond_str, sizeof(cond_str), "qName='%s'", kgId);
    answer2 = sqlGetField(database, "kgProtMap2", "blockCount", cond_str);

    if (answer2 != NULL)
	{
	/* parse the fetched value directly; no private copy is needed */
	blockCount = atoi(answer2);
	if (blockCount == 0)
	    {
	    errAbort("%s %s has 0 block count\n", accession, protDisplayId);
	    }
	exonCountDouble[jExon] = (double)blockCount;
	jExon++;
	}

    /* process Mol Wt */
    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
    answer2 = sqlGetField(database, "pepMwAa", "molWeight", cond_str);
    if (answer2 != NULL)
	{
	molWt[molWtCnt] = (double)(atof(answer2));
	molWtCnt++;
	}

    /* process pI */
    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
    answer2 = sqlGetField(database, "pepPi", "pI", cond_str);
    if (answer2 != NULL)
	{
	pI[pIcnt] = (double)(atof(answer2));
	pIcnt++;
	}

    sqlSafefFrag(cond_str, sizeof(cond_str), "acc='%s'", accession);
    aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str);
    if (aaSeq == NULL)
	{
	errAbort("%s does not have protein sequence data in %s, aborting ...\n", accession,
		 proteinDatabaseName);
	}

    len  = strlen(aaSeq);

    /* tally residues against the 23 letter alphabet */
    chp = aaSeq;
    for (i=0; i<len; i++)
	{
	aaResFound = 0;
	for (j=0; j<23; j++)
	    {
	    if (*chp == aaAlphabet[j])
		{
		aaResFound = 1;
		aaResCnt[j] ++;
		}
	    }
	if (!aaResFound)
	    {
	    warn("%c %d not a valid AA residue in %s:\n%s", *chp, *chp, accession, aaSeq);
	    }
	chp++;
	}

    /* calculate hydrophobicity */
    chp  = aaSeq;
    cCnt = 0;
    hydroSum = 0;
    for (i=0; i<len; i++)
	{
	/* unsigned char keeps the index inside 0..255 even where plain char is signed */
	hydroSum = hydroSum + aa_hydro[(unsigned char)(*chp)];

	/* count Cysteines */
	if ((*chp == 'C') || (*chp == 'c'))
	    {
	    cCnt ++;
	    }
	chp++;
	}

    aaLenDouble[icnt]  = len;
    cCountDouble[icnt] = (double)cCnt;
    avgHydro[icnt] = hydroSum/(double)len;
    icnt++;
    }

totalResCnt = 0;
for (i=0; i<23; i++)
    {
    totalResCnt = totalResCnt + aaResCnt[i];
    }

/* write out residue count distribution for the first 20 alphabet letters,
 * followed by one closing row with frequency 0 */
for (i=0; i<20; i++)
    {
    aaResCntDouble[i] = ((double)aaResCnt[i])/((double)totalResCnt);
    fprintf(o2, "%d\t%f\n", i+1, (float)aaResCntDouble[i]);
    }
fprintf(o2, "%d\t%f\n", i+1, 0.0);
carefulClose(&o2);

/* calculate and write out various distributions */
calDist(molWt,  	 molWtCnt, 21, 0.0, 10000.0,"pepMolWtDist.tab");
calDist(pI,  	         pIcnt,    61,     3.0, 0.2, 	"pepPiDist.tab");
calDist(avgHydro,     	  icnt,    41,    -2.0, 0.1, 	"pepHydroDist.tab");
calDist(cCountDouble, 	  icnt,    51,     0.0, 1.0, 	"pepCCntDist.tab");
calDist(exonCountDouble, jExon,    31,     0.0, 1.0, 	"pepExonCntDist.tab");
calDist(interProCountDouble,  ipcnt,    16,     0.0, 1.0, 	"pepIPCntDist.tab");

sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
return(0);
}
int main(int argc, char *argv[])
/* Build keggPathway.tab and keggMapDesc.tab for the database named in argv[1].
 * For every row of <db>Temp.locus2Ref0, the mRNA accessions of that locus are
 * read from <db>Temp.locus2Acc0; each accession that is a knownGene name is
 * joined against <db>Temp.keggList.  Raw rows go to j.dat / jj.dat, which are
 * then sorted, de-duplicated and removed. */
{
struct sqlConnection *conn, *conn2, *conn3;
char query[256], query2[256], query3[256];
struct sqlResult *sr, *sr2, *sr3;
char **row, **row2, **row3;

char *chp;
FILE *o1, *o2;

char *locusID;	/* LocusLink ID */
char *gbAC;		/* GenBank accession.version */
char *locusID2;	/* LocusLink ID */
char *refAC;	/* Refseq accession.version */
char *dbName;
char cond_str[200];
char *kgID;
char *mapID;
char *desc;

if (argc != 2) usage();
dbName = argv[1];

conn = hAllocConn(dbName);
conn2= hAllocConn(dbName);
conn3= hAllocConn(dbName);

o1 = fopen("j.dat",  "w");
o2 = fopen("jj.dat", "w");
if (o1 == NULL || o2 == NULL)
    {
    /* writing through a NULL stream would crash later with no explanation */
    perror("cannot open j.dat / jj.dat for writing");
    return(1);
    }

/* snprintf bounds every write; an over-long argument is truncated instead of
 * overrunning the fixed size buffers */
snprintf(query2, sizeof(query2), "select * from %sTemp.locus2Ref0;", dbName);
sr2 = sqlMustGetResult(conn2, query2);
while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    locusID2 	= row2[0];
    refAC 	= row2[1];

    snprintf(query, sizeof(query),
	     "select * from %sTemp.locus2Acc0 where locusID=%s and seqType='m';",
	     dbName, locusID2);
    sr = sqlMustGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	locusID 	= row[0];
	gbAC 		= row[1];

	/* strip the ".version" suffix from both accessions */
	chp = strstr(gbAC, ".");
	if (chp != NULL) *chp = '\0';
	chp = strstr(refAC, ".");
	if (chp != NULL) *chp = '\0';

	snprintf(cond_str, sizeof(cond_str), "name='%s'", gbAC);
	kgID = sqlGetField(dbName, "knownGene", "name", cond_str);
	if (kgID != NULL)
	    {
	    snprintf(query3, sizeof(query3),
		     "select * from %sTemp.keggList where locusID = '%s'", dbName, locusID);
	    sr3 = sqlGetResult(conn3, query3);
	    /* The loop condition is the only place that advances the cursor.
	     * The former extra sqlNextRow() at the end of the body discarded
	     * every second keggList row. */
	    while ((row3 = sqlNextRow(sr3)) != NULL)
		{
		mapID   = row3[1];
		desc    = row3[2];
		fprintf(o1, "%s\t%s\t%s\n", kgID, locusID, mapID);
		fprintf(o2, "%s\t%s\n", mapID, desc);
		}
	    sqlFreeResult(&sr3);
	    }
	}
    /* release the per-locus result before the next outer row replaces it */
    sqlFreeResult(&sr);
    }
sqlFreeResult(&sr2);

fclose(o1);
fclose(o2);
hFreeConn(&conn);
hFreeConn(&conn2);
hFreeConn(&conn3);

mustSystem("cat j.dat|sort|uniq >keggPathway.tab");
mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}
Example #18
0
int main(int argc, char *argv[])
/* Build kgXref.tab: one line per gene in <kgTempDb>.knownGene with the
 * Swiss-Prot accession, protein display ID, gene symbol, RefSeq ID, protein
 * accession and description.
 * Arguments: argv[1] temporary KG database, argv[2] protein database date
 * suffix (used to form sp<date> and proteins<date>), argv[3] read-only genome
 * database.  Rows are written to j.dat, then sorted and de-duplicated. */
    {
    struct sqlConnection *conn, *conn2, *conn3;
    char query2[256];
    struct sqlResult *sr2;
    char **row2;
    char cond_str[256];

    char *protDbDate;
    char *kgID;
    char *protDisplayId;

    FILE *o1;
    char *kgTempDb;
    char spDb[255],proteinsDb[255];
    char *ro_DB;
    char *refSeqName;
    char *hugoID;
    char *protAcc;	/* protein Accession number from NCBI */
    char *answer;
    char *emptyStr;
    char *parSpID;

    char *spID, *kgProteinID, *geneSymbol, *refseqID, *desc;

    if (argc != 4) usage();
    kgTempDb  = cloneString(argv[1]);
    protDbDate = cloneString(argv[2]);
    ro_DB = cloneString(argv[3]);

    safef(spDb, sizeof(spDb), "sp%s",  protDbDate);
    safef(proteinsDb, sizeof(proteinsDb), "proteins%s", protDbDate);

    conn = hAllocConn(ro_DB);
    conn2= hAllocConn(ro_DB);
    conn3= hAllocConn(ro_DB);

    o1 = mustOpen("j.dat", "w");

    emptyStr = strdup("");

    sqlSafef(query2, sizeof query2, "select name, proteinID from %s.knownGene;", kgTempDb);
    sr2 = sqlMustGetResult(conn2, query2);
    while ((row2 = sqlNextRow(sr2)) != NULL)
	{
	kgID 		= row2[0];
	kgProteinID	= row2[1];

	/* Start every optional field at the one shared empty string rather
	 * than allocating (and leaking) four fresh copies per row. */
	refseqID 	= emptyStr;
	geneSymbol 	= emptyStr;
	protAcc		= emptyStr;

	sqlSafefFrag(cond_str, sizeof cond_str, "displayID='%s'", kgProteinID);
	spID = sqlGetField(proteinsDb, "spXref3", "accession", cond_str);

	/* process variant splice proteins */
	if (spID == NULL)
	    {
	    sqlSafefFrag(cond_str, sizeof cond_str, "varAcc='%s'", kgProteinID);
	    spID = kgProteinID;

	    parSpID = sqlGetField(proteinsDb, "splicProt", "parAcc", cond_str);
	    if (parSpID == NULL)
		{
		fprintf(stderr, "%s not found in kgXref3 nor in varProtein.\n", kgProteinID);
		exit(1);
		}
	    /* NOTE(review): protDisplayId is NULL if the parent accession has
	     * no spXref3 row; that case is not handled here - confirm it
	     * cannot occur in the data. */
	    sqlSafefFrag(cond_str, sizeof cond_str, "accession='%s'", parSpID);
	    protDisplayId = sqlGetField(proteinsDb, "spXref3", "displayID", cond_str);
	    }
	else
	    {
	    protDisplayId = kgProteinID;
	    }

	/* use description for the protein as default, replace it with HUGO desc if available. */
	sqlSafefFrag(cond_str, sizeof cond_str, "displayID='%s'", protDisplayId);
	desc  = sqlGetField(proteinsDb, "spXref3", "description", cond_str);

	if (strstr(kgID, "NM_") != NULL)
	    {
	    /* special processing for RefSeq DNA based genes */
	    sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", kgID);
	    refSeqName = sqlGetField(ro_DB, "refLink", "name", cond_str);
	    if (refSeqName != NULL)
		{
		geneSymbol = cloneString(refSeqName);
		refseqID   = kgID;
		sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", kgID);
		desc = sqlGetField(ro_DB, "refLink", "product", cond_str);

		sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc='%s'", refseqID);
		answer = sqlGetField(ro_DB, "refLink", "protAcc", cond_str);
		if (answer != NULL)
		    {
		    protAcc = strdup(answer);
		    }
		}
	    }
	else
	    {
	    sqlSafefFrag(cond_str, sizeof cond_str, "displayID = '%s'", protDisplayId);
	    hugoID = sqlGetField(proteinsDb, "spXref3", "hugoSymbol", cond_str);
	    if (!((hugoID == NULL) || (*hugoID == '\0')) )
		{
		geneSymbol = cloneString(hugoID);

		sqlSafefFrag(cond_str, sizeof cond_str, "displayID = '%s'", protDisplayId);
		desc = sqlGetField(proteinsDb, "spXref3", "hugoDesc", cond_str);
		if (desc == NULL)
		    {
		    printf("%s/%s don't have hugo desc ...\n", kgProteinID, protDisplayId);
		    fflush(stdout);
		    }
		}

	    sqlSafefFrag(cond_str, sizeof cond_str, "mrna = '%s'", kgID);
	    answer = sqlGetField(ro_DB, "mrnaRefseq", "refseq", cond_str);
	    if (answer != NULL)
		{
		refseqID = answer;
		}

	    /* no HUGO symbol: fall back to the refLink name of the related RefSeq */
	    if ((strlen(geneSymbol) == 0) && (strlen(refseqID) != 0))
		{
		sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", refseqID);
		answer = sqlGetField(ro_DB, "refLink", "name", cond_str);
		if (answer != NULL)
		    {
		    geneSymbol = strdup(answer);
		    }
		}
	    }

	/* fix missing fields */
	if (strlen(geneSymbol) == 0)
	    {
	    /* kgID stays valid until the next sqlNextRow(), which is after the fprintf */
	    geneSymbol = kgID;
	    }

	/* desc is NULL whenever the last description lookup found no row;
	 * the old code passed that NULL straight to strlen(). */
	if ((desc == NULL) || (strlen(desc) == 0))
	    {
	    desc = "N/A";
	    }

	fprintf(o1, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
		kgID, kgID, spID, protDisplayId, geneSymbol, refseqID, protAcc, desc);
	}
    sqlFreeResult(&sr2);

    fclose(o1);
    hFreeConn(&conn);
    hFreeConn(&conn2);
    hFreeConn(&conn3);
    mustSystem("cat j.dat|sort|uniq  >kgXref.tab");
    mustSystem("rm j.dat");
    return(0);
    }
Example #19
0
void processAlign(char *kgTempDb, char *spDb, char *alignID, int cdsCnt, FILE *outf)
/* Write one candidate line to outf for each protein/mRNA pairing of the
 * kgCandidate rows that carry alignID.
 * kgTempDb - database holding kgCandidate and protMrnaScore
 * spDb     - database whose info table supplies the isCurated flag
 * alignID  - alignment ID; a leading 'U' marks a composite "protAcc_mrnaAcc" ID,
 *            for which only the first matching score row is written
 * cdsCnt   - number of entries of the file-level cdsBloc[] array to print
 * outf     - output stream
 * Each line is "chrom:<cds blocks>", ranking, protein DB id (1 curated,
 * 2 not curated, 4 accession containing '-'), score, mRNA, protein, alignID. */
{
struct sqlConnection *conn2, *conn3, *conn4;
char query2[256], query3[256];
struct sqlResult *sr2, *sr3;
char **row2, **row3;
char *score;
char *chrom;
char *protAcc;
char *mrnaID;
char *ranking;
int  protDbId;
char condStr[255];
int  i;
char *chp;
char *isCurated;
int  isComposite = (alignID[0] == 'U');

conn2= hAllocConn(kgTempDb);
conn3= hAllocConn(kgTempDb);
conn4= hAllocConn(kgTempDb);

sqlSafef(query2, sizeof(query2), "select * from %s.kgCandidate where alignID='%s'", kgTempDb, alignID);
sr2 = sqlMustGetResult(conn2, query2);
while ((row2 = sqlNextRow(sr2)) != NULL)
    {
    mrnaID = row2[0];
    chrom = row2[1];
    ranking = row2[11];

    /* check if it is a composite mrnaID */
    /* if yes, select from entries with both protein and mrna specified */
    if (isComposite)
	{
	chp = strstr(row2[0], "_");
	if (chp == NULL)
	    {
	    /* a composite ID without the separator cannot be split; the old
	     * code wrote through the NULL pointer here */
	    fprintf(stderr, "composite ID %s for alignID %s has no '_' separator, skipped.\n",
		    row2[0], alignID);
	    continue;
	    }
	*chp = '\0';
	protAcc = row2[0];
	mrnaID = chp + 1;
	sqlSafef(query3, sizeof(query3),
	      "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' and protAcc='%s'",
	      kgTempDb, mrnaID, protAcc);
	}
    else
	{
	sqlSafef(query3, sizeof(query3),
	      "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' order by score desc",
	      kgTempDb, mrnaID);
	}

    sr3  = sqlMustGetResult(conn3, query3);
    while ((row3 = sqlNextRow(sr3)) != NULL)
	{
	protAcc = row3[0];
	score   = row3[1];

	if (strstr(protAcc, "-") != NULL)
	    {
	    protDbId = 4;
	    }
	else
	    {
	    sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", protAcc);
	    isCurated = sqlGetField(spDb, "info", "isCurated", condStr);
	    /* an accession missing from the info table yields NULL; treat it
	     * as not curated instead of handing NULL to sameWord() */
	    if ((isCurated != NULL) && sameWord(isCurated, "1"))
		{
		protDbId = 1;
		}
	    else
		{
		protDbId = 2;
		}
	    }

	fprintf(outf, "%s:", chrom);
	for (i=0; i<cdsCnt; i++) fprintf(outf, "%s", cdsBloc[i]);
	fprintf(outf, "\t%s\t%d\t%8s\t%s\t%s\t%s\n",
		ranking, protDbId, score, mrnaID, protAcc, alignID);

	/* for composite type, process just one record */
	if (isComposite) break;
	}
    sqlFreeResult(&sr3);
    }
sqlFreeResult(&sr2);
hFreeConn(&conn2);
hFreeConn(&conn3);
hFreeConn(&conn4);
}
Example #20
0
int main(int argc, char *argv[])
/* Build keggPathway.tab and keggMapDesc.tab from Entrez gene IDs.
 * Arguments: argv[1] temporary KG database holding keggList, argv[2] read-only
 * genome database.  The gene table is taken from the "table" option (default
 * knownGene).  For each gene name the mRNA is looked up in kgXref, mapped to an
 * Entrez gene ID (entrezMrna first, then entrezRefseq) and joined against
 * keggList.  Raw rows go to j.dat / jj.dat, which are then sorted,
 * de-duplicated and removed. */
{
struct sqlConnection *conn, *conn3;
char query[256], query3[256];
struct sqlResult *sr, *sr3;
char **row, **row3;

FILE *o1, *o2;

char *locusID;	/* LocusLink ID */

char *kgTempDbName, *roDbName;
char cond_str[200];
char *kgId;
char *mapID;
char *desc;
char *mRNA;

optionInit(&argc, argv, options);
if (argc != 3)  usage();
kgTempDbName    = argv[1];
roDbName 	= argv[2];

conn = hAllocConn(roDbName);
conn3= hAllocConn(roDbName);

o1 = fopen("j.dat",  "w");
o2 = fopen("jj.dat", "w");
if (o1 == NULL || o2 == NULL)
    {
    /* writing through a NULL stream would crash later with no explanation */
    perror("cannot open j.dat / jj.dat for writing");
    return(1);
    }

table = optionVal("table", "knownGene");
sqlSafef(query, sizeof(query), "select name from %s.%s", roDbName, table);
sr = sqlMustGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    kgId = row[0];
    locusID = NULL;

    sqlSafefFrag(cond_str, sizeof(cond_str), "kgId='%s'", kgId);
    mRNA = sqlGetField(roDbName, "kgXref", "mRNA", cond_str);

    /* without an mRNA there is nothing to look up in Entrez; skip the two
     * queries rather than formatting a NULL string into them */
    if (mRNA != NULL)
	{
	sqlSafefFrag(cond_str, sizeof(cond_str), "mrna='%s'", mRNA);
	locusID = sqlGetField("entrez", "entrezMrna", "geneId", cond_str);

	/* look for RefSeq if not found in mRNAs */
	if (locusID == NULL)
	    {
	    sqlSafefFrag(cond_str, sizeof(cond_str), "refseq='%s'", mRNA);
	    locusID = sqlGetField("entrez", "entrezRefseq", "geneId", cond_str);
	    }
	}

    if (locusID != NULL)
	{
	sqlSafef(query3, sizeof(query3), "select * from %s.keggList where locusID = '%s'", kgTempDbName, locusID);
	sr3 = sqlGetResult(conn3, query3);
	/* The loop condition is the only place that advances the cursor.  The
	 * former extra sqlNextRow() at the end of the body discarded every
	 * second keggList row. */
	while ((row3 = sqlNextRow(sr3)) != NULL)
	    {
	    mapID   = row3[1];
	    desc    = row3[2];
	    fprintf(o1, "%s\t%s\t%s\n", kgId, locusID, mapID);
	    fprintf(o2, "%s\t%s\n", mapID, desc);
	    }
	sqlFreeResult(&sr3);
	}
    else if (differentString(table, "knownGene"))
	{
	/* Not found in Entrez: for tables other than knownGene, keggList is
	 * searched by the gene name itself and name2 is reported as the locus.
	 * NOTE(review): locusID may be NULL here if the name2 lookup finds no
	 * row, and it is still printed with %s - confirm name2 is always set. */
	sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", kgId);
	locusID = sqlGetField(roDbName, table, "name2", cond_str);
	sqlSafef(query3, sizeof(query3), "select * from %s.keggList where locusID = '%s'", kgTempDbName, kgId);
	sr3 = sqlGetResult(conn3, query3);
	while ((row3 = sqlNextRow(sr3)) != NULL)
	    {
	    mapID   = row3[1];
	    desc    = row3[2];
	    fprintf(o1, "%s\t%s\t%s\n", kgId, locusID, mapID);
	    fprintf(o2, "%s\t%s\n", mapID, desc);
	    }
	sqlFreeResult(&sr3);
	}
    }
sqlFreeResult(&sr);

fclose(o1);
fclose(o2);
hFreeConn(&conn);
hFreeConn(&conn3);

mustSystem("cat j.dat|sort|uniq >keggPathway.tab");
mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}
Example #21
0
void doTracks(char *proteinID, char *mrnaID, char *aa, int *yOffp, char *psOutput)
/* draw various protein tracks */
{
int l;

char aaOrigOffsetStr[20];
int hasResFreq;
char uniProtDbName[50];
char *protDbDate;
char *chrom;
char strand;
char *kgId, *kgPep, *protPep;
char cond_str[255];
char *answer;
//int i, ll;
//char *chp1, *chp2;

g_font = mgSmallFont();
safef(pbScaleStr, sizeof(pbScaleStr), "%d", pbScale);

if (psOutput != NULL)
    {
    pbScale         = atoi(cartOptionalString(cart, "pbt.pbScaleStr"));
    }

if (cgiOptionalString("trackOffset") != NULL)
	{
	trackOrigOffset = atoi(cgiOptionalString("trackOffset")); 
	}

if (cgiOptionalString("pbScaleStr") != NULL)
	{
	pbScale  = atoi(cgiOptionalString("pbScaleStr")); 
	}

if (cgiOptionalString("pbScale") != NULL)
    {
    scaleButtonPushed = TRUE;
    if (strcmp(cgiOptionalString("pbScale"), "1/6")  == 0) pbScale = 1;
    if (strcmp(cgiOptionalString("pbScale"), "1/2")  == 0) pbScale = 3;
    if (strcmp(cgiOptionalString("pbScale"), "FULL") == 0) pbScale = 6;
    if (strcmp(cgiOptionalString("pbScale"), "DNA")  == 0) pbScale =22;
    safef(pbScaleStr, sizeof(pbScaleStr), "%d", pbScale);
    cgiMakeHiddenVar("pbScaleStr", pbScaleStr);
    }
else
    {
    scaleButtonPushed = FALSE;
    }

if (psOutput == NULL)
{
if (cgiVarExists("pbt.left3"))
    {
    relativeScroll(-0.95);
    initialWindow = FALSE;
    }
else if (cgiVarExists("pbt.left2"))
    {
    relativeScroll(-0.475);
    initialWindow = FALSE;
    }
else if (cgiVarExists("pbt.left1"))
    {
    relativeScroll(-0.02);
    initialWindow = FALSE;
    }
else if (cgiVarExists("pbt.right1"))
    {
    relativeScroll(0.02);
    initialWindow = FALSE;
    }
else if (cgiVarExists("pbt.right2"))
    {
    relativeScroll(0.475);
    initialWindow = FALSE;
    }
else if (cgiVarExists("pbt.right3"))
    {
    relativeScroll(0.95);
    initialWindow = FALSE;
    }
}

dnaUtilOpen();

l=strlen(aa);

/* initialize AA properties */
aaPropertyInit(&hasResFreq);
sfCount = getSuperfamilies2(proteinID);
if (sfCount == 0)
    {
    sfCount = getSuperfamilies(proteinID);
    }
if (mrnaID != NULL)
    {
    if (kgVersion == KG_III)
    	{
	doExonTrack = FALSE;
	sqlSafefFrag(cond_str, sizeof(cond_str), "spId='%s'", proteinID);
        kgId = sqlGetField(database, "kgXref", "kgId", cond_str);
	if (kgId != NULL)
	    {
	    sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", kgId);
            kgPep = sqlGetField(database, "knownGenePep", "seq", cond_str);
      	    //printf("<pre><br>%s", kgPep);fflush(stdout);
	    if (kgPep != NULL)
	    	{
		if (strstr(protDbName, "proteins") != NULL)
		    {
		    protDbDate = strstr(protDbName, "proteins") + strlen("proteins");
		    safef(uniProtDbName, sizeof(uniProtDbName),"sp%s", protDbDate);
		
		    sqlSafefFrag(cond_str, sizeof(cond_str), "acc='%s'", proteinID);
            	    protPep = sqlGetField(uniProtDbName, "protein", "val", cond_str);
            	    //printf("<br>%s\n", protPep);fflush(stdout);
            	    if (protPep != NULL)
		    	{
			if (sameWord(kgPep, protPep))
			    {
			    //printf("<br>MATCH!\n");fflush(stdout);
		    	    sqlSafefFrag(cond_str, sizeof(cond_str), "qName='%s'", kgId);
            	    	    answer = sqlGetField(database, kgProtMapTableName, 
			    			 "qName", cond_str);
            	    	    if (answer != NULL)
			    	{
    			    	/* NOTE: passing in kgId instead of proteinID because
					 kgProtMap2's qName uses kgId instead of 
					 protein display ID */
    			    	getExonInfo(kgId, &exCount, &chrom, &strand);
			    	assert(exCount > 0);
				doExonTrack = TRUE;
			    	}
			    }
			/*
			else
			    {
			    chp1 = kgPep;
			    printf("<br>");
			    chp2 = protPep;
			    ll = strlen(kgPep);
			    if (strlen(protPep) < ll) ll= strlen(protPep);
			    for (i=0; i<ll; i++)
			    	{
				if (*chp1 != *chp2)
					{
					printf("%c", *chp1);
					}
				else
					{
					printf(".");
					}
				chp1++; chp2++;
				}
			    }
			    //printf("</pre>");fflush(stdout);
			*/
			}
		    }
		}
	    }
	}
    else
    	{
	doExonTrack = TRUE;
    	getExonInfo(proteinID, &exCount, &chrom, &strand);
    	assert(exCount > 0);
	}
    /* do the following only if pbTracks called doTracks() */
    if (initialWindow && IAmPbTracks)
	{
	prevGBOffsetSav = calPrevGB(exCount, chrom, strand, l, yOffp, proteinID, mrnaID);
	trackOrigOffset = prevGBOffsetSav;
    	if (trackOrigOffset > (protSeqLen*pbScale - 600))
	    trackOrigOffset = protSeqLen*pbScale - 600;
	/* prevent negative value */
	if (trackOrigOffset < 0) trackOrigOffset = 0;
	}

    /* if this if for PDF/Postscript, the trackOrigOffset is already calculated previously,
        use the saved value */
    if (psOutput != NULL)
    	{
    	trackOrigOffset = atoi(cartOptionalString(cart, "pbt.trackOffset"));
    	}
    }

/*printf("<br>%d %d<br>%d %d\n", prevGBStartPos, prevGBEndPos, 
	blockGenomeStartPositive[exCount-1], blockGenomeStartPositive[0]); fflush(stdout);
*/
if (strand == '-')
    {
    if ((prevGBStartPos <= blockGenomeStartPositive[exCount-1]) && (prevGBEndPos >= blockGenomeStartPositive[0]))
    	{
    	showPrevGBPos = FALSE;
    	}
    }
else
    {
    if ((prevGBStartPos <= blockGenomeStartPositive[0]) && (prevGBEndPos >= blockGenomeStartPositive[exCount-1]))
    	{
    	showPrevGBPos = FALSE;
    	}
    }

if ((cgiOptionalString("aaOrigOffset") != NULL) && scaleButtonPushed)
     {
     trackOrigOffset = atoi(cgiOptionalString("aaOrigOffset"))*pbScale;
     }

pixWidth = 160+ protSeqLen*pbScale;
if (pixWidth > MAX_PB_PIXWIDTH)
   {
   pixWidth = MAX_PB_PIXWIDTH;
   }

if ((protSeqLen*pbScale - trackOrigOffset) < MAX_PB_PIXWIDTH)
    {
    pixWidth = protSeqLen*pbScale - trackOrigOffset + 160;
    }

if (pixWidth < 550) pixWidth = 550;

insideWidth = pixWidth-gfxBorder;

if (proteinInSupportedGenome)
    {
    pixHeight = 250;
    }
else
    {
    pixHeight = 215;
    }

if (sfCount > 0) pixHeight = pixHeight + 20;

/* make room for individual residues display */
if (pbScale >=6)  pixHeight = pixHeight + 20;
if (pbScale >=18) pixHeight = pixHeight + 30;

if (psOutput)
    {
    vg = vgOpenPostScript(pixWidth, pixHeight, psOutput);
    suppressHtml = TRUE;
    hideControls = TRUE;
    }
else
    {
    trashDirFile(&gifTn, "pbt", "pbt", ".png");
    vg = vgOpenPng(pixWidth, pixHeight, gifTn.forCgi, FALSE);
    }

/* Put up horizontal scroll controls. */
hWrites("Move ");
hButton("pbt.left3", "<<<");
hButton("pbt.left2", " <<");
hButton("pbt.left1", " < ");
hButton("pbt.right1", " > ");
hButton("pbt.right2", ">> ");
hButton("pbt.right3", ">>>");

hPrintf(" &nbsp &nbsp ");

/* Put up scaling controls. */
hPrintf("Current scale: ");
if (pbScale == 1)  hPrintf("1/6 ");
if (pbScale == 3)  hPrintf("1/2 ");
if (pbScale == 6)  hPrintf("FULL ");
if (pbScale == 22) hPrintf("DNA ");

hPrintf(" &nbsp&nbsp Rescale to ");
hPrintf("<INPUT TYPE=SUBMIT NAME=\"pbScale\" VALUE=\"1/6\">\n");
hPrintf("<INPUT TYPE=SUBMIT NAME=\"pbScale\" VALUE=\"1/2\">\n");
hPrintf("<INPUT TYPE=SUBMIT NAME=\"pbScale\" VALUE=\"FULL\">\n");
if (kgVersion == KG_III)
    {
    /* for KG III, the protein has to exist in the kgProtMap2 table 
       (which will turn on doExonTrack flag)
       to provide the genomic position data needed for DNA sequence display */
    if ((proteinInSupportedGenome) && (doExonTrack))
    hPrintf("<INPUT TYPE=SUBMIT NAME=\"pbScale\" VALUE=\"DNA\">\n");
    }
else
    {
    if (proteinInSupportedGenome) 
   	hPrintf("<INPUT TYPE=SUBMIT NAME=\"pbScale\" VALUE=\"DNA\">\n");
    }
hPrintf("<FONT SIZE=1><BR><BR></FONT>\n");

g_vg = vg;

pbRed    = vgFindColorIx(g_vg, 0xf9, 0x51, 0x59);
pbBlue   = vgFindColorIx(g_vg, 0x00, 0x00, 0xd0);
bkgColor = vgFindColorIx(vg, 255, 254, 232);

vgBox(vg, 0, 0, insideWidth, pixHeight, bkgColor);

/* Start up client side map. */
hPrintf("<MAP Name=%s>\n", mapName);

vgSetClip(vg, 0, gfxBorder, insideWidth, pixHeight - 2*gfxBorder);

/* start drawing indivisual tracks */

doAAScale(l, yOffp, 1);

if (pbScale >= 6)  doResidues(aa, l, yOffp);

if (pbScale >= 18) doDnaTrack(chrom, strand, exCount, l, yOffp);

if ((mrnaID != NULL) && showPrevGBPos)
    {
    doPrevGB(exCount, chrom, strand, l, yOffp, proteinID, mrnaID);
    }

if (mrnaID != NULL)
    {
    if (doExonTrack) doExon(exCount, chrom, l, yOffp, proteinID, mrnaID);
    }

doCharge(aa, l, yOffp);

doHydrophobicity(aa, l, yOffp);

doCysteines(aa, l, yOffp);

if (sfCount > 0) doSuperfamily(ensPepName, sfCount, yOffp); 

if (hasResFreq) doAnomalies(aa, l, yOffp);

doAAScale(l, yOffp, -1);

vgClose(&vg);

/* Finish map and save out picture and tell html file about it. */
hPrintf("</MAP>\n");

/* put tracks image here */

hPrintf(
"\n<IMG SRC=\"%s\" BORDER=1 WIDTH=%d HEIGHT=%d USEMAP=#%s><BR>",
        gifTn.forCgi, pixWidth, pixHeight, mapName);

if (proteinInSupportedGenome)
    {
    hPrintf("<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml#tracks\" TARGET=_blank>");
    }
else
    {
    if (hIsGsidServer())
	{
	hPrintf("<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbGsid/pbTracksHelp.shtml#tracks\" TARGET=_blank>");
    	}
    else
	{
	hPrintf("<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml#tracks\" TARGET=_blank>");
    	}
    }

hPrintf("Explanation of Protein Tracks</A><br>");

safef(trackOffset, sizeof(trackOffset), "%d", trackOrigOffset);
cgiMakeHiddenVar("trackOffset", trackOffset);

/* remember where the AA base origin is so that it can be passed to next PB page */
aaOrigOffset = trackOrigOffset/pbScale;
safef(aaOrigOffsetStr, sizeof(aaOrigOffsetStr), "%d", aaOrigOffset);
cgiMakeHiddenVar("aaOrigOffset", aaOrigOffsetStr);

/* save the following state variables, to be used by PDF/Postcript processing */
cartSetString(cart,"pbt.pbScaleStr", pbScaleStr);
cartSetString(cart,"pbt.trackOffset", trackOffset);
cartSaveSession(cart);
fflush(stdout);
}
Example #22
0
static void showProtH1n1(char *item, char *geneSymbol)
/* Print the "Protein Structure Analysis and Prediction" HTML section for one
 * H1N1 sequence to stdout.
 *   item       - dnaSeqId looked up in gisaidXref; errAbort() is called when
 *                no row matches.
 *   geneSymbol - label used in the 3D structure heading.
 * Emits links to the 1918 and consensus comparison pages and, when
 * getH1n1Model() reports a model for the gene, a link to the model PDB file,
 * a Chimera link and the rendered image. */
{
char query2[256];
struct sqlResult *sr2;
char **row2;
struct sqlConnection *conn2 = hAllocConn(database);
char *aaSeqId = NULL;
char *gene = NULL;

sqlSafef(query2, sizeof(query2),
	"select subjId, dnaSeqId, aaSeqId, gene from gisaidXref where dnaSeqId='%s'", item);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
if (row2 != NULL)
    {
    /* Copy the columns we need: the row storage goes away with the result. */
    aaSeqId = strdup(row2[2]);
    gene    = strdup(row2[3]);
    }
else
    {
    errAbort("%s not found.", item);
    }
sqlFreeResult(&sr2);
/* The connection is only needed for the lookup above; hand it back now so it
 * is released on every path out of this function. */
hFreeConn(&conn2);

printf("<H3>Protein Structure Analysis and Prediction</H3>");

printf("<B>Comparison to 1918 Flu Virus:</B> ");
printf("<A HREF=\"%s/%s/%s/1918_%s.mutate", getH1n1StructUrl(), gene, aaSeqId, aaSeqId);
printf("\" TARGET=_blank>%s</A><BR>\n", aaSeqId);

printf("<B>Comparison to A H1N1 gene %s concensus:</B> ", gene);
printf("<A HREF=\"%s/%s/%s/consensus_%s.mutate", getH1n1StructUrl(), gene, aaSeqId, aaSeqId);
printf("\" TARGET=_blank>%s</A><BR>\n", aaSeqId);

printf("<BR><B>3D Structure Prediction of %s concensus sequence (with variation of sequence %s highlighted):", geneSymbol, item);
printf("<BR>PDB file:</B> ");

/* Modeller output: only shown when a model exists for this gene. */
char modelPdbUrl[PATH_LEN];
if (getH1n1Model(gene, modelPdbUrl))
    {
    struct tempName imageFile, chimeraScript, chimerax;
    mkH1n1StructData(gene, NULL, aaSeqId, &imageFile, &chimeraScript);
    mkChimerax(gene, modelPdbUrl, chimeraScript.forCgi, &chimerax);
    printf("<A HREF=\"%s\" TARGET=_blank>%s</A>, view with <A HREF=\"%s\">Chimera</A><BR>\n", modelPdbUrl, gene, chimerax.forHtml);
    printf("<TABLE>\n");
    printf("<TR>\n");
    printf("<TD ALIGN=\"center\"><img src=\"%s\"></TD>", imageFile.forHtml);
    printf("</TR>\n");
    printf("</TABLE>\n");
    }
}
Example #23
0
int getSuperfamilies2(char *proteinID)
/* getSuperfamilies2() supersedes getSuperfamilies() starting from hg16.
 * It collects the Superfamily assignments of a protein from ensemblXref3,
 * sfAssign and sfDes in the proteins database (protDbName) and stores them in
 * the global arrays sfId[], sfStart[], sfEnd[] and superfam_name[] for use by
 * doSuperfamily().
 *   proteinID - accession matched against swissAcc or tremblAcc.
 * Returns the number of entries stored, or 0 when either the sfAssign or the
 * ensemblXref3 table is missing.
 * A region value is a comma separated list of "start-end" ranges; one entry
 * is stored per range.  Parsing of a region stops at the first piece that
 * does not have that form.
 * NOTE(review): sfCnt is not checked against the capacity of the global
 * arrays, which is not visible from here - confirm they are large enough. */
{
struct sqlConnection *conn, *conn2;
char query[MAXNAMELEN], query2[MAXNAMELEN];
struct sqlResult *sr, *sr2;
char **row, **row2;

char cond_str[255];

char *sfID, *seqID, *sfDesc, *region;
int j;

char *chp, *chp2;
int  sfCnt;
int  sfIdNum;
int  int_start, int_end;

if (!hTableExists(protDbName, "sfAssign")) return(0);
if (!hTableExists(protDbName, "ensemblXref3")) return(0);

conn  = hAllocConn(database);
conn2 = hAllocConn(database);

sqlSafef(query2, sizeof(query2),
    "select distinct sfID, seqID from %s.ensemblXref3 x, %s.sfAssign a where (swissAcc='%s' or tremblAcc='%s') and seqID=x.protein and protein != '' and evalue <= 0.02",
      protDbName, protDbName, proteinID, proteinID);
sr2  = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
sfCnt=0;
while (row2 != NULL)
    {
    sfID = row2[0];
    seqID= row2[1];
    sfIdNum = atoi(sfID);

    sqlSafef(query, sizeof(query),
          "select region from %s.sfAssign where sfID='%s' and seqID='%s' and evalue <=0.02",
          protDbName, sfID, seqID);
    sr = sqlMustGetResult(conn, query);
    row = sqlNextRow(sr);

    while (row != NULL)
        {
        region   = row[0];

        /* A superfamily that has already been recorded is not added again. */
        for (j=0; j<sfCnt; j++)
            {
            if (sfId[j] == sfIdNum) goto skip;
            }

        sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID);
        sfDesc = sqlGetField(protDbName, "sfDes", "description", cond_str);
        if (sfDesc == NULL)
            sfDesc = "";        /* no description row: store an empty name */

        /* Walk the "start-end,start-end,..." list, cutting it up in place. */
        chp = region;
        while (chp != NULL)
            {
            chp2 = strstr(chp, "-");
            if (chp2 == NULL)
                break;          /* no range separator: malformed or empty */
            *chp2 = '\0';
            chp2++;
            if (sscanf(chp, "%d", &int_start) != 1)
                break;

            chp = chp2;
            chp2 = strstr(chp, ",");
            if (chp2 != NULL)
                {
                *chp2 = '\0';
                chp2++;         /* start of the next range */
                }
            if (sscanf(chp, "%d", &int_end) != 1)
                break;

            sfId[sfCnt]    = sfIdNum;
            sfStart[sfCnt] = int_start;
            sfEnd[sfCnt]   = int_end;
            strncpy(superfam_name[sfCnt], sfDesc, MAXNAMELEN-1);
            sfCnt++;
            chp = chp2;         /* NULL after the last range ends the loop */
            }
skip:
        row = sqlNextRow(sr);
        }

    sqlFreeResult(&sr);
    row2 = sqlNextRow(sr2);
    }

sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
return(sfCnt);
}
Example #24
0
void showSAM_h1n1(char *item)
/* Print the SAM-T02 "Protein Structure Analysis and Prediction" HTML section
 * for item to stdout: links to the alignment and secondary structure parts of
 * the summary page, the predicted PDB file with a Chimera link, a table of
 * three 200-pixel views with links to the 500-pixel versions, and a link to
 * the detailed results.
 * The function returns right after that; the homolog listing that follows the
 * return statement is currently never executed. */
{
char query2[256];
struct sqlResult *sr2;
char **row2;
struct sqlConnection *conn2 = hAllocConn(database);
char cond_str[256];
char *predFN;
char *homologID;
char *SCOPdomain;
char *chain;
char goodSCOPdomain[40];
int  first = 1;
float  eValue;
char *chp;
int homologCount;
int gotPDBFile = 0;

printf("<H3>Protein Structure Analysis and Prediction by ");
printf("<A HREF=\"http://www.soe.ucsc.edu/research/compbio/SAM_T02/sam-t02-faq.html\"");
printf(" TARGET=_blank>SAM-T02</A></H3>\n");

printf("<B>Multiple Alignment:</B> ");
printf("<A HREF=\"%s/%s/summary.html#alignment", getH1n1StructUrl(), item);
printf("\" TARGET=_blank>%s</A><BR>\n", item);

printf("<B>Secondary Structure Predictions:</B> ");
printf("<A HREF=\"%s/%s/summary.html#secondary-structure", getH1n1StructUrl(), item);
printf("\" TARGET=_blank>%s</A><BR>\n", item);

printf("<B>3D Structure Prediction (PDB file):</B> ");
char pdbUrl[PATH_LEN];
safef(pdbUrl, sizeof(pdbUrl), "%s/%s/decoys/%s.try1-opt3.pdb.gz", getH1n1StructUrl(), item, item);
struct tempName chimerax;
mkChimerax(item, pdbUrl, NULL, &chimerax);

printf("<A HREF=\"%s\" TARGET=_blank>%s</A>, view with <A HREF=\"%s\">Chimera</A><BR>\n", pdbUrl, item, chimerax.forHtml);

gotPDBFile = 0;
/* This condition is only consumed by the disabled section below. */
sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s' and evalue <1.0e-5;", item);

/* Front / top / side thumbnails, each with a link to the larger image. */
printf("<TABLE>\n");
printf("<TR><TD ALIGN=\"center\">Front</TD>\n");
printf("<TD ALIGN=\"center\">Top</TD>\n");
printf("<TD ALIGN=\"center\">Side</TD>\n");
printf("</TR>\n");
printf("<TR>\n");
printf("<TD ALIGN=\"center\"><img src=\"%s/%s/%s.undertaker-align.view1_200.jpg\"></TD>", getH1n1StructUrl(), item, item);
printf("<TD ALIGN=\"center\"><img src=\"%s/%s/%s.undertaker-align.view2_200.jpg\"></TD>", getH1n1StructUrl(), item, item);
printf("<TD ALIGN=\"center\"><img src=\"%s/%s/%s.undertaker-align.view3_200.jpg\"></TD>", getH1n1StructUrl(), item, item);
printf("</TR>\n");
printf("<TR>\n");
printf("<TD ALIGN=\"center\"><A HREF=\"%s/%s/%s.undertaker-align.view1_500.jpg\">500x500</A></TD>",
	getH1n1StructUrl(), item, item);
printf("<TD ALIGN=\"center\"><A HREF=\"%s/%s/%s.undertaker-align.view2_500.jpg\">500x500</A></TD>",
	getH1n1StructUrl(), item, item);
printf("<TD ALIGN=\"center\"><A HREF=\"%s/%s/%s.undertaker-align.view3_500.jpg\">500x500</A></TD>",
	getH1n1StructUrl(), item, item);
printf("</TR>\n");
printf("</TABLE>\n");

printf("<BR><B>Detailed results of SAM-T02:</B> ");
printf("<A HREF=\"%s/%s/summary.html", getH1n1StructUrl(), item);
printf("\" TARGET=_blank>%s</A><BR>\n", item);

/* by pass the following additional processing for now, until two necessary tables are built */
/* conn2 has not been used up to this point; it is released before leaving. */
hFreeConn(&conn2);
return;

/* ---- Everything below is unreachable while the return above is in place. ---- */

/* Link to the prediction file when a protHomolog row with evalue < 1.0e-5 exists. */
if (sqlGetField(database, "protHomolog", "proteinID", cond_str) != NULL)
    {
    sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s'", item);
    predFN = sqlGetField(database, "protPredFile", "predFileName", cond_str);
    if (predFN != NULL)
	{
	printf("<A HREF=\"../SARS/%s/", item);
	/* printf("%s.t2k.undertaker-align.pdb\">%s</A><BR>\n", item,item); */
	printf("%s\">%s</A><BR>\n", predFN,item);
	gotPDBFile = 1;
	}
    }
if (!gotPDBFile)
    {
    printf("No high confidence level structure prediction available for this sequence.");
    printf("<BR>\n");
    }
printf("<B>3D Structure of Close Homologs:</B> ");
homologCount = 0;
strcpy(goodSCOPdomain, "dummy");

conn2= hAllocConn(database);
sqlSafef(query2, sizeof(query2),
	"select homologID,eValue,SCOPdomain,chain from sc1.protHomolog where proteinID='%s' and evalue <= 0.01;",
	item);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
if (row2 != NULL)
    {
    while (row2 != NULL)
	{
	homologID = row2[0];
	sscanf(row2[1], "%e", &eValue);
	SCOPdomain = row2[2];
	/* Cut SCOPdomain at its last '.'.
	 * NOTE(review): the backwards scan has no lower bound, so a value
	 * without any '.' would be read before its start. */
	chp = SCOPdomain+strlen(SCOPdomain)-1;
	while (*chp != '.') chp--;
	*chp = '\0';
	chain = row2[3];
	/* A hit with eValue <= 1.0e-10 sets the reference domain; any other hit
	 * is listed only if it has that same domain and eValue <= 0.1.
	 * NOTE(review): strcpy into goodSCOPdomain[40] is unbounded. */
	if (eValue <= 1.0e-10)
	    strcpy(goodSCOPdomain, SCOPdomain);
	else
	    {
	    if (strcmp(goodSCOPdomain,SCOPdomain) != 0)
		goto skip;
	    else
		if (eValue > 0.1) goto skip;
	    }
	/* Comma-separate every listed homolog after the first one. */
	if (first)
	    first = 0;
	else
	    printf(", ");

	printf("<A HREF=\"http://www.rcsb.org/pdb/cgi/explore.cgi?job=graphics&pdbId=%s",
	       homologID);
	if (strlen(chain) >= 1)
	    printf("\"TARGET=_blank>%s(chain %s)</A>", homologID, chain);
	else
	    printf("\"TARGET=_blank>%s</A>", homologID);
	homologCount++;

	skip:
	row2 = sqlNextRow(sr2);
	}
    }
/* NOTE(review): the connection is released before the result that was
 * obtained from it - confirm this order is safe before re-enabling. */
hFreeConn(&conn2);
sqlFreeResult(&sr2);
if (homologCount == 0)
    printf("None<BR>\n");

printf("<BR><B>Details:</B> ");
printf("<A HREF=\"../SARS/%s/summary.html", item);
printf("\" TARGET=_blank>%s</A><BR>\n", item);

htmlHorizontalLine();
}
Example #25
0
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile, char *loc2refFile, 
	char *pepFile, char *mim2locFile)
/* hgRefSeqMrna - Load refSeq mRNA alignments and other info into 
 * refSeqGene table.
 *   database    - database opened for update with hgStartUpdate().
 *   faFile      - mRNA sequences, written to the refMrna tab file.
 *   raFile      - .ra records; acc and siz are required, gen, cds, ngi and
 *                 pro are optional.
 *   pslFile     - alignments; one refGene line is written for each alignment
 *                 whose query is in the .ra file and has a displayID in
 *                 spXref2 of proteinDB.
 *   loc2refFile - tab separated file with at least 5 columns; supplies the
 *                 locus link id and the protein accession.
 *   pepFile     - peptide sequences, written to the refPep tab file.
 *   mim2locFile - two column file giving an OMIM id and its locus link id.
 * The refLink, refGene, refPep and refMrna tables are created if needed and
 * emptied first.  Tab files are written to the current directory and are
 * loaded into the database unless clTest is set. */
{
struct lineFile *lf;
struct hash *raHash, *rsiHash = newHash(0);
struct hash *loc2mimHash = newHash(0);
struct refSeqInfo *rsiList = NULL, *rsi;
char *s, *line, *row[5];
int wordCount, dotMod = 0;
int noLocCount = 0;
int rsiCount = 0;
int noProtCount = 0;
struct psl *psl;
struct sqlConnection *conn = hgStartUpdate(database);
struct hash *productHash = loadNameTable(conn, "productName", 16);
struct hash *geneHash = loadNameTable(conn, "geneName", 16);
char *kgName = "refGene";

FILE *kgTab = hgCreateTabFile(".", kgName);
FILE *productTab = hgCreateTabFile(".", "productName");
FILE *geneTab = hgCreateTabFile(".", "geneName");
FILE *refLinkTab = hgCreateTabFile(".", "refLink");
FILE *refPepTab = hgCreateTabFile(".", "refPep");
FILE *refMrnaTab = hgCreateTabFile(".", "refMrna");

struct exon *exonList = NULL, *exon;
char *answer;
char cond_str[200];

/* Make refLink and other tables table if they don't exist already. */
sqlMaybeMakeTable(conn, "refLink", refLinkTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refLink");
sqlMaybeMakeTable(conn, "refGene", refGeneTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refGene");
sqlMaybeMakeTable(conn, "refPep", refPepTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refPep");
sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refMrna");

/* Scan through locus link to omim ID file and put in hash. */
    {
    char *row[2];

    printf("Scanning %s\n", mim2locFile);
    lf = lineFileOpen(mim2locFile, TRUE);
    while (lineFileRow(lf, row))
        {
        hashAdd(loc2mimHash, row[1], intToPt(atoi(row[0])));
        }
    lineFileClose(&lf);
    }

/* Scan through .ra file and make up start of refSeqInfo
 * objects in hash and list. */
printf("Scanning %s\n", raFile);
lf = lineFileOpen(raFile, TRUE);
while ((raHash = hashNextRa(lf)) != NULL)
    {
    if (clDots > 0 && ++dotMod == clDots )
        {
        dotMod = 0;
        dotOut();
        }
    AllocVar(rsi);
    slAddHead(&rsiList, rsi);
    if ((s = hashFindVal(raHash, "acc")) == NULL)
        errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName);
    rsi->mrnaAcc = cloneString(s);
    if ((s = hashFindVal(raHash, "siz")) == NULL)
        errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName);
    rsi->size = atoi(s);
    if ((s = hashFindVal(raHash, "gen")) != NULL)
        rsi->geneName = cloneString(s);
    //!!!else
      //!!!  warn("No gene name for %s", rsi->mrnaAcc);
    /* Without a cds record the coding end is set to the mRNA size. */
    if ((s = hashFindVal(raHash, "cds")) != NULL)
        parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd);
    else
        rsi->cdsEnd = rsi->size;
    if ((s = hashFindVal(raHash, "ngi")) != NULL)
        rsi->ngi = atoi(s);

    rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName);
    s = hashFindVal(raHash, "pro");
    if (s != NULL)
        rsi->productName = cloneString(s);
    rsi->productNameId = putInNameTable(productHash, productTab, s);
    hashAdd(rsiHash, rsi->mrnaAcc, rsi);

    freeHashAndVals(&raHash);
    }
lineFileClose(&lf);
if (clDots) printf("\n");

/* Scan through loc2ref filling in some gaps in rsi. */
printf("Scanning %s\n", loc2refFile);
lf = lineFileOpen(loc2refFile, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    char *mrnaAcc;

    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, row);
    if (wordCount < 5)
        errAbort("Expecting at least 5 tab-separated words line %d of %s",
                lf->lineIx, lf->fileName);
    mrnaAcc = row[1];
    mrnaAcc = accWithoutSuffix(mrnaAcc);

    if (mrnaAcc[2] != '_')
        warn("%s is and odd name %d of %s", 
                mrnaAcc, lf->lineIx, lf->fileName);
    if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL)
        {
        rsi->locusLinkId = lineFileNeedNum(lf, row, 0);
        rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0]));
        rsi->proteinAcc = cloneString(accWithoutSuffix(row[4]));
        }
    }
lineFileClose(&lf);

/* Report how many seem to be missing from loc2ref file. 
 * Write out knownInfo file. */
printf("Writing %s\n", "refLink.tab");
for (rsi = rsiList; rsi != NULL; rsi = rsi->next)
    {
    ++rsiCount;
    if (rsi->locusLinkId == 0)
        ++noLocCount;
    if (rsi->proteinAcc == NULL)
        ++noProtCount;
    fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n",
        emptyForNull(rsi->geneName), 
        emptyForNull(rsi->productName),
        emptyForNull(rsi->mrnaAcc), 
        emptyForNull(rsi->proteinAcc),
        rsi->geneNameId, rsi->productNameId, 
        rsi->locusLinkId, rsi->omimId);
    }
if (noLocCount) 
    printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount);
if (noProtCount)
    printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount);

/* Process alignments and write them out as genes. */
lf = pslFileOpen(pslFile);
dotMod = 0;
while ((psl = pslNext(lf)) != NULL)
    {
    /* One lookup serves both the membership test and the CDS bounds. */
    rsi = hashFindVal(rsiHash, psl->qName);
    if (rsi != NULL)
        {
        if (clDots > 0 && ++dotMod == clDots )
            {
            dotMod = 0;
            dotOut();
            }

        sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName);
        answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str);

        if (answer == NULL)
            {
            fprintf(stderr, "%s NOT FOUND.\n", psl->qName);
            fflush(stderr);
            }
        else
            {
            struct genePred *gp = NULL;
            exonList = pslToExonList(psl);
            fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t",
                psl->qName, psl->tName, psl->strand[0], psl->tStart, psl->tEnd);

            gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize);
            if (!gp)
                errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName);

            fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd);
            fprintf(kgTab, "%d\t", slCount(exonList));

            fflush(kgTab);

            /* Exon starts, then exon ends, each as a comma terminated list. */
            for (exon = exonList; exon != NULL; exon = exon->next)
                fprintf(kgTab, "%d,", exon->start);
            fprintf(kgTab, "\t");

            for (exon = exonList; exon != NULL; exon = exon->next)
                fprintf(kgTab, "%d,", exon->end);
            fprintf(kgTab, "\n");
            slFreeList(&exonList);
            }
        }
    else
        {
        fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName);
        fflush(stderr);
        }
    }
/* The PSL file is finished with; close it like the other input files. */
lineFileClose(&lf);

if (clDots) printf("\n");

if (!clTest)
    {
    writeSeqTable(pepFile, refPepTab, FALSE, TRUE);
    writeSeqTable(faFile, refMrnaTab, FALSE, FALSE);
    }

carefulClose(&kgTab);
carefulClose(&productTab);
carefulClose(&geneTab);
carefulClose(&refLinkTab);
carefulClose(&refPepTab);
carefulClose(&refMrnaTab);

if (!clTest)
    {
    printf("Loading database with %s\n", kgName);
    fflush(stdout);
    
    hgLoadTabFile(conn, ".", kgName, NULL);

    printf("Loading database with %s\n", "productName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "productName", NULL);
    
    printf("Loading database with %s\n", "geneName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "geneName", NULL);
    
    printf("Loading database with %s\n", "refLink");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refLink", NULL);
    
    printf("Loading database with %s\n", "refPep");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refPep", NULL);
    
    printf("Loading database with %s\n", "refMrna");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refMrna", NULL);
    }
}
int main(int argc, char *argv[])
/* Rank gene candidates by their gene-check results.
 * Arguments: kgTempDb genomeDb candTable chkTable outfileName
 * For every row of kgTempDb.candTable the gene-check row with the same acc,
 * chrStart and chrEnd is read from kgTempDb.chkTable.  Candidates that pass
 * are written to outfileName as the first ten candidate columns, a source tag
 * (R = RefSeq, U = UCSC prot/mrna alignment, G = GenBank) followed by a
 * running count, and the ranking (lower is better). */
{
struct sqlConnection *conn2, *conn3;
char query2[256], query3[256];
struct sqlResult *sr2, *sr3;
char **row2, **row3;
char condStr[255];
char *answer;

char *kgTempDb;
char *outfileName;
FILE *outf;
int  i;
char *chp;
char *acc2;

char *name, *txStart, *txEnd;
char *acc, *stat;
char *frame, *start, *stop;
char *genomeDb;
char srcType;
int  alignCnt = 0;

char *candTable, *chkTable;
int  orfStop, cdsGap, cdsSplice, numCdsIntrons;
boolean passed;
float ranking;

if (argc != 6) usage();
kgTempDb    = argv[1];
genomeDb    = argv[2];
candTable   = argv[3];
chkTable    = argv[4];
outfileName = argv[5];

outf = mustOpen(outfileName, "w");
conn2= hAllocConn(genomeDb);
conn3= hAllocConn(genomeDb);

/* go through each protein */
safef(query2, sizeof(query2), "select * from %s.%s", kgTempDb, candTable);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
while (row2 != NULL)
    {
    name    = row2[0];
    txStart = row2[3];
    txEnd   = row2[4];

    /* retrieve gene-check results */
    safef(query3, sizeof(query3), 
          "select * from %s.%s where acc='%s' and chrStart=%s and chrEnd = %s",
          kgTempDb, chkTable, name, txStart, txEnd);
    sr3  = sqlMustGetResult(conn3, query3);
    row3 = sqlNextRow(sr3);
    if (row3 == NULL)
        {
        /* No gene-check record: nothing to rank, so skip this candidate. */
        fprintf(stderr, "no gene-check record for %s %s-%s, skipped\n",
                name, txStart, txEnd);
        }
    else
        {
        /* Only the first matching gene-check row is used. */
        passed = FALSE;

        acc       = row3[0];
        stat      = row3[5];
        frame     = row3[6];
        start     = row3[7];
        stop      = row3[8];
        orfStop   = atoi(row3[9]);
        cdsGap    = atoi(row3[10]);
        cdsSplice = atoi(row3[12]);
        numCdsIntrons = atoi(row3[18]);

        ranking = 9;
        /* all genes passed gene-check with status ok are considered good */
        if (sameWord(stat, "ok")) 
            {
            passed = TRUE;
            ranking  = 1;
            }
        else
            {
            /* frame, start, orfStop, and stop conditions must be met for KG candidates */
            if ((sameWord(frame, "ok")) &&
                (sameWord(start, "ok")) &&
                (orfStop == 0) &&
                (sameWord(stop,  "ok")) )
                {
                ranking = 2;
                /* accept cdsSplice = 0 or (cdsSplice = 1 and numCdsIntrons > 1) */
                if ((cdsSplice == 0) || ((numCdsIntrons > 1) && (cdsSplice == 1)))
                    {
                    passed = TRUE;
                    }
                /* if cdsGap > 0, degrade it ranking by 1.  If cdsGap is not 
                   a multiple of 3, degrade its ranking further */
                if (cdsGap > 0)
                    {
                    ranking = ranking + 1;
                    if ((cdsGap % 3) != 0) ranking = ranking + 1;
                    }
                }
            }

        /* give RefSeq entries 0.5 advantage in its ranking */  
        safef(condStr, sizeof(condStr), "name='%s'", acc);
        answer = sqlGetField(genomeDb, "refGene", "name", condStr);
        if (answer != NULL) 
            {
            ranking = ranking - 0.5;
            }
        else
            {
            /* mgcGenes is searched by the part after the first '_', or by the
             * whole accession when it has none. */
            chp = strstr(acc, "_");
            if (chp != NULL)
                {
                acc2 = chp + 1;
                }
            else
                {
                acc2 = acc;
                }
            safef(condStr, sizeof(condStr), "name='%s'", acc2);

            /* If it is an MGC gene, give it a 0.3 advantable */
            answer = sqlGetField(genomeDb, "mgcGenes", "name", condStr);
            if (answer != NULL) 
                {
                ranking = ranking - 0.3;
                }
            }

        /* print out entries, with their rankings, that passed the above criteria */    
        if (passed) 
            {
            /* The composite name is written unchanged so that kgGetCds can
             * process it correctly; it only decides the source type here. */
            if (strstr(name, "_") == NULL)
                {
                srcType = 'G';          /* src is GenBank */
                }
            else if (strstr(name, "NM_") != NULL)
                {
                srcType = 'R';          /* src is RefSeq */
                }
            else
                {
                srcType = 'U';          /* src is UCSC prot/mrna alignment */
                }
            alignCnt++;
            fprintf(outf, "%s\t", name);
            for (i= 1; i<10; i++) fprintf(outf, "%s\t", row2[i]);
            fprintf(outf, "%c%d\t", srcType, alignCnt);

            fprintf(outf, "%.2f\n", ranking);
            }
        }
    sqlFreeResult(&sr3);
    row2 = sqlNextRow(sr2);
    }
sqlFreeResult(&sr2);

hFreeConn(&conn2);
hFreeConn(&conn3);
fclose(outf);
return(0);
}
Example #27
0
void makeActiveImagePB(char *psOutput, char *psOutput2)
/* Make image and image map.
 *   psOutput  - passed through to doTracks().
 *   psOutput2 - when not NULL the histogram picture is written as PostScript
 *               to this file, otherwise as a PNG in the trash directory.
 * Prints, in order: the protein heading with its display IDs, the organism
 * names, the tracks image, the property histograms (only when the pbStamp
 * table exists), the UCSC links, the domains, the pathway links (only when
 * the protein belongs to a supported genome) and the FASTA record.
 * Sets the globals protSeq and protSeqLen; calls hUserAbort() when no
 * sequence is found for proteinID. */
{
char *mapName = "pbStamps";
int pixWidth, pixHeight;

char *answer;
char cond_str[255];
struct sqlConnection *conn;
struct sqlConnection *connCentral;
char query[256];
struct sqlResult *sr;
char **row;
int  iypos;
char *blatGbDb;
char *sciName, *commonName;
char *spAcc;
char *spDisplayId;
char *oldDisplayId;
conn  = sqlConnect(UNIPROT_DB_NAME);
hPrintf("<br><font size=4>Protein ");

hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" TARGET=_blank><B>%s</B></A>\n",
        proteinID, proteinID);

/* Look the accession up once and reuse it for the comparison below. */
spAcc = spFindAcc(conn, proteinID);
spDisplayId = spAccToId(conn, spAcc);
if (strstr(spDisplayId, spAcc) == NULL)
    {
    hPrintf(" (aka %s", spDisplayId);
    /* show once if the new and old displayId are the same */
    oldDisplayId = oldSpDisplayId(spDisplayId);
    if (oldDisplayId != NULL)
        {
        if (!sameWord(spDisplayId, oldDisplayId))
            {
            hPrintf(" or %s", oldDisplayId);
            }
        }
    hPrintf(")\n");
    }
hPrintf(" %s\n", description);
hPrintf("</font><br>");

hPrintf("Organism: ");
/* get scientific and Genbank common name of this organism */
sciName    = NULL;
commonName = NULL;
sqlSafefFrag(cond_str, sizeof(cond_str),"accession='%s'", proteinID);
answer = sqlGetField(PROTEOME_DB_NAME, "spXref3", "division", cond_str);
if (answer != NULL)
    {
    sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s and nameType='scientific name'", answer);
    sciName = sqlGetField(PROTEOME_DB_NAME, "taxonNames", "name", cond_str);

    sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s and nameType='genbank common name'", answer);
    commonName = sqlGetField(PROTEOME_DB_NAME, "taxonNames", "name", cond_str);
    }
if (sciName != NULL)
    {
    hPrintf("%s", sciName);
    }
if (commonName != NULL)
    {
    hPrintf(" (%s)", commonName);
    }
hPrintf("<br>");

protSeq = getAA(proteinID);
if (protSeq == NULL)
    {
    hUserAbort("%s is not a current valid entry in UniProtKB\n", proteinID);
    }
protSeqLen = strlen(protSeq);

fflush(stdout);

iypos = 15;
doTracks(proteinID, mrnaID, protSeq, &iypos, psOutput);
/* Without the pbStamp table there are no histograms to draw. */
if (!hTableExists(database, "pbStamp")) goto histDone;

pbScale = 3;
pixWidth = 765;
insideWidth = pixWidth-gfxBorder;

pixHeight = 350;

if (psOutput2)
    {
    vg2 = vgOpenPostScript(pixWidth, pixHeight, psOutput2);
    }
else
    {
    trashDirFile(&gifTn2, "pbt", "pbt", ".png");
    vg2 = vgOpenPng(pixWidth, pixHeight, gifTn2.forCgi, FALSE);
    }

g_vg = vg2;

pbRed    = vgFindColorIx(vg2, 0xf9, 0x51, 0x59);
pbBlue   = vgFindColorIx(g_vg, 0x00, 0x00, 0xd0);

normalColor   = pbBlue;
abnormalColor = pbRed;

bkgColor = vgFindColorIx(vg2, 255, 254, 232);
vgBox(vg2, 0, 0, insideWidth, pixHeight, bkgColor);

/* Start up client side map. */
hPrintf("\n<MAP Name=%s>\n", mapName);

vgSetClip(vg2, 0, gfxBorder, insideWidth, pixHeight - 2*gfxBorder);
iypos = 15;

/* Draw stamps. */

doStamps(proteinID, mrnaID, protSeq, vg2, &iypos);

/* Finish map. */
hPrintf("</MAP>\n");

/* Save out picture and tell html file about it. */
vgClose(&vg2);
hPrintf("<P>");

hPrintf("\n<IMG SRC=\"%s\" BORDER=1 WIDTH=%d HEIGHT=%d USEMAP=#%s><BR>",
            gifTn2.forCgi, pixWidth, pixHeight, mapName);
/* The help link is the same whether or not the genome is supported. */
hPrintf("\n<A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml#histograms\" TARGET=_blank>");

hPrintf("Explanation of Protein Property Histograms</A><BR>");

hPrintf("<P>");

histDone:

hPrintf("<P>");
fflush(stdout);

/* See if a UCSC Genome Browser exist for this organism.  If so, display BLAT link. */
blatGbDb = NULL;
if (sciName != NULL)    /* no scientific name means there is nothing to look up */
    {
    connCentral = hConnectCentral();
    sqlSafef(query, sizeof(query),
          "select defaultDb.name from dbDb, defaultDb where dbDb.scientificName='%s' and dbDb.name=defaultDb.name",
          sciName);
    sr = sqlGetResult(connCentral, query);
    row = sqlNextRow(sr);
    if (row != NULL)
        {
        blatGbDb = strdup(row[0]);
        }
    sqlFreeResult(&sr);
    hDisconnectCentral(&connCentral);
    }

if (proteinInSupportedGenome || (blatGbDb != NULL))
    {
    hPrintf("\n<B>UCSC Links:</B><BR>\n ");
    hPrintf("<UL>\n");

    /* Show GB links only if the protein belongs to a supported genome */
    if (proteinInSupportedGenome)
        {
        doGenomeBrowserLink(proteinID, mrnaID, hgsidStr);
        doGeneDetailsLink(proteinID, mrnaID, hgsidStr);
        }

    /* Show Gene Sorter link only if it is valid for this genome */
    if (hgNearOk(database))
        {
        doGeneSorterLink(protDisplayID, mrnaID, hgsidStr);
        }

    /* Show BLAT link if we have UCSC Genome Browser for it */
    if (blatGbDb != NULL)
        {
        doBlatLink(blatGbDb, sciName, commonName, protSeq);
        }

    hPrintf("</UL><P>");
    }

/* This section shows various types of  domains.  The UniProt connection
 * opened at the top is still open and is reused here. */
domainsPrint(conn, proteinID);

hPrintf("<P>");

/* Do Pathway section only if the protein belongs to a supported genome */
if (proteinInSupportedGenome)
    {
    doPathwayLinks(proteinID, mrnaID);
    }

printFASTA(proteinID, protSeq);
}
Example #28
0
static char *kgNextField(char **pCursor, char delim)
/* Return the field that starts at *pCursor, terminating it in place at the
 * next delim and moving *pCursor just past that delim.  Returns NULL and
 * leaves *pCursor alone when delim does not occur. */
{
char *field = *pCursor;
char *end = strchr(field, delim);
if (end == NULL)
    return NULL;
*end = '\0';
*pCursor = end + 1;
return field;
}

int main(int argc, char *argv[])
/* Separate duplicate candidate records from unique ones.
 * Arguments: kgTempDb uniProtDb infileName outfileName dupOutfileName
 * Each input line holds six tab separated fields: mRNA, chrom, cdsStart,
 * cdsEnd, score and protein.  A line whose first four fields equal those of
 * the most recently remembered line is a duplicate: the remembered and the
 * current mRNA with their display IDs go to dupOutfileName.  For any other
 * line the matching kgTempDb.kgCandidate2 rows are written to outfileName
 * with the protein's display ID inserted before the last column.
 * Lines with fewer than six fields are reported on stdout and skipped. */
{
struct sqlConnection *conn2;
struct sqlResult *sr2;
char query2[256];
char **row2;
char condStr[255];

char *uniProtDb;
FILE *outf;
FILE *dupOutf;
char *chrom, *cdsStart, *cdsEnd;
char *displayID;
char *oldDisplayID;

char *cursor, *keyEnd, *eol;
char *mrnaField, *scoreField;
int  i;

int  isDuplicate;
    
char *kgTempDb;
char *infileName, *outfileName, *dupOutfileName;

if (argc != 6) usage();
    
kgTempDb    = argv[1];
uniProtDb  = argv[2];
infileName  = argv[3];
outfileName = argv[4];
dupOutfileName = argv[5];
  
inf     = mustOpen(infileName, "r");
outf    = mustOpen(outfileName, "w");
dupOutf = mustOpen(dupOutfileName, "w");
conn2= hAllocConn();

strcpy(oldInfo, "");

isDuplicate   = 0;
oldMrnaStr    = cloneString("");
oldAlignStr   = cloneString("");
oldProteinStr = cloneString("");
oldDisplayID  = cloneString("");

mrnaStr       = cloneString("");
proteinStr    = cloneString("");

while (fgets(line_in, 500, inf) != NULL)
    {
    strcpy(line, line_in);
    strcpy(line2, line_in);

    /* Split the working copy in place.  chrom, cdsStart and cdsEnd point into
     * line and are only valid until the next line is read. */
    cursor     = line;
    mrnaField  = kgNextField(&cursor, '\t');
    chrom      = (mrnaField != NULL) ? kgNextField(&cursor, '\t') : NULL;
    cdsStart   = (chrom != NULL)     ? kgNextField(&cursor, '\t') : NULL;
    cdsEnd     = (cdsStart != NULL)  ? kgNextField(&cursor, '\t') : NULL;
    keyEnd     = cursor;        /* just past the tab that ended cdsEnd */
    scoreField = (cdsEnd != NULL)    ? kgNextField(&cursor, '\t') : NULL;
    if (scoreField == NULL)
        {
        printf("!!! malformed input line skipped: %s", line_in);fflush(stdout);
        continue;
        }
    /* The protein is the rest of the line, without the newline if present. */
    eol = strchr(cursor, '\n');
    if (eol != NULL)
        *eol = '\0';

    /* These two are kept across iterations through oldMrnaStr and
     * oldProteinStr, so they need their own storage. */
    mrnaStr    = strdup(mrnaField);
    proteinStr = strdup(cursor);

    /* The duplicate key is the first four fields: cut the second copy of the
     * line at the tab that follows cdsEnd. */
    line2[(keyEnd - 1) - line] = '\0';
    strcpy(newInfo, line2);
    if (sameString(oldInfo, newInfo))
        {
        isDuplicate = 1;
        sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", proteinStr);
        displayID = sqlGetField(uniProtDb, "displayId", "val", condStr);
        if (displayID == NULL) 
            {
            printf("!!! %s not found\n", proteinStr);fflush(stdout);
            }
        sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", oldProteinStr);
        oldDisplayID = sqlGetField(uniProtDb, "displayId", "val", condStr);
        if (oldDisplayID == NULL) 
            {
            printf("!!! %s not found\n", oldProteinStr);fflush(stdout);
            }
        /* NOTE(review): either display ID may be NULL here and is still
         * passed to %s - decide what should be written in that case. */
        fprintf(dupOutf, 
                "%s\t%s\t%s\t%s\n", oldMrnaStr, oldDisplayID, mrnaStr, displayID);fflush(stdout);
        }
    else
        {
        /* remember previous record as old only if it is not a duplicate */
        /* NOTE(review): when the previous line was a duplicate, the current
         * line is not remembered, so a later duplicate of it is reported
         * against the older record - confirm that this is intended. */
        if (!isDuplicate)
            {
            oldMrnaStr    = mrnaStr;
            oldProteinStr = proteinStr;
            }
        strcpy(oldInfo, newInfo);
        isDuplicate = 0;

        sqlSafef(query2, sizeof(query2), 
              "select * from %s.kgCandidate2 where name='%s' and proteinID='%s' and chrom='%s' and cdsStart='%s' and cdsEnd='%s'", 
              kgTempDb, mrnaStr, proteinStr, chrom, cdsStart, cdsEnd);
        sr2 = sqlMustGetResult(conn2, query2);
        row2 = sqlNextRow(sr2);
        while (row2 != NULL)
            {
            for (i=0; i<10; i++) fprintf(outf, "%s\t", row2[i]);
            if (!sameWord(proteinStr, row2[10]))
                {
                printf("\n??? %s\t%s\n", proteinStr, row2[10]);fflush(stdout);
                }

            sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", proteinStr);
            displayID = sqlGetField(uniProtDb, "displayId", "val", condStr);
            if (displayID == NULL) 
                {
                printf("!!! %s not found\n", proteinStr);fflush(stdout);
                }
            fprintf(outf, "%s\t", displayID);
            fprintf(outf, "%s\n", row2[11]);
            row2 = sqlNextRow(sr2);
            }
        sqlFreeResult(&sr2);
        }
    }
fclose(inf);
fclose(outf);
fclose(dupOutf);
return(0);
}
Example #29
0
int main(int argc, char *argv[])
/* Score protein/mRNA alignments.
 * Arguments: proteinDataDate kgTempDb genomeReadOnly protMrnaTableName
 * Reads qName, tName, matches, qSize and tSize from
 * kgTempDb.protMrnaTableName.  For every alignment in which more than 30% of
 * the protein matches and whose mRNA has a moddate in gbCdnaInfo, prints
 * "protein <tab> mRNA <tab> score" to stdout, where
 *     score = mrnaSize + 2*months - 50*(protSize - match)
 * and months is the value cal_months() returns for that moddate.
 * Also sets the globals spDB, proteinsDB and gbTempDB and opens protein.lis,
 * kgBestMrna.out and best.lis in the current directory.
 * NOTE(review): the three sprintf() calls are unbounded and the sizes of
 * their global buffers are not visible from here. */
{
FILE *inf;

char *mrnaDate;
int  months;

struct sqlConnection *conn;
char query[256];
struct sqlResult *sr;
char **row;

char *protAcc, *mrnaAcc, *matchStr;
char *protSizeStr, *mrnaSizeStr;
int  protSize, mrnaSize, match;

char *protMrnaTableName;

char condStr[255];
int score;

if (argc != 5) usage();
    
proteinDataDate = argv[1];
kgTempDb = argv[2];
genomeReadOnly = argv[3];
protMrnaTableName = argv[4];

sprintf(spDB, "sp%s", proteinDataDate);
sprintf(proteinsDB, "proteins%s", proteinDataDate);
sprintf(gbTempDB, "%sTemp", kgTempDb);
  
inf = fopen("protein.lis", "r"); 
if ((FILE *) NULL == inf)
    errAbort("ERROR: Can not open input file: protein.lis");
o3  = fopen("kgBestMrna.out",   "w");
if ((FILE *) NULL == o3)
    errAbort("ERROR: Can not open output file: kgBestMrna.out");
o7  = fopen("best.lis",    "w");
if ((FILE *) NULL == o7)
    errAbort("ERROR: Can not open output file: best.lis");

conn = hAllocConn(genomeReadOnly);
   
proteinCount = 0; 

sqlSafef(query, sizeof query,"select qName, tName, matches, qSize, tSize from %s.%s", kgTempDb, protMrnaTableName);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);
while (row != NULL)
    {
    protAcc     = row[0];
    mrnaAcc     = row[1];
    matchStr    = row[2];
    protSizeStr = row[3];
    mrnaSizeStr = row[4];

    sscanf(matchStr, "%d", &match);
    sscanf(protSizeStr, "%d", &protSize);
    sscanf(mrnaSizeStr, "%d", &mrnaSize);
  
    if ((float)match/(float)protSize > 0.3)
        {
        /* Build the condition in the same buffer that is handed to
         * sqlGetField() below. */
        sqlSafefFrag(condStr, sizeof condStr, "acc='%s'", mrnaAcc);
        mrnaDate = sqlGetField(genomeReadOnly, "gbCdnaInfo", "moddate",
                               condStr);
        if (mrnaDate != NULL)
           {
           months = cal_months(mrnaDate);
           score  = mrnaSize + months*2 - (protSize - match) *50;
           printf("%s\t%s\t%d\n", protAcc, mrnaAcc, score);fflush(stdout);
           }
        }
    row = sqlNextRow(sr);
    }    

hFreeConn(&conn);
fclose(inf);
fclose(o3);
fclose(o7);
return(0);
}
Example #30
0
int main(int argc, char *argv[])
/* Convert contig-relative records to chromosome coordinates.
 *
 * Command line: database inFileName outFileName
 *
 * Each input line holds nine whitespace-separated fields:
 *     contig start end num code id oStart oEnd strnd
 * The contig's chr_start is looked up in <database>.seq_contig (by ctg_acc)
 * and added to start/end (minus 1) to form the output coordinates.  Whenever
 * the contig name changes (except before the first contig) an extra "N" row
 * tagged "contig"/"no" is written, spanning from the previous row's end + 1
 * to the new row's start - 1, and subsequent row numbers are offset so the
 * numbering continues across contigs.
 *
 * Lines with fewer than nine fields are reported on stderr and skipped.
 * Returns 0 on success, 1 if a contig is missing from seq_contig. */
{
struct sqlConnection *conn2;
char condStr[500];

FILE *inf;
FILE *outf;

char line[1000];

char *chrStart;

char *inFileName, *outFileName;
char contig[100], start[100], end[100];
char num[100], code[100], id[100], oStart[100], oEnd[100], strnd[100];
char *database;

/* Same capacity as contig[], so copying a parsed contig name always fits. */
char oldContig[100] = "";
int  oldNum = 0;

int lastNum = 0;
int lastEnd = 0;
int status = 0;

if (argc != 4)usage();

database    = argv[1];
inFileName  = argv[2];
outFileName = argv[3];

hSetDb(database);

/* mustOpen aborts on failure, so outf is never NULL below. */
outf = mustOpen(outFileName, "w");
conn2= hAllocConn();

inf   = mustOpen(inFileName, "r");

while (fgets(line, sizeof line, inf) != NULL)
    {
    int fieldCount, chromOffset, startPos, endPos;

    /* %99s bounds every field to its 100-byte buffer. */
    fieldCount = sscanf(line, "%99s %99s %99s %99s %99s %99s %99s %99s %99s",
                        contig, start, end, num, code, id, oStart, oEnd, strnd);
    if (fieldCount != 9)
        {
        fprintf(stderr, "WARNING: skipping malformed line (%d of 9 fields): %s\n",
                fieldCount, line);
        continue;
        }

    sqlSafefFrag(condStr, sizeof condStr, "ctg_acc='%s'", contig);
    /* NOTE(review): the string returned by sqlGetField is not freed here;
     * confirm its ownership against the library before adding a free. */
    chrStart =  sqlGetField(database, "seq_contig", "chr_start", condStr);
    if (chrStart == NULL)
        {
        /* Without chr_start no coordinate can be computed; stop cleanly
         * instead of handing NULL to atoi(). */
        fprintf(stderr, "ERROR: contig %s not found in %s.seq_contig\n",
                contig, database);
        status = 1;
        break;
        }
    chromOffset = atoi(chrStart);
    startPos    = chromOffset + atoi(start) - 1;
    endPos      = chromOffset + atoi(end) - 1;

    if (!sameWord(oldContig, contig))
        {
        if (!sameWord(oldContig, ""))
            {
            /* Gap row between the previous contig's last row and this one. */
            int gapStart = lastEnd + 1;
            int gapEnd   = startPos - 1;

            lastNum++;
            fprintf(outf, "%s\t%d\t%d\t",
                    oldContig, gapStart, gapEnd);
            fprintf(outf, "%d\t%s\t%d\t%s\t%s\n",
                    lastNum, "N", gapEnd - gapStart + 1, "contig", "no");
            }
        strcpy(oldContig, contig);
        oldNum = lastNum;
        }
    lastNum = atoi(num) + oldNum;
    fprintf(outf, "%s\t%d\t%d\t",
            contig, startPos, endPos);
    fprintf(outf, "%d\t%s\t%s\t%s\t%s\t%s\n",
            lastNum, code, id, oStart, oEnd, strnd);
    lastEnd = endPos;
    }
hFreeConn(&conn2);

fclose(inf);
fclose(outf);
return(status);
}