void hgGetNrOmimGene(char *database, char *outFileName)
/* hgGetNrOmimGene - Generate omimGene entries related to NR_xxxx RefSeq.
 *
 * Runs a query over refGene/refLink/omimGene restricted to RefSeq names that
 * start with "NR_" and, for every returned omimId that is not yet present in
 * the name column of <database>.omimGene, writes one tab-separated line
 * (chrom, txStart, txEnd, omimId) to outFileName.
 *
 * If the output file cannot be opened a message goes to stderr and the
 * function returns without touching the database. */
{
struct sqlConnection *conn2, *conn3;
/* The text is used as-is (never as a printf format), so '%' is literal. */
char nrQuery[] =
    "select g.chrom, g.txStart, g.txEnd, omimId from refGene g, refLink l, omimGene o where l.mrnaAcc=g.name and g.name like 'NR_%' and omimId <>0 limit 1000";
char query3[256];
struct sqlResult *sr2, *sr3;
char **row2, **row3;
FILE *outf;
char *chrom, *txStart, *txEnd, *omimId;
int len;

outf = fopen(outFileName, "w");
if (outf == NULL)
    {
    fprintf(stderr, "hgGetNrOmimGene: can't open %s for writing\n", outFileName);
    return;
    }
conn2 = hAllocConn(database);
conn3 = hAllocConn(database);

/* first get all RefSeq entries that begin with "NR_" and have related OMIM entries */
sr2 = sqlMustGetResult(conn2, nrQuery);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    chrom   = row2[0];
    txStart = row2[1];
    txEnd   = row2[2];
    omimId  = row2[3];

    /* then check if this omimId is already in the omimGene table;
     * a second connection is used because sr2 is still being read */
    len = snprintf(query3, sizeof(query3),
                   "select name from %s.omimGene where name='%s'",
                   database, omimId);
    if (len < 0 || len >= (int)sizeof(query3))
        {
        /* never send a truncated statement to the server */
        fprintf(stderr, "hgGetNrOmimGene: lookup query too long for omimId %s, skipped\n",
                omimId);
        continue;
        }
    sr3 = sqlMustGetResult(conn3, query3);
    row3 = sqlNextRow(sr3);

    /* if not, create a new omimGene entry */
    if (row3 == NULL)
        fprintf(outf, "%s\t%s\t%s\t%s\n", chrom, txStart, txEnd, omimId);
    sqlFreeResult(&sr3);
    }
sqlFreeResult(&sr2);

hFreeConn(&conn2);
hFreeConn(&conn3);
fclose(outf);
}
Ejemplo n.º 2
0
int searchProteinsInSwissProtByGene(char *queryGeneID)
/* search Swiss-Prot database to see if it contains the protein
   Input: queryGeneID
   return: number of proteins found in Swiss-Prot
*/
{
int  proteinCnt;
struct sqlConnection *conn;
char query[256];
struct sqlResult *sr;
char **row;

conn = sqlConnect(UNIPROT_DB_NAME);
sqlSafef(query, sizeof(query),
            "select count(*) from gene, displayId, accToTaxon,taxon "
            "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id "
            "and accToTaxon.acc=gene.acc order by taxon.id",
            queryGeneID);

sr  = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);

if (row == NULL)
    {
    errAbort("Error occured during mySQL query: %s\n", query);
    }

proteinCnt = atoi(row[0]);

sqlFreeResult(&sr);
sqlDisconnect(&conn);
return(proteinCnt);
}
int main(int argc, char *argv[])
/* Expand the comma-separated uniprot_id list of every genes_rat row into one
 * output line per ID, formatted as "RGD:<gene_rgd_id>\t<uniprot_id>".
 * Arguments: argv[1] = database, argv[2] = output file name. */
{
char *database;
char *outFn;
struct sqlConnection *conn2;
/* constant statement, so no formatting into a fixed-size buffer is needed */
char query2[] = "select gene_rgd_id, uniprot_id from genes_rat where uniprot_id <> \"\"";
struct sqlResult *sr2;
char **row2;
char *idList, *comma;

if (argc != 3) usage();

database = argv[1];
conn2 = hAllocConn(database);

outFn = argv[2];
outf = mustOpen(outFn, "w");

sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    rgdId  = row2[0];
    idList = row2[1];

    /* every ID that is followed by a comma; the list is cut up in place */
    while ((comma = strchr(idList, ',')) != NULL)
        {
        *comma = '\0';
        fprintf(outf, "RGD:%s\t%s\n", rgdId, idList);
        idList = comma + 1;
        }
    /* whatever follows the last comma (or the whole value if there is none) */
    fprintf(outf, "RGD:%s\t%s\n", rgdId, idList);
    }
sqlFreeResult(&sr2);

fclose(outf);
hFreeConn(&conn2);
return(0);
}
Ejemplo n.º 4
0
void webMain(struct sqlConnection *conn)
/* Set up fancy web page with hotlinks bar and
 * sections.
 *
 * Three cases, all inside one <FORM>:
 *   - curSubjId is empty: prompt for a subject ID;
 *   - curSubjId is found in <database>.gsidSubjInfo: print a search box and
 *     the sections for that subject;
 *   - otherwise: report the ID as invalid and prompt again. */
{
struct section *sectionList = NULL;
char query[256];
struct sqlResult *sr;
int subjFound;

sectionList = loadSectionList(conn);

puts("<FORM ACTION=\"/cgi-bin/gsidSubj\" NAME=\"mainForm\" METHOD=\"GET\">\n");

/* display GSID logo image here */
printf("<img src=\"/images/gsid_header.jpg\" alt=\"\" name=\"gsid_header\" width=\"800\" height=\"86\" border=\"1\" usemap=\"#gsid_headerMap\">");

hPrintf("<br><br>");
hotLinks();

printf("<font size=\"5\"><BR><B>Subject View   </B></font>");

if (sameWord(curSubjId, ""))
    {
    printf("<BR><H3>Please enter a subject ID.\n");
    printf("<input type=\"text\" name=\"hgs_subj\" value=\"%s\">\n", curSubjId);
    cgiMakeButton("submit", "Go!");
    printf("</H3>");
    printf("For example: GSID4123");fflush(stdout);
    }
else
    {
    sqlSafef(query, sizeof(query), "select subjId from %s.gsidSubjInfo where subjId = '%s'",
          database, curSubjId);
    sr = sqlMustGetResult(conn, query);
    /* Decide while the result set is still alive: a row pointer must not be
     * consulted once sqlFreeResult() has run. */
    subjFound = (sqlNextRow(sr) != NULL);
    sqlFreeResult(&sr);
    if (subjFound)
        {
        printf(
        "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;search for another subject:&nbsp;");
        printf("<input type=\"text\" name=\"hgs_subj\" value=\"\">\n");
        cgiMakeButton("submit", "Go!");
        printSections(sectionList, conn, curSubjId);
        }
    else
        {
        /* NOTE(review): curSubjId is written into the page without HTML
         * escaping; if it originates from the request it should be encoded
         * first - confirm where curSubjId is set. */
        printf("<H3><span style='color:red;'>%s</span> is not a valid subject ID.</H3>", curSubjId);

        printf("<H3>Please enter a valid subject ID.\n");
        printf("<input type=\"text\" name=\"hgs_subj\" value=\"%s\">\n", "");
        cgiMakeButton("submit", "Go!");
        printf("<BR><BR>For example: GSID4123");
        printf("</H3>");
        fflush(stdout);
        }
    }
puts("</FORM>\n");
}
Ejemplo n.º 5
0
void doH1n1Gene(struct trackDb *tdb, char *item)
/* Show details page for H1N1 Genes and Regions annotations track.
 *
 * Prints the gene name, its position from the h1n1Gene table (when a row
 * named item exists), and, when getH1n1Model() reports a model for the gene,
 * a link to the model file, a Chimera link and a structure image.  Ends with
 * the track's HTML description. */
{
struct sqlConnection *conn  = hAllocConn(database);
struct sqlResult *sr;
char query[256];
char **row;
char *gene = item;

genericHeader(tdb, item);

printf("<B>Gene: </B> %s\n<BR>", gene);
sqlSafef(query, sizeof query, "select chrom, chromStart, chromEnd from h1n1Gene where name='%s';", gene);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
   {
   /* row[0..2] = chrom, chromStart, chromEnd as selected above */
   printPosOnChrom(row[0], atoi(row[1]), atoi(row[2]), NULL, FALSE, item);
   }
/* The database is not needed past this point, so the result and the
 * connection are released here, once; nothing remains to free at the end. */
sqlFreeResult(&sr);
hFreeConn(&conn);
htmlHorizontalLine();

printf("<H3>Protein Structure Analysis and Prediction</H3>");
printf("<B>3D Structure Prediction of consensus sequence (with variations of all selected sequences highlighted):");
printf("<BR>PDB file:</B> ");

/* NOTE(review): pdbUrl is formatted but not used by anything visible in
 * this function; kept because getH1n1StructUrl() is opaque from here. */
char pdbUrl[PATH_LEN];
safef(pdbUrl, sizeof(pdbUrl), "%s/%s/decoys/%s.try1-opt3.pdb.gz", getH1n1StructUrl(), item, item);

// Modeller stuff
char modelPdbUrl[PATH_LEN];
if (getH1n1Model(gene, modelPdbUrl))
    {
    char *selectFile = cartOptionalString(cart, gisaidAaSeqList);
    struct tempName imageFile, chimeraScript, chimerax;
    mkH1n1StructData(gene, selectFile, NULL, &imageFile, &chimeraScript);
    mkChimerax(gene, modelPdbUrl, chimeraScript.forCgi, &chimerax);
    printf("<A HREF=\"%s\" TARGET=_blank>%s</A>, view with <A HREF=\"%s\">Chimera</A><BR>\n", 
           modelPdbUrl, gene, chimerax.forHtml);
    printf("<TABLE>\n");
    printf("<TR>\n");
    printf("<TD ALIGN=\"center\"><img src=\"%s\"></TD>", imageFile.forHtml);
    printf("</TR>\n");
    printf("</TABLE>\n");
    }

htmlHorizontalLine();
printTrackHtml(tdb);
}
Ejemplo n.º 6
0
int main(int argc, char *argv[])
/* Flatten the comma-separated expScores column of a table into one line per
 * score: "<name>\t<zero-based position>\t<score>".
 * Only pieces that are followed by a comma are written; text after the last
 * comma is ignored.
 * Arguments: argv[1] = database, argv[2] = table, argv[3] = output file. */
{
char *db, *table, *outPath;
FILE *outf;
struct sqlConnection *dbConn;
struct sqlResult *result;
char **fields;
char sql[256];
char *probeId, *piece, *comma;
int expCnt = 0;

if (argc != 4) usage();
db      = argv[1];
table   = argv[2];
outPath = argv[3];

outf = mustOpen(outPath, "w");

dbConn = hAllocConn(db);
sqlSafef(sql, sizeof sql, "select name, expCount, expScores from %s", table);
result = sqlMustGetResult(dbConn, sql);
for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    probeId = fields[0];
    /* fields[1] (expCount) is selected but not consulted */
    piece   = fields[2];

    /* cut the list at each comma in place, numbering the pieces from 0 */
    for (expCnt = 0; (comma = strchr(piece, ',')) != NULL; expCnt++)
        {
        *comma = '\0';
        fprintf(outf, "%s\t%d\t%s\n", probeId, expCnt, piece);
        piece = comma + 1;
        }
    }
sqlFreeResult(&result);
hFreeConn(&dbConn);

fclose(outf);
return(0);
}
Ejemplo n.º 7
0
int main(int argc, char *argv[])
/* For every evofold item on one chromosome, write a VARNAcmd java command
 * line (sequence, secondary structure, output png path) to the output file.
 * Arguments: argv[1] = database, argv[2] = output file, argv[3] = chromosome. */
{
char *db;
char *outPath;
struct dnaSeq *itemSeq;
struct sqlConnection *dbConn;
struct sqlResult *result;
char **fields;
char sql[256];

if (argc != 4) usage();

db     = argv[1];
dbConn = hAllocConn(db);

outPath = argv[2];
outf    = mustOpen(outPath, "w");

tgtChrom = argv[3];

sqlSafef(sql, sizeof sql, "select secStr, name, chrom, chromStart, chromEnd, strand from evofold where chrom='%s'", tgtChrom);
result = sqlMustGetResult(dbConn, sql);
for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    secStr     = fields[0];
    id         = fields[1];
    chrom      = fields[2];
    chromStart = atoi(fields[3]);
    chromEnd   = atoi(fields[4]);
    strand     = *fields[5];   /* first character of the strand column */

    /* NOTE(review): itemSeq is not released anywhere in this loop. */
    itemSeq = hChromSeq(db, chrom, chromStart, chromEnd);
    touppers(itemSeq->dna);
    if (strand == '-')
        reverseComplement(itemSeq->dna, itemSeq->size);

    /* presumably turns the DNA alphabet into RNA (T -> U) - confirm */
    memSwapChar(itemSeq->dna, itemSeq->size, 'T', 'U');

    safef(javaCmd, sizeof(javaCmd),
       "java -cp VARNAv3-7.jar fr.orsay.lri.varna.applications.VARNAcmd -sequenceDBN %s -structureDBN '%s' -o evoFold/%s/%s.png",
          itemSeq->dna, secStr, chrom, id);

    fprintf(outf, "%s\n", javaCmd);
    }
sqlFreeResult(&result);

fclose(outf);
hFreeConn(&dbConn);
return(0);
}
struct pbStamp *getStampData(char *stampName)
/* get data for a stamp
 *
 * Loads the pbStamp row named stampName from <database>.pbStamp, then reads
 * every row of the table named by its stampTable field into the tx[] / ty[]
 * arrays (first two columns, converted with atof).
 * Calls errAbort() when no pbStamp row matches.
 * Returns the loaded pbStamp record. */
{
struct sqlConnection *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
struct pbStamp *pbStampPtr;
int i;

conn2= hAllocConn(database);
safef(query2, sizeof(query2), "select * from %s.pbStamp where stampName ='%s'", database, stampName);

sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);

/* The check has to come before the load: pbStampLoad() must never be handed
 * a NULL row. */
if (row2 == NULL)
    {
    errAbort("%s stamp data not found.", stampName);
    }
pbStampPtr = pbStampLoad(row2);
sqlFreeResult(&sr2);

safef(query2, sizeof(query2), "select * from %s.%s;", database, pbStampPtr->stampTable);
sr2 = sqlMustGetResult(conn2, query2);

/* NOTE(review): tx[]/ty[] are declared outside this function and their
 * capacity is not visible here; i is not bounded against it. */
i = 0;
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    tx[i] = atof(row2[0]);
    ty[i] = atof(row2[1]);
    i++;
    }
sqlFreeResult(&sr2);
hFreeConn(&conn2);

return(pbStampPtr);
}
Ejemplo n.º 9
0
int searchProteinsInSupportedGenomes(char *queryID, char **database)
/* Look for a protein alias in every default genome database flagged hgPbOk=1.
   Input:  queryID - alias looked up in each database's kgSpAlias table
   Return: number of databases in which the alias was found
   Output: *database receives a strdup() copy of the name of the last
           database with a hit; it is left untouched when there is none
*/
{
int hits = 0;
char aliasCond[255];
char centralSql[256];
struct sqlConnection *centralConn, *genomeConn;
struct sqlResult *dbList;
char **dbRow;
char *genomeDb, *countStr;

/* get all genome DBs that support PB */
centralConn = hConnectCentral();
sqlSafef(centralSql, sizeof(centralSql),
      "select defaultDb.name, dbDb.organism from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name");
dbList = sqlMustGetResult(centralConn, centralSql);

/* go through each valid genome database that has PB */
for (dbRow = sqlNextRow(dbList); dbRow != NULL; dbRow = sqlNextRow(dbList))
    {
    genomeDb = dbRow[0];
    /* dbRow[1] (organism) is selected but not needed here */

    /* the connection is opened and closed around the lookup; the lookup
     * itself goes through sqlGetField() by database name */
    genomeConn = sqlConnect(genomeDb);
    sqlSafefFrag(aliasCond, sizeof(aliasCond), "alias='%s'", queryID);
    countStr = sqlGetField(genomeDb, "kgSpAlias", "count(distinct spID)", aliasCond);
    sqlDisconnect(&genomeConn);

    if (countStr == NULL)
        continue;
    if (sameWord(countStr, "0"))
        continue;

    /* increase the count only by one, because new addition of splice variants
       to kgSpAlias would give a count of 2 for both the parent and the
       variant, which caused the problem when rescale button is pressed */
    if (atoi(countStr) > 0)
        hits++;
    *database = strdup(genomeDb);
    }
sqlFreeResult(&dbList);
hDisconnectCentral(&centralConn);
return hits;
}
Ejemplo n.º 10
0
/* check if a locusID points to a KG mRNA
 *
 * Reads the mRNA accessions (seqType 'm') recorded for locusID in
 * <tempDbName>.locus2Acc0, strips the ".version" suffix from each and
 * returns TRUE as soon as one of them is found as a name in the knownGene
 * table of dbName; FALSE if none matches. */
boolean checkMrna(char *locusID)
{
    struct sqlConnection *conn2;
    char query2[256];
    struct sqlResult *sr2;
    char **row2;
    boolean result = FALSE;
    char cond_str[256];

    char *chp;
    char *gbAC;
    char *knownGeneID;

    conn2 = hAllocConn();

    sqlSafef(query2, sizeof query2, "select gbAC from %s.locus2Acc0 where locusID=%s and seqType='m';",
             tempDbName, locusID);
    sr2 = sqlMustGetResult(conn2, query2);
    for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
        gbAC = row2[0];

        /* Cut the version off in place: the row value is not used for
         * anything else, so no private copy has to be allocated. */
        chp = strchr(gbAC, '.');
        if (chp != NULL) *chp = '\0';

        sqlSafefFrag(cond_str, sizeof cond_str, "name = '%s';", gbAC);
        knownGeneID = sqlGetField(dbName, "knownGene", "name", cond_str);
        if (knownGeneID != NULL)
        {
            result = TRUE;
            break;
        }
    }

    /* release the result set before handing its connection back */
    sqlFreeResult(&sr2);
    hFreeConn(&conn2);
    return(result);
}
static void display1(struct sqlConnection *conn, char *sampleId, char* colName)
/* Print "<B>colName:</B> value<BR>" for one column of the gisaidSubjInfo row
 * whose EPI_ISOLATE_ID equals sampleId.  Prints nothing when no row matches.
 * NOTE(review): the statement is assembled with safef(), so colName and
 * sampleId reach the SQL text exactly as passed in. */
{
char sql[256];
struct sqlResult *result;
char **fields;

safef(sql, sizeof(sql), 
      "select %s from gisaidSubjInfo where EPI_ISOLATE_ID='%s'", 
      colName, sampleId);
result = sqlMustGetResult(conn, sql);
fields = sqlNextRow(result);
if (fields == NULL)
    {
    sqlFreeResult(&result);
    return;
    }

printf("<B>%s:</B> %s<BR>\n", colName, fields[0]);
fflush(stdout);
sqlFreeResult(&result);
}
Ejemplo n.º 12
0
static struct genbankCds getCds(struct sqlConnection *conn, struct mappingInfo *mi)
/* Fetch the CDS string recorded for mi->gbAcc (gbCdnaInfo joined to cds) and
 * parse it.  The returned struct is all zero bytes when there is no such
 * row or when genbankCdsParse() rejects the string. */
{
struct genbankCds cds;
char sql[256];
struct sqlResult *result;
char **cdsRow;

sqlSafef(sql, sizeof(sql),
      "select cds.name "
      "from %s.gbCdnaInfo, %s.cds "
      "where gbCdnaInfo.acc=\"%s\" and gbCdnaInfo.cds=cds.id",
      database, database, mi->gbAcc);

result = sqlMustGetResult(conn, sql);
cdsRow = sqlNextRow(result);
if (cdsRow == NULL)
    ZeroVar(&cds);      /* nothing recorded for this accession */
else if (!genbankCdsParse(cdsRow[0], &cds))
    ZeroVar(&cds);      /* recorded, but not parseable */
sqlFreeResult(&result);
return cds;
}
Ejemplo n.º 13
0
static struct psl *loadAlign(struct sqlConnection *conn, struct mappingInfo *mi, int start)
/* Load the alignment of mi->pg->name that starts at tStart == start from
 * the "<tblPre><geneSet>Ali[<suffix>]" table resolved by hFindSplitTable().
 * The caller guarantees that the psl exists.
 * NOTE(review): the fetched row is passed to pslLoad() without a NULL
 * check, so a missing alignment is not reported here. */
{
char baseName[256], tableName[256], sql[256];
boolean binColumn;
struct sqlResult *result;
char **fields;
struct psl *aln;

if (mi->suffix != NULL)
    safef(baseName, sizeof(baseName), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix);
else
    safef(baseName, sizeof(baseName), "%s%sAli", mi->tblPre, mi->geneSet);
hFindSplitTable(database, seqName, baseName, tableName, &binColumn);

sqlSafef(sql, sizeof(sql), "select * from %s where qName = '%s' and tStart = %d",
      tableName, mi->pg->name, start);
result = sqlMustGetResult(conn, sql);
fields = sqlNextRow(result);
/* offset the row by binColumn (0 or 1, as set by hFindSplitTable) */
aln = pslLoad(fields + binColumn);
sqlFreeResult(&result);
return aln;
}
Ejemplo n.º 14
0
static void writeDisorder(FILE *f, char *text, char *end, char *geneSymbol,
                          char *omimIdStr, char *location)
/* Write one disorder record to f.
 * text points at one piece of the disorder list and end at the '\0' that
 * terminates it.  Leading blanks are skipped, one leading '?' is removed and
 * reported as 1 in the last output column (0 otherwise), and trailing blanks
 * are cut off.  The trim never steps in front of text, so an empty or
 * all-blank piece cannot make it run off the start of the buffer. */
{
boolean questionable = FALSE;

while (*text == ' ')
    text++;
if (*text == '?')
    {
    questionable = TRUE;
    text++;
    }
while (end > text && end[-1] == ' ')
    *--end = '\0';

fprintf(f, "%s\t%s\t%s\t%s\t%d\n", text, geneSymbol, omimIdStr, location, questionable);
}

int main(int argc, char *argv[])
/* Split the ';'-separated disorder text of every omimGeneMap row (disorders1,
 * disorders2 and disorders3 joined by blanks) into one output line per
 * disorder: disorder, geneSymbol, omimId, location, questionable flag.
 * Arguments: argv[1] = database, argv[2] = output file name. */
{
char *database;
char *outFn;
struct sqlConnection *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
char *text, *semi;
char *geneSymbol, *location;

if (argc != 3) usage();

database = argv[1];
conn2= hAllocConn(database);

outFn   = argv[2];
outf    = mustOpen(outFn, "w");

sqlSafef(query2, sizeof query2,
"select omimId, concat(disorders1,' ',disorders2, ' ',disorders3), geneSymbol, location from omimGeneMap where disorders1 <>''");
sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    omimId     = row2[0];
    text       = row2[1];
    geneSymbol = row2[2];
    location   = row2[3];

    /* every piece that is closed by a ';' (the text is cut up in place) */
    while ((semi = strchr(text, ';')) != NULL)
        {
        *semi = '\0';
        writeDisorder(outf, text, semi, geneSymbol, omimId, location);
        text = semi + 1;
        }

    /* the remainder after the last ';' is always written, even if empty */
    writeDisorder(outf, text, text + strlen(text), geneSymbol, omimId, location);
    }
sqlFreeResult(&sr2);

fclose(outf);
hFreeConn(&conn2);
return(0);
}
Ejemplo n.º 15
0
int main(int argc, char *argv[])
/* Write "<RGD gene id>\t<reference>" lines from the rgdId column of the
 * 'gene' rows of rgdGeneRaw0.
 *
 * Only the part of the column before the first ';' is used.  In that part
 * the first comma-separated item is skipped, the second is taken as the RGD
 * gene id, and every further item yields one output line.  Rows with only
 * the id and no further item produce no output; rows without any comma are
 * skipped.
 * Arguments: argv[1] = database, argv[2] = output file name. */
{
struct sqlConnection *conn;
char query[512];
struct sqlResult *sr;
char **row;

char *dataBase;
char *outfileName;
FILE *outf;
char *xrefStr;      /* rgdId column, cut up in place */
char *rgdGeneId;    /* second comma-separated item */
char *entry;        /* current item after the id */
char *sep;

if (argc != 3) usage();
dataBase    = argv[1];
outfileName = argv[2];

outf = mustOpen(outfileName, "w");

conn= hAllocConn(dataBase);

sqlSafef(query, sizeof query,"select feature, rgdId from rgdGeneRaw0 where feature = 'gene'");
sr = sqlMustGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    xrefStr = row[1];

    /* ignore everything from the first ';' on */
    sep = strchr(xrefStr, ';');
    if (sep != NULL)
        *sep = '\0';

    /* get start of "RGD:" - it follows the first comma */
    sep = strchr(xrefStr, ',');
    if (sep == NULL)
        continue;       /* no second item, so no id to report */
    rgdGeneId = sep + 1;

    /* check if there are other references beside the RGD: entry */
    sep = strchr(rgdGeneId, ',');
    if (sep == NULL)
        continue;
    *sep = '\0';
    entry = sep + 1;

    /* one line per item that is followed by a comma ... */
    while ((sep = strchr(entry, ',')) != NULL)
        {
        *sep = '\0';
        fprintf(outf, "%s\t%s\n", rgdGeneId, entry);
        entry = sep + 1;
        }
    /* ... and one for the last item */
    fprintf(outf, "%s\t%s\n", rgdGeneId, entry);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
fclose(outf);

return(0);
}
Ejemplo n.º 16
0
int getSuperfamilies2(char *proteinID)
/* getSuperfamilies2() supersedes getSuperfamilies() starting from hg16.
   It gets the Superfamily data of a protein from ensemblXref3, sfAssign and
   sfDes of the proteinsXXXXXX database (protDbName) and places them in the
   sfId[], sfStart[], sfEnd[] and superfam_name[] arrays to be used by
   doSuperfamily().

   Each sfAssign region value is parsed as a comma-separated list of
   "start-end" ranges; one array entry is stored per range.  Parsing of a
   region stops at the first piece that has no '-'.  A superfamily ID that is
   already present in sfId[] is not stored again.

   Returns the number of entries stored (0 when sfAssign or ensemblXref3 is
   missing).
   NOTE(review): the capacity of the destination arrays is not visible here
   and sfCnt is not bounded against it. */
{
struct sqlConnection *conn, *conn2;
char query[MAXNAMELEN], query2[MAXNAMELEN];
struct sqlResult *sr, *sr2;
char **row, **row2;

char cond_str[255];

char *sfID, *seqID, *sfDesc, *region;
char *chp, *dash, *comma;
int  done, alreadyStored;
int  j;
int  sfCnt = 0;
int  int_start = 0, int_end = 0;

if (!hTableExists(protDbName, "sfAssign")) return(0);
if (!hTableExists(protDbName, "ensemblXref3")) return(0);

conn  = hAllocConn(database);
conn2 = hAllocConn(database);

sqlSafef(query2, sizeof(query2), 
    "select distinct sfID, seqID from %s.ensemblXref3 x, %s.sfAssign a where (swissAcc='%s' or tremblAcc='%s') and seqID=x.protein and protein != '' and evalue <= 0.02",
      protDbName, protDbName, proteinID, proteinID);
sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    sfID = row2[0];
    seqID= row2[1];

    sqlSafef(query, sizeof(query), 
          "select region from %s.sfAssign where sfID='%s' and seqID='%s' and evalue <=0.02", 
          protDbName, sfID, seqID);
    sr = sqlMustGetResult(conn, query);
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        region = row[0];

        /* skip superfamilies that were stored by an earlier row */
        alreadyStored = 0;
        for (j=0; j<sfCnt; j++)
            {
            if (sfId[j] == atoi(sfID))
                {
                alreadyStored = 1;
                break;
                }
            }
        if (alreadyStored)
            continue;

        sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID);
        sfDesc = sqlGetField(protDbName, "sfDes", "description", cond_str);
        if (sfDesc == NULL)
            sfDesc = "";        /* no description row: store an empty name */

        chp = region;
        done = 0;
        while (!done)
            {
            /* "start-end": without a '-' there is nothing left to parse */
            dash = strchr(chp, '-');
            if (dash == NULL)
                break;
            *dash = '\0';
            sscanf(chp, "%d", &int_start);

            chp = dash + 1;
            comma = strchr(chp, ',');
            if (comma != NULL)
                *comma = '\0';
            else
                done = 1;       /* last range of this region */
            sscanf(chp, "%d", &int_end);

            sfId[sfCnt]    = atoi(sfID);
            sfStart[sfCnt] = int_start;
            sfEnd[sfCnt]   = int_end;
            strncpy(superfam_name[sfCnt], sfDesc, MAXNAMELEN-1);
            sfCnt++;

            if (comma != NULL)
                chp = comma + 1;
            }
        }

    sqlFreeResult(&sr);
    }

sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
return(sfCnt);
}
Ejemplo n.º 17
0
int getSuperfamilies(char *proteinID)
/* preserved here for previous older genomes.
   Newer genomes should be using getSuperfamilies2(). 6/16/04 Fan

   Resolves the Ensembl peptide for the protein (through ensemblXref3 when
   that table exists, otherwise through ensGeneXref and ensTranscript using
   protDisplayID), stores it in ensPepName, and then reads its sfAssign rows
   with evalue <= 0.02.  Each region value is parsed as a comma-separated
   list of "start-end" ranges; one entry per range goes into sfId[],
   sfStart[], sfEnd[] and superfam_name[].  Parsing of a region stops at the
   first piece that has no '-'.

   Returns the number of entries stored; 0 when a needed table or lookup is
   missing.  No connection is held while the preliminary lookups run, so the
   early returns have nothing to release.
   NOTE(review): the capacity of the destination arrays is not visible here
   and the entry count is not bounded against it. */
{
struct sqlConnection *conn;
char query[MAXNAMELEN];
struct sqlResult *sr;
char **row;

char cond_str[255];

char *sfID, *sfDesc;
char *region;
int  done;

char *ensPep;
char *transcriptName;

char *chp, *dash, *comma;
int  ii = 0;
int  int_start = 0, int_end = 0;

if (!hTableExists(database, "sfAssign")) return(0);

if (hTableExists(database, "ensemblXref3")) 
    {
    /* use ensemblXref3 for Ensembl data release after ensembl34d */
    sqlSafefFrag(cond_str, sizeof(cond_str), "tremblAcc='%s'", proteinID);
    ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str);
    if (ensPep == NULL)
        {
        sqlSafefFrag(cond_str, sizeof(cond_str), "swissAcc='%s'", proteinID);
        ensPep = sqlGetField(database, "ensemblXref3", "protein", cond_str);
        if (ensPep == NULL) return(0);
        }
    }
else
    {
    if (! (hTableExists(database, "ensemblXref") || hTableExists(database, "ensTranscript") ) )
       return(0);

    /* two steps query needed because the recent Ensembl gene_xref 11/2003 table does not have 
       valid translation_name */
    sqlSafefFrag(cond_str, sizeof(cond_str), "external_name='%s'", protDisplayID);
    transcriptName = sqlGetField(database, "ensGeneXref", "transcript_name", cond_str);
    if (transcriptName == NULL)
        return(0);

    sqlSafefFrag(cond_str, sizeof(cond_str), "transcript_name='%s';", transcriptName);
    ensPep = sqlGetField(database, "ensTranscript", "translation_name", cond_str);
    if (ensPep == NULL)
        return(0);
    }

ensPepName = ensPep;

conn = hAllocConn(database);
sqlSafef(query, sizeof(query), "select * from %s.sfAssign where seqID='%s' and evalue <= 0.02;", database, ensPep);
sr = sqlMustGetResult(conn, query);

/* an empty result simply leaves ii at 0 */
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    region = row[3];
    sfID   = row[5];
    /* row[6] is not used for the description: */
    /* !!! the recent Superfamily sfAssign table does not have valid sf description */
    sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s;", sfID);
    sfDesc = sqlGetField(database, "sfDes", "description", cond_str);
    if (sfDesc == NULL)
        sfDesc = "";            /* no description row: store an empty name */

    chp = region;
    done = 0;
    while (!done)
        {
        /* "start-end": without a '-' there is nothing left to parse */
        dash = strchr(chp, '-');
        if (dash == NULL)
            break;
        *dash = '\0';
        sscanf(chp, "%d", &int_start);

        chp = dash + 1;
        comma = strchr(chp, ',');
        if (comma != NULL)
            *comma = '\0';
        else
            done = 1;           /* last range of this region */
        sscanf(chp, "%d", &int_end);

        sfId[ii]    = atoi(sfID);
        sfStart[ii] = int_start;
        sfEnd[ii]   = int_end;
        strncpy(superfam_name[ii], sfDesc, MAXNAMELEN-1);
        ii++;

        if (comma != NULL)
            chp = comma + 1;
        }
    }

sqlFreeResult(&sr);
hFreeConn(&conn);

return(ii);
}
Ejemplo n.º 18
0
int main(int argc, char *argv[])
/* Build mrnaRefseq.tab: pairs of GenBank mRNA accession and RefSeq
 * accession (both without ".version") that share a LocusLink ID.
 *
 * For every row of <db>Temp.locus2Ref0 the mRNA rows (seqType 'm') with the
 * same locusID are read from <db>Temp.locus2Acc0 and one "gbAC\trefAC" line
 * per pair is written to j.dat, which is then sorted, de-duplicated into
 * mrnaRefseq.tab and removed.
 * Argument: argv[1] = database name prefix (db).
 *
 * Columns read: locus2Ref0 [0] LocusLink ID, [1] RefSeq accession.version;
 *               locus2Acc0 [1] GenBank accession.version. */
{
struct sqlConnection *conn, *conn2;
char query[256], query2[256];
struct sqlResult *sr, *sr2;
char **row, **row2;

char *chp;
FILE *o1;

char *gbAC;         /* GenBank accession.version */
char *locusID2;     /* LocusLink ID */
char *refAC;        /* Refseq accession.version */
char *dbName; 

if (argc != 2) usage();
dbName = argv[1];

conn = hAllocConn(dbName);
conn2= hAllocConn(dbName);

o1 = fopen("j.dat", "w");
if (o1 == NULL)
    {
    fprintf(stderr, "Can't open j.dat for writing\n");
    hFreeConn(&conn);
    hFreeConn(&conn2);
    return(1);
    }

sqlSafef(query2, sizeof query2, "select * from %sTemp.locus2Ref0;", dbName);
sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    locusID2 = row2[0];
    refAC    = row2[1];

    /* drop the version once per RefSeq row instead of once per pair */
    chp = strchr(refAC, '.');
    if (chp != NULL) *chp = '\0';

    sqlSafef(query, sizeof query, "select * from %sTemp.locus2Acc0 where locusID=%s and seqType='m';", dbName, locusID2);
    sr = sqlMustGetResult(conn, query);
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        gbAC = row[1];

        chp = strchr(gbAC, '.');
        if (chp != NULL) *chp = '\0';

        fprintf(o1, "%s\t%s\n", gbAC, refAC);
        }
    /* each inner result set is released before the next one is requested */
    sqlFreeResult(&sr);
    }

/* results first, then the connections they belong to */
sqlFreeResult(&sr2);
fclose(o1);
hFreeConn(&conn);
hFreeConn(&conn2);

mustSystem("cat j.dat|sort|uniq >mrnaRefseq.tab");
printf("mrnaRefseq.tab created.\n");
mustSystem("rm j.dat");
return(0);
}
Ejemplo n.º 19
0
void processAlign(char *kgTempDb, char *spDb, char *alignID, int cdsCnt, FILE *outf)
{
struct sqlConnection *conn2, *conn3, *conn4;
char query2[256], query3[256];
struct sqlResult *sr2, *sr3;
char **row2, **row3;
char *score;
char *chrom;
char *protAcc;
char *mrnaID;
char *ranking;
int  protDbId;
char condStr[255];
int  i;
char *chp;
char *isCurated;

conn2= hAllocConn(kgTempDb);
conn3= hAllocConn(kgTempDb);
conn4= hAllocConn(kgTempDb);

sqlSafef(query2, sizeof(query2), "select * from %s.kgCandidate where alignID='%s'", kgTempDb, alignID);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
while (row2 != NULL)
    {
    mrnaID = row2[0];
    chrom = row2[1];
    ranking = row2[11];
    
    /* check if it is a composite mrnaID */
    /* if yes, select from entries with both protein and mrna specified */
    if (alignID[0] == 'U') 
    	{
	chp = strstr(row2[0], "_");
	*chp = '\0';
	protAcc = row2[0];
	chp ++;
	mrnaID = chp;
    	sqlSafef(query3, sizeof(query3), 
    	      "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' and protAcc='%s'",
	      kgTempDb, mrnaID, protAcc);
	}
    else
    	{
    	sqlSafef(query3, sizeof(query3), 
    	      "select protAcc, score from %s.protMrnaScore where mrnaAcc='%s' order by score desc",
	      kgTempDb, mrnaID);
	}
	
    sr3  = sqlMustGetResult(conn3, query3);
    row3 = sqlNextRow(sr3);
	      
    while(row3 != NULL)
        {
	protAcc = row3[0];
	score   = row3[1];

	chp = strstr(protAcc, "-");
	if (chp == NULL)
	    {
            sqlSafefFrag(condStr, sizeof(condStr), "acc='%s'", protAcc);
	    isCurated = sqlGetField(spDb, "info", "isCurated", condStr);
	    if (sameWord(isCurated, "1"))
	    	{
		protDbId = 1;
		}
	    else
	    	{
		protDbId = 2;
		}
	    }
   	else
	    {
	    protDbId = 4;
	    }
	    
	fprintf(outf, "%s:", chrom);
	for (i=0; i<cdsCnt; i++) fprintf(outf, "%s", cdsBloc[i]);
	fprintf(outf, "\t%s\t%d\t%8s\t%s\t%s\t%s\n", 
		ranking, protDbId, score, mrnaID, protAcc, alignID);

	/* for composite type, process just one record */ 
        if (alignID[0] == 'U') break; 
	row3 = sqlNextRow(sr3);
	}
    sqlFreeResult(&sr3);
    row2 = sqlNextRow(sr2);
    }
sqlFreeResult(&sr2);
hFreeConn(&conn2);
hFreeConn(&conn3);
hFreeConn(&conn4);
}
static void mustFit(int written, size_t bufSize, char *what)
/* Exit with a message when an snprintf() return value signals an error or
 * that the output did not fit into a buffer of bufSize bytes. */
{
if (written < 0 || (size_t)written >= bufSize)
    {
    fprintf(stderr, "query for %s does not fit in %lu bytes, exiting ...\n",
            what, (unsigned long)bufSize);
    exit(1);
    }
}

int main(int argc, char *argv[])
/* For every InterPro ID in proteome.<table>, pick the occurrence with the
 * largest (end - start + 1) and write its amino acid sequence, taken from
 * uniProt.protein, in FASTA form (">interProId description") when that
 * length is at least 18.
 * An ID whose occurrences all have a length of 0 or less is skipped.  The
 * program exits with status 1 when the protein of the chosen occurrence is
 * missing or when a statement does not fit its buffer.
 * Arguments: argv[1] = InterPro xref table name, argv[2] = output file.
 * NOTE(review): the cloneString() copies made while searching for the
 * maximum are not released. */
{
struct sqlConnection *conn2, *conn3, *conn4;
char query2[256], query3[256], query4[256];
struct sqlResult *sr2, *sr3, *sr4;
char **row2, **row3, **row4;

char *aaSeq;
char *accession;
char *desc;

FILE *outFile;
char *outFileName;
char *tableName;

char *interProId;
int  maxLen, len;
char *start, *end;
char *maxAcc, *maxStart, *maxDesc;

if (argc != 3) usage();
tableName    = argv[1];
outFileName  = argv[2];

outFile = mustOpen(outFileName, "w");
conn2 = hAllocConn();
conn3 = hAllocConn();
conn4 = hAllocConn();

/* loop over all InterPro entry for the specific InterPro xref table for this organism */
mustFit(snprintf(query2, sizeof(query2),
                 "select distinct interProId from proteome.%s", tableName),
        sizeof(query2), tableName);
sr2 = sqlMustGetResult(conn2, query2);
for (row2 = sqlNextRow(sr2); row2 != NULL; row2 = sqlNextRow(sr2))
    {
    interProId = row2[0];

    /* get all start/end positions of this InterPro domain */ 
    mustFit(snprintf(query3, sizeof(query3),
                     "select accession, start, end, description from proteome.%s where interProId='%s'", 
                     tableName, interProId),
            sizeof(query3), interProId);
    sr3 = sqlMustGetResult(conn3, query3);

    /* start from scratch for every ID so nothing carries over from the
     * previous one */
    maxLen = 0;
    maxAcc = maxStart = maxDesc = NULL;
    for (row3 = sqlNextRow(sr3); row3 != NULL; row3 = sqlNextRow(sr3))
        {
        accession = row3[0];
        start     = row3[1];
        end       = row3[2];
        desc      = row3[3];
        len = atoi(end) - atoi(start) + 1;

        /* remember the max len, so far; the row is only valid until the
         * next fetch, hence the copies */
        if (len > maxLen)
            {
            maxLen   = len;
            maxAcc   = cloneString(accession);
            maxStart = cloneString(start);
            maxDesc  = cloneString(desc);
            }
        }
    sqlFreeResult(&sr3);

    /* no occurrence with a positive length: nothing could be written */
    if (maxAcc == NULL)
        continue;

    /* fetch the corresponding AA sequence of the domain having the max length */
    mustFit(snprintf(query4, sizeof(query4),
                     "select substring(val, %s, %d) from uniProt.protein where acc='%s'",
                     maxStart, maxLen, maxAcc),
            sizeof(query4), maxAcc);
    sr4 = sqlMustGetResult(conn4, query4);
    row4 = sqlNextRow(sr4);
    if (row4 == NULL)
        {
        fprintf(stderr, "%s %s missing, exiting ...\n", maxAcc, interProId);
        exit(1);
        }
    aaSeq = row4[0];
    if (maxLen >= 18)
        {
        fprintf(outFile, ">%s %s\n", interProId, maxDesc);
        fprintf(outFile, "%s\n", aaSeq);
        }
    sqlFreeResult(&sr4);
    }
sqlFreeResult(&sr2);

hFreeConn(&conn2);
hFreeConn(&conn3);
hFreeConn(&conn4);

fclose(outFile);
return(0);
}
Ejemplo n.º 21
0
static void gadPrint(struct section *section, 
	struct sqlConnection *conn, char *geneId)
/* Print out GAD section: links to the Genetic Association Database and to
 * CDC HuGE for the gene symbol joined to geneId through kgXref/gadAll, the
 * distinct positively associated diseases, and the related studies.
 * Only the first three studies are listed unless the CGI variable
 * showAllRef is "Y" or "y"; when studies were held back, a link that
 * reloads the page with showAllRef=Y is printed.
 * Prints nothing when no gene symbol is found for geneId. */
{
char *gadGeneUrl =
    "http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=gene=";
char *gadDiseaseUrl =
    "http://geneticassociationdb.nih.gov/cgi-bin/tableview.cgi?table=allview&cond=upper(DISEASE)%20like%20'%25";
int refPrinted = 0;
boolean showCompleteGadList = FALSE;
boolean firstDisease = TRUE;
boolean listOpened = FALSE;
boolean moreRefs = FALSE;
char condStr[256];
char query[256];
struct sqlResult *sr;
char **row;
struct dyString *currentCgiUrl;
char *upperDisease;
char *showAllRef;
char *itemName;

safef(condStr, sizeof(condStr), 
    "k.kgId='%s' and k.geneSymbol = g.geneSymbol", geneId);
itemName = sqlGetField(database, "kgXref k, gadAll g", "k.geneSymbol", condStr);
if (itemName == NULL)
    return;	/* nothing to link to or query with */

showAllRef = cgiOptionalString("showAllRef");
if (showAllRef != NULL &&
    (sameWord(showAllRef, "Y") || sameWord(showAllRef, "y")))
    {
    showCompleteGadList = TRUE;
    }
currentCgiUrl = cgiUrlString();

printf("<B>Genetic Association Database: ");
printf("<A HREF=\"%s'%s'\" target=_blank>", gadGeneUrl, itemName);
printf("%s</A></B>\n", itemName);

printf("<BR><B>CDC HuGE Published Literature:  ");
printf("<A HREF=\"%s%s%s\" target=_blank>", 
       "http://hugenavigator.net/HuGENavigator/searchSummary.do?firstQuery=",
       itemName, 
       "&publitSearchType=now&whichContinue=firststart&check=n&dbType=publit&Mysubmit=go");
printf("%s</A></B>\n", itemName);

/* List diseases associated with the gene */
safef(query, sizeof(query),
    "select distinct broadPhen from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen",
    itemName);
sr = sqlMustGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* single quotes are doubled because the value lands inside a quoted
     * condition in the target URL */
    upperDisease = replaceChars(row[0], "'", "''");
    touppers(upperDisease);
    if (firstDisease)
        {
        printf("<BR><B>Positive Disease Associations:  </B>");
        firstDisease = FALSE;
        }
    else
        printf(", ");
    printf("<A HREF=\"%s%s%s%s%s\" target=_blank>",
        gadDiseaseUrl,
        cgiEncode(upperDisease), "%25'%20AND%20upper(GENE)%20%20like%20'%25", itemName, "%25'");
    printf("%s</A>\n", row[0]);
    }
sqlFreeResult(&sr);

safef(query, sizeof(query), 
    "select broadPhen,reference,title,journal, pubMed, conclusion from gadAll where geneSymbol='%s' and association = 'Y' order by broadPhen",
    itemName);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);

if (row != NULL)
    {
    printf("<BR><B>Related Studies: </B><OL>");
    listOpened = TRUE;
    }
while (row != NULL)
    {
    printf("<LI><B>%s </B>", row[0]);

    printf("<br>%s, %s, %s.\n", row[1], row[2], row[3]);
    if (!sameWord(row[4], ""))
        {
        printf(" [PubMed ");
        printf("<A HREF=\"%s%s%s\" target=_blank>",
            "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=Abstract&list_uids=",
            row[4],"&query_hl=1&itool=genome.ucsc.edu");
        printf("%s</A>]\n", row[4]);
        }
    printf("<br><i>%s</i>\n", row[5]);

    printf("</LI>\n");
    refPrinted++;

    /* Advance before deciding to stop, so that a non-NULL row really
     * means there is at least one study we did not show. */
    row = sqlNextRow(sr);
    if ((!showCompleteGadList) && (refPrinted >= 3)) break;
    }
/* Record the answer before the result (and the row it owns) is freed. */
moreRefs = (row != NULL);
sqlFreeResult(&sr);
if (listOpened)
    printf("</OL>");

if ((!showCompleteGadList) && moreRefs)
    {
    printf("<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; more ...  </B>");
    printf(
          "<A HREF=\"%s?showAllRef=Y&%s&#35;gad\">click here to view the complete list</A> ", 
          "hgGene", currentCgiUrl->string);
    }
}
Ejemplo n.º 22
0
void gsidMsa(char *database, char *table, char *baseAcc, int startPos, 
char *outWigFn, char *outConsFn)
/* Build per-column identity and a consensus from the aligned sequences
 * stored in database.table.
 *   baseAcc   - id of the row whose sequence is the base (reference) genome
 *   startPos  - position written for the first non-gap base column
 *   outWigFn  - output wiggle file: one "position identity" line for every
 *               column where the base sequence is not '-'
 *   outConsFn - output FASTA-style file holding consensusSeq2
 * Fills the file-level arrays seqId, seq, cnt, consensusSeq, consensusSeq2,
 * aliSeq and identity.  Exits with status 1 if baseAcc is not in the table. */
{
struct sqlConnection *conn2;
 
char query2[256];
struct sqlResult *sr2;
char **row2;
FILE *outf, *outf2;

char base;

int ii;
int i;
int j,jj,k;
int seqCnt = 0;
int max, kmax, kmax2;

conn2= hAllocConn(database);

outf = mustOpen(outWigFn, "w");

sqlSafef(query2, sizeof query2, "select seq from %s.%s where id='%s'", database, table, baseAcc);

sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
if (row2 == NULL)
    {
    fprintf(stderr, "%s not found in %s.%s, exiting ...\n", baseAcc, database, table);
    exit(1);
    }
baseGenomeSeq = cloneString(row2[0]);
baseSeqLen=strlen(baseGenomeSeq);
sqlFreeResult(&sr2);

/* load every aligned sequence (column 0 = id, column 1 = sequence) */
sqlSafef(query2, sizeof query2, "select * from %s.%s", database, table);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);

/* NOTE(review): nothing here bounds ii or the string lengths against the
 * dimensions of seqId/seq, which are declared outside this function. */
ii=0;
while (row2 != NULL)
    {
    strcpy(seqId[ii], row2[0]);
    strcpy(seq[ii], row2[1]);
    
    ii++;
    row2 = sqlNextRow(sr2);
    }

seqCnt = ii;
sqlFreeResult(&sr2);
hFreeConn(&conn2);

/* print header */
fprintf(outf, "browser position chr1:1-9000\n");
fprintf(outf, "track type=wiggle_0\n");
fprintf(outf, "variableStep chrom=chr1\n");

jj=0;
for (j=0; j<baseSeqLen; j++)
    {
    /* reset the tallies of THIS column */
    for (k=0; k<MAXBASE; k++)
        cnt[j][k] = 0;
    
    /* count how many sequences carry each reference symbol in column j */
    for (i=0; i<seqCnt; i++)
        {
        base = toupper((unsigned char)seq[i][j]);
        for (k=0; k<MAXBASE; k++)
            {
            if (base == refBase[k]) 
                {
                cnt[j][k]++;
                }
            }
        } 
    max  = 0;
    kmax = 0;
    kmax2= 0;
    for (k=0; k<MAXBASE; k++)
        {
        if (cnt[j][k] > max) 
            {
            max = cnt[j][k];
            
            /* kmax2 is the index that held the maximum just before this one.
             * NOTE(review): that is not always the true 2nd highest count. */
            kmax2 = kmax;
            kmax = k;
            }
        }
    consensusSeq[j] = refBase[kmax];
    /* consensusSeq2 falls back to the previous leader when the winner is a gap */
    if (refBase[kmax] == '-')
        {
        consensusSeq2[j] = refBase2[kmax2];
        }
    else
        {
        consensusSeq2[j] = refBase[kmax];
        }

    aliSeq[j] = refBase[kmax];
    identity[j] = (float)max/(float)seqCnt;
    /* only columns present in the base genome get an output position */
    if (baseGenomeSeq[j] != '-')
        {
        fprintf(outf, "%d %f\n", startPos+jj, identity[j]);
        jj++;
        }
    }

fclose(outf);

consensusSeq[baseSeqLen]  = '\0';
consensusSeq2[baseSeqLen] = '\0';
outf2 = mustOpen(outConsFn, "w");
fprintf(outf2, ">%s MSA Consensus Sequence\n", table);
fprintf(outf2, "%s\n", consensusSeq2);
fclose(outf2);
}
Ejemplo n.º 23
0
int main(int argc, char *argv[])
/* Write allPep.tab: one tab-separated line per row of biosql<date>.bioentry
 * holding bioentry id, biodatabase id, display id, accession, division,
 * gene names (always empty here), description, the parenthesised remainder
 * of the description, and the sequence.
 * argv[1] is the date suffix of the biosql database name. */
{
struct sqlConnection *conn2, *conn3, *conn4;
char query2[256], query3[256], query4[256];
struct sqlResult *sr2, *sr3, *sr4;
char **row2, **row3, **row4;

FILE *o3;
char *chp;

char *proteinDataDate;

char *bioentry_id;
char *biodatabase_id;
char *display_id;
char *accession;
char *division;
char *biosequence_str;
  
char *desc, *desc2;
char *genenames = NULL;
char *ontology_term_id;
char *qualifier_value;

if (argc != 2) usage();
proteinDataDate = argv[1];

conn2= hAllocConn();
conn3= hAllocConn();
conn4= hAllocConn();
    
o3 = fopen("allPep.tab", "w");
if (o3 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output file: allPep.tab\n");
    return(1);
    }
    
sqlSafef(query3, sizeof query3, "select * from biosql%s.bioentry;", proteinDataDate);

sr3 = sqlMustGetResult(conn3, query3);
row3 = sqlNextRow(sr3);
	      
while (row3 != NULL)
    {
    bioentry_id     = row3[0];
    biodatabase_id  = row3[1];
    display_id      = row3[2];
    accession       = row3[3];
    division        = row3[5];

    /* sequence of this entry; stays empty when the entry has none */
    biosequence_str = "";
    sqlSafef(query2, sizeof query2, "select * from biosql%s.biosequence where bioentry_id='%s';", 
                   proteinDataDate, bioentry_id);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    if (row2 != NULL)
        biosequence_str = row2[4];
		
    sqlSafef(query4, sizeof query4,
            "select * from biosql%s.bioentry_qualifier_value where bioentry_id='%s';",
            proteinDataDate, bioentry_id);
    
    genenames="";

    desc  = "";
    desc2 = "";
	
    /* only the first qualifier row is examined */
    sr4  = sqlMustGetResult(conn4, query4);
    row4 = sqlNextRow(sr4);
    if (row4 != NULL)
        {
        ontology_term_id= row4[1];
        qualifier_value = row4[2];

        if (strcmp(ontology_term_id, "10") == 0)
            {
            desc = qualifier_value;
            }
        /* split "text (rest" into desc = "text" and desc2 = "(rest" by
         * overwriting the character just before the parenthesis */
        chp = strstr(desc, "(");
        if (chp != NULL)
            {
            desc2 = chp;
            if (chp > desc)
                *(chp - 1) = '\0';
            else
                desc = "";	/* '(' is the first character: nothing precedes it */
            }
        }
    fprintf(o3, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                bioentry_id,
                biodatabase_id,
                display_id,
                accession,
                division,
                genenames,
                desc,
                desc2,
                biosequence_str);
    /* rows point into their results, so free only after printing */
    sqlFreeResult(&sr2);
    sqlFreeResult(&sr4);
    row3 = sqlNextRow(sr3);
    }

/* release the result before the connection it was read from */
sqlFreeResult(&sr3);
hFreeConn(&conn2);
hFreeConn(&conn3);
hFreeConn(&conn4);
	
fclose(o3);
return(0);
}
Ejemplo n.º 24
0
int main(int argc, char *argv[])
/* Score protein/mRNA alignments read from <kgTempDb>.<protMrnaTableName>
 * and print "protAcc <tab> mrnaAcc <tab> score" to stdout for every pair
 * where more than 30% of the protein matched and the mRNA has a moddate in
 * gbCdnaInfo.
 * Arguments: proteinDataDate kgTempDb genomeReadOnly protMrnaTableName. */
{
FILE *inf;

char *mrnaDate;
int  months;

struct sqlConnection *conn, *conn3;
char query[256];
struct sqlResult *sr;
char **row;

char *protAcc, *mrnaAcc, *matchStr;
char *protSizeStr, *mrnaSizeStr;
int  protSize, mrnaSize, match;

char *protMrnaTableName;

char condStr[255];
int score;

if (argc != 5) usage();
    
proteinDataDate = argv[1];
kgTempDb = argv[2];
genomeReadOnly = argv[3];
protMrnaTableName = argv[4];

sprintf(spDB, "sp%s", proteinDataDate);
sprintf(proteinsDB, "proteins%s", proteinDataDate);
sprintf(gbTempDB, "%sTemp", kgTempDb);
  
inf = fopen("protein.lis", "r"); 
if ((FILE *) NULL == inf)
    errAbort("ERROR: Can not open input file: protein.lis");
o3  = fopen("kgBestMrna.out",   "w");
if ((FILE *) NULL == o3)
    errAbort("ERROR: Can not open output file: kgBestMrna.out");
o7  = fopen("best.lis",    "w");
if ((FILE *) NULL == o7)
    errAbort("ERROR: Can not open output file: best.lis");

conn = hAllocConn(genomeReadOnly);
conn3= hAllocConn(genomeReadOnly);
   
proteinCount = 0; 

sqlSafef(query, sizeof query,"select qName, tName, matches, qSize, tSize from %s.%s", kgTempDb, protMrnaTableName);
sr = sqlMustGetResult(conn, query);
row = sqlNextRow(sr);
while (row != NULL)
    {
    protAcc 	= row[0];
    mrnaAcc 	= row[1];
    matchStr    = row[2];
    protSizeStr = row[3];
    mrnaSizeStr = row[4];

    /* start from zero so a field that fails to parse is not left holding
     * the previous row's value */
    match = 0;
    protSize = 0;
    mrnaSize = 0;
    sscanf(matchStr, "%d", &match);
    sscanf(protSizeStr, "%d", &protSize);
    sscanf(mrnaSizeStr, "%d", &mrnaSize);
  
    if (protSize > 0 && (float)match/(float)protSize > 0.3)
    	{
        /* build the condition in the same buffer that is handed to sqlGetField */
        sqlSafefFrag(condStr, sizeof condStr, "acc='%s'", mrnaAcc);
        mrnaDate = sqlGetField(genomeReadOnly, "gbCdnaInfo", "moddate",
			       condStr);
	if (mrnaDate != NULL)
	   {
           months = cal_months(mrnaDate);
           /* longer and more recently modified mRNAs score higher; every
            * unmatched protein residue costs 50 */
           score  = mrnaSize + months*2 - (protSize - match) *50;
           printf("%s\t%s\t%d\n", protAcc, mrnaAcc, score);fflush(stdout);
           }
	}
    row = sqlNextRow(sr);
    }    

hFreeConn(&conn);
hFreeConn(&conn3);
fclose(inf);
fclose(o3);
fclose(o7);
return(0);
}
/*
 *	there are a variety of conditions that affect how FetchData is
 *	going to work.  This is an attempt to allow it to do as much
 *	as possible, but not get overloaded.
 *	summaryOnly is done when whole chrom summaries are requested
 *	for statistic purposes.  In those cases we do not need to go all
 *	the way to the data to get the averages, the SQL rows are good
 *	enough.  Although even on this level there is quite a bit of
 *	work to do on tracks such as Quality that have 180,000 rows on
 *	just chr1.
 *	a wiggleStats wsList is given when doing statistics, if it is
 *	purely a data fetch operation, there is no need to do
 *	wiggleStats and it will be a NULL pointer.
 *	a bedList pointer is given when a returned bed list is desired.
 *	In the case of processing a bedList, we honor the return limit
 *	of number of bed elements via the maxBedElements.
 *	If we are not returning a bedList and we are not doing a stats
 *	summary, then we have an honest data fetch operation, and in
 *	this case we honor the stated line limit of maxBedElements.
 *	When the caller is doing this data fetch operation and states
 *	that maxBedElements is zero, then we do all data that can be found.
 *	This would be the case for a stats operation when only one chrom
 *	is being worked on.
 */
struct wiggleData *wigFetchData(char *db, char *table, char *chromName,
    int winStart, int winEnd, boolean summaryOnly, boolean freeData,
	int tableId, boolean (*wiggleCompare)(int tableId, double value,
	    boolean summaryOnly, struct wiggle *wiggle),
		char *constraints, struct bed **bedList,
		    unsigned maxBedElements, struct wiggleStats **wsList)
/*  return linked list of wiggle data between winStart, winEnd
 *  Returns NULL when the table does not exist or nothing was read.
 *  constraints - optional extra SQL condition; when it mentions "span" it
 *                is also applied to the span survey and to each span's
 *                range query.
 *  bedList     - when non-NULL, bed elements are accumulated here (during
 *                the first span only) and the list is reversed on return.
 *  wsList      - when non-NULL, returnStats() is called for each span.
 *  freeData    - when TRUE each element's data array is freed and set to
 *                NULL once it has been accumulated.
 *  maxBedElements - bed element limit when building a bed list; data
 *                limit for a plain, non-summary fetch (0 = no limit). */
{
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr;
char **row;
int rowOffset;
int rowCount = 0;
struct wiggle *wiggle;
struct hash *spans = NULL;      /* List of spans encountered during load */
char spanName[128];
char whereSpan[128];
char query[256];
struct hashEl *el;
int leastSpan = BIGNUM;
int mostSpan = 0;
int spanCount = 0;
int span = 0;
struct hashCookie cookie;
struct wiggleData *wigData = (struct wiggleData *) NULL;
struct wiggleData *wdList = (struct wiggleData *) NULL;
boolean bewareConstraints = FALSE;
boolean createBedList = FALSE;
boolean firstSpanDone = FALSE;
unsigned dataLimit = 0;
unsigned dataDone = 0;
boolean reachedDataLimit = FALSE;

/*	make sure table exists before we try to talk to it
 *	If it does not exist, we return a null result
 */
if (! sqlTableExists(conn, table))
    {
    hFreeConn(&conn);
    return((struct wiggleData *)NULL);
    }

if ((struct bed **)NULL != bedList)
    createBedList = TRUE;

/*	if we are not doing a summary (== return all data) and
 *	we are not creating a bed list, then obey the limit requested
 *	It will be zero if they really want everything.
 */
if (!summaryOnly && !createBedList)
    dataLimit = maxBedElements;

spans = newHash(0);	/*	a listing of all spans found here	*/

resetStats(&wigStatsAcc);	/*	zero everything	*/


/*	Are the constraints going to interfere with our span search ? */
if (constraints)
    {
    /* the lower-cased copy is only needed for this one test */
    char *lowered = cloneString(constraints);
    tolowers(lowered);
    if (stringIn("span",lowered))
	bewareConstraints = TRUE;
    freeMem(lowered);
    }

if (bewareConstraints)
    snprintf(query, sizeof(query),
	"SELECT span from %s where chrom = '%s' AND %s group by span",
	table, chromName, constraints );
else
    snprintf(query, sizeof(query),
	"SELECT span from %s where chrom = '%s' group by span",
	table, chromName );

/*	Survey the spans to see what the story is here */

sr = sqlMustGetResult(conn,query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* distinct name so the function-level "span" is not shadowed */
    unsigned surveyedSpan = sqlUnsigned(row[0]);

    ++rowCount;

    snprintf(spanName, sizeof(spanName), "%u", surveyedSpan);
    el = hashLookup(spans, spanName);
    if ( el == NULL)
	{
	if (surveyedSpan > mostSpan) mostSpan = surveyedSpan;
	if (surveyedSpan < leastSpan) leastSpan = surveyedSpan;
	++spanCount;
	hashAddInt(spans, spanName, surveyedSpan);
	}

    }
sqlFreeResult(&sr);

/*	Now, using that span list, go through each span, fetching data	*/
cookie = hashFirst(spans);
while ((! reachedDataLimit) && (el = hashNext(&cookie)) != NULL)
    {
    /* hand over what the previous span accumulated ("span" still holds
     * the previous span's value here) before starting afresh */
    if ((struct wiggleStats **)NULL != wsList)
	returnStats(&wigStatsAcc,wsList,chromName,winStart,winEnd,span);

    resetStats(&wigStatsAcc);

    if (bewareConstraints)
	{
	snprintf(whereSpan, sizeof(whereSpan), "((span = %s) AND %s)", el->name,
	    constraints);
	}
    else
	snprintf(whereSpan, sizeof(whereSpan), "span = %s", el->name);

    span = atoi(el->name);

    sr = hOrderedRangeQuery(conn, table, chromName, winStart, winEnd,
        whereSpan, &rowOffset);

    rowCount = 0;
    while ((! reachedDataLimit) && (row = sqlNextRow(sr)) != NULL)
	{
	++rowCount;
	wiggle = wiggleLoad(row + rowOffset);
	/* the loop condition already guarantees the limit is not reached */
	if (wiggle->count > 0)
	    {
	    wigData = wigReadDataRow(wiggle, winStart, winEnd,
		    tableId, summaryOnly, wiggleCompare );
	    if (wigData)
		{
		/* bed elements are only collected while on the first span */
		if (firstSpanDone)
		    accumStats(&wigStatsAcc, wigData, (struct bed **)NULL,
			maxBedElements, table);
		else
		    accumStats(&wigStatsAcc, wigData, bedList,
			maxBedElements, table);
		dataDone += wigData->count;
		if (freeData)
		    {
		    freeMem(wigData->data); /* and mark it gone */
		    wigData->data = (struct wiggleDatum *)NULL;
		    }
		slAddHead(&wdList,wigData);
		if (!createBedList && dataLimit)
		    if (dataLimit < dataDone)
			reachedDataLimit = TRUE;
		if (createBedList && (wigStatsAcc.bedElCount > maxBedElements))
		    reachedDataLimit = TRUE;
		}
	    }
	}
	/*	perhaps last bed line	*/
    if (!firstSpanDone && createBedList &&
	(wigStatsAcc.bedElEnd > wigStatsAcc.bedElStart) && wigData)
	{
	struct bed *bedEl;
	bedEl = bedElement(wigData->chrom, wigStatsAcc.bedElStart,
	    wigStatsAcc.bedElEnd, table, ++wigStatsAcc.bedElCount);
	slAddHead(bedList, bedEl);
	}
    sqlFreeResult(&sr);
    firstSpanDone = TRUE;
    }
closeWibFile();

/* elements were pushed on the head, so restore insertion order */
if (createBedList)
    slReverse(bedList);

/*	last stats calculation	*/
if ((struct wiggleStats **)NULL != wsList)
    returnStats(&wigStatsAcc,wsList,chromName,winStart,winEnd,span);

hFreeConn(&conn);

if (wdList != (struct wiggleData *)NULL)
	slReverse(&wdList);
/*	this wdList can be freed by wigFreeData */
return(wdList);
}	/*	struct wiggleData *wigFetchData()	*/
int spanInUse(struct sqlConnection *conn, char *table, char *chrom,
	int winStart, int winEnd, struct cart *cart)
/*	determine span used during hgTracks display,
 *	winEnd == 0 means whole chrom
 *	Returns the largest span in the table (for this chrom) that does not
 *	exceed the number of bases per pixel of the drawing area, or the
 *	smallest span when none qualifies or the drawing area has no
 *	positive width.  Aborts if the chrom size is needed and can not be
 *	found in chromInfo.	*/
{
struct sqlResult *sr;
char query[256];
char **row;
float basesPerPixel = 0.0;
int spanInUse = 0;
struct hashCookie cookie;
int insideWidth;
int minSpan = BIGNUM;
struct hash *spans = newHash(0);	/*	list of spans in this table */
struct hashEl *el;
int insideX = hgDefaultGfxBorder;
int pixWidth = atoi(cartUsualString(cart, "pix", DEFAULT_PIX_WIDTH ));
boolean withLeftLabels = cartUsualBoolean(cart, "leftLabels", TRUE);

/*	winEnd less than 1 (i.e. == 0), we need to find this chrom size	*/
if (winEnd < 1)
    {
    safef(query, ArraySize(query),
	"SELECT size from chromInfo where chrom = '%s'", chrom);
    sr = sqlMustGetResult(conn,query);
    if ((row = sqlNextRow(sr)) == NULL)
	errAbort("spanInUse: query failed: '%s'\n", query);
    winEnd = sqlUnsigned(row[0]);
    sqlFreeResult(&sr);
    if (winEnd < 1)
	errAbort("spanInUse: failed to find valid chrom size via query: '%s'\n", query);
    }

/*	This is a time expensive query,
 *	~3 to 6 seconds on large chroms full of data	*/
safef(query, ArraySize(query),
    "SELECT span from %s where chrom = '%s' group by span", table, chrom);

sr = sqlMustGetResult(conn,query);
while ((row = sqlNextRow(sr)) != NULL)
    {   
    char spanName[128];
    unsigned span = sqlUnsigned(row[0]);

    safef(spanName, ArraySize(spanName), "%u", span);
    el = hashLookup(spans, spanName);
    if ( el == NULL)
	{
	if (span < minSpan) minSpan = span;
	hashAddInt(spans, spanName, span);
	}
    }
sqlFreeResult(&sr);

/*	default answer: the finest resolution available	*/
spanInUse = minSpan;

if (withLeftLabels)
	insideX += hgDefaultLeftLabelWidth + hgDefaultGfxBorder;

insideWidth = pixWidth - insideX - hgDefaultGfxBorder;

/*	a "pix" setting too small for the borders leaves no drawing area;
 *	keep the default rather than divide by zero	*/
if (insideWidth > 0)
    {
    /*	true division so the ratio is not truncated before the compare */
    basesPerPixel = (float)(winEnd - winStart) / (float)insideWidth;
    cookie = hashFirst(spans);

    while ((el = hashNext(&cookie)) != NULL)
	{
	int span = sqlSigned(el->name);

	/*	keep the largest fitting span, so the answer does not
	 *	depend on the order the hash hands the spans back	*/
	if (((float) span <= basesPerPixel) && (span > spanInUse))
	    spanInUse = span;
	}
    }

return spanInUse;
}	/*	int spanInUse()	*/
int main(int argc, char *argv[])
/* Compute amino acid residue frequency statistics over the proteins of
 * <database>.knownGene.
 * Arguments: proteinDatabaseName taxon database.
 * Outputs:
 *   pbResAvgStd.tab  - residue, average frequency, standard deviation
 *   pbAnomLimit.tab  - residue, low (2.5%) and high (97.5%) cutoffs
 *   pbAaDist<X>.tab  - written by calDist() for each residue X
 *   <X>.txt, <X>.srt - per-residue work files (frequency, accession)
 * Proteins shorter than 100 residues, proteins missing from displayId or
 * spXref2, and proteins whose biodatabaseID does not start with '1' are
 * skipped. */
{
    struct sqlConnection *conn, *conn2;
    char query2[256];
    struct sqlResult *sr2;
    char **row2;
    char cond_str[255];
    char *proteinDatabaseName;
    FILE *o1, *o2, *o3;
    FILE *fh[23];
    char temp_str[1000];
    char *accession;
    char *aaSeq;
    char *chp;
    int i, j, len;
    int ihi, ilow;
    char *answer;
    char *protDisplayId;
    int aaResCnt[30];
    char aaAlphabet[30];
    int aaResFound;
    /* zeroed so a failed sscanf below can not leave them indeterminate */
    float fvalue1 = 0.0, fvalue2 = 0.0;
    float p1, p2;
    int icnt, jcnt;
    char *taxon;
    char *database;
    int sortedCnt = 0;

    if (argc != 4) usage();

    /* the first 20 letters are the ones that get their own files and
     * statistics; X, Z and B are only counted */
    strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB");

    proteinDatabaseName = argv[1];
    taxon = argv[2];
    database = argv[3];

    o2 = mustOpen("pbResAvgStd.tab", "w");

    for (i=0; i<20; i++)
    {
        safef(temp_str, sizeof(temp_str), "%c.txt", aaAlphabet[i]);
        fh[i] = mustOpen(temp_str, "w");
    }

    conn  = hAllocConn(hDefaultDb());
    conn2 = hAllocConn(hDefaultDb());

    safef(query2, sizeof(query2), "select proteinID from %s.knownGene;", database);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    icnt = 0;
    jcnt = 0;

    for (j=0; j<MAXRES; j++)
    {
        sumJ[j] = 0;
    }

    while (row2 != NULL)
    {
        /* proteinID may be a display id or already an accession: try both */
        protDisplayId = row2[0];
        safef(cond_str, sizeof(cond_str),  "val='%s'", protDisplayId);
        accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);

        if (accession == NULL)
        {
            safef(cond_str, sizeof(cond_str),  "acc='%s'", protDisplayId);
            accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);
            if (accession == NULL)
            {
                verbose(2, "'%s' not found.\n", protDisplayId);
                goto skip;
            }
        }

        /* NOTE(review): the database name "proteins040115" is hard-coded
         * here while everything else comes from the command line. */
        safef(cond_str, sizeof(cond_str),  "accession='%s'", accession);
        answer = sqlGetField("proteins040115", "spXref2", "biodatabaseID", cond_str);
        if (answer == NULL)
        {
            /* this protein might be a variant splice protein, and then it won't be in spXref2 */
            goto skip;
        }
        if (answer[0] != '1')
        {
            /* printf("%s not in SWISS-PROT\n", protDisplayId);fflush(stdout); */
            goto skip;
        }

        safef(cond_str, sizeof(cond_str),  "acc='%s'", accession);
        aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str);
        if (aaSeq == NULL)
        {
            printf("Can't find peptide sequence for %s, exiting ...\n", protDisplayId);
            fflush(stdout);
            exit(1);
        }

        len  = strlen(aaSeq);
        if (len < 100) goto skip;

        lenDouble = (double)len;

        for (j=0; j<MAXRES; j++)
        {
            aaResCnt[j] = 0;
        }

        /* tally each residue of the sequence */
        chp = aaSeq;
        for (i=0; i<len; i++)
        {
            aaResFound = 0;
            for (j=0; j<MAXRES; j++)
            {
                if (*chp == aaAlphabet[j])
                {
                    aaResFound = 1;
                    aaResCnt[j] ++;
                }
            }
            if (!aaResFound)
            {
                fprintf(stderr, "%c %d not a valid AA residue.\n", *chp, *chp);
            }
            chp++;
        }

        for (j=0; j<MAXRES; j++)
        {
            freq[icnt][j] = (double)aaResCnt[j]/lenDouble;
            sumJ[j] = sumJ[j] + freq[icnt][j];
        }

        for (j=0; j<20; j++)
        {
            fprintf(fh[j], "%15.7f\t%s\n", freq[icnt][j], accession);
            fflush(fh[j]);
        }
        icnt++;
        if (icnt >= MAXN)
            errAbort("Too many proteins - please set MAXN to be more than %d\n", MAXN);

skip:
        row2 = sqlNextRow(sr2);
    }

    recordCnt = icnt;
    recordCntDouble = (double)recordCnt;

    for (j=0; j<20; j++)
    {
        carefulClose(&(fh[j]));
    }

    sqlFreeResult(&sr2);
    hFreeConn(&conn);
    hFreeConn(&conn2);

    /* the standard deviation below divides by (recordCnt-1) */
    if (recordCnt < 2)
        errAbort("Only %d usable proteins found - need at least 2 to compute statistics\n", recordCnt);

    for (j=0; j<MAXRES; j++)
    {
        avg[j] = sumJ[j]/recordCntDouble;
    }

    for (j=0; j<20; j++)
    {
        sum = 0.0;
        for (i=0; i<recordCnt; i++)
        {
            sum = sum + (freq[i][j] - avg[j]) * (freq[i][j] - avg[j]);
        }
        sigma[j] = sqrt(sum/(double)(recordCnt-1));
        fprintf(o2, "%c\t%f\t%f\n", aaAlphabet[j], avg[j], sigma[j]);
    }

    carefulClose(&o2);

    o1 = mustOpen("pbAnomLimit.tab", "w");
    for (j=0; j<20; j++)
    {
        safef(temp_str, sizeof(temp_str), "cat %c.txt|sort|uniq > %c.srt",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);

        /* figure out how many unique entries */
        safef(temp_str, sizeof(temp_str), "wc %c.srt > %c.tmp",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);
        safef(temp_str, sizeof(temp_str), "%c.tmp",  aaAlphabet[j]);
        o3 = mustOpen(temp_str, "r");
        mustGetLine(o3, temp_str, sizeof(temp_str));
        carefulClose(&o3);

        /* isolate the first field of the wc output (the line count) */
        chp = temp_str;
        while (*chp == ' ') chp++;
        while (*chp != ' ' && *chp != '\0') chp++;
        *chp = '\0';
        sscanf(temp_str, "%d", &sortedCnt);
        safef(temp_str, sizeof(temp_str), "rm %c.tmp", aaAlphabet[j]);
        mustSystem(temp_str);

        /* cal hi and low cutoff threshold */
        ilow = (int)((float)sortedCnt * 0.025);
        ihi  = (int)((float)sortedCnt * 0.975);

        safef(temp_str, sizeof(temp_str), "%c.srt",  aaAlphabet[j]);
        o2 = mustOpen(temp_str, "r");

        /* low cutoff: midpoint of the values on lines ilow and ilow+1 */
        for (i=0; i<ilow; i++)
        {
            mustGetLine(o2, temp_str, sizeof(temp_str));
        }
        sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, sizeof(temp_str));
        sscanf(temp_str, "%f", &fvalue2);
        p1 = (fvalue1 + fvalue2)/2.0;

        /* high cutoff: midpoint of the values on lines ihi and ihi+1 */
        for (i=ilow+1; i<ihi; i++)
        {
            mustGetLine(o2, temp_str, sizeof(temp_str));
        }
        sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, sizeof(temp_str));
        sscanf(temp_str, "%f", &fvalue2);
        p2 = (fvalue1 + fvalue2)/2.0;
        carefulClose(&o2);

        fprintf(o1, "%c\t%f\t%f\n", aaAlphabet[j], p1, p2);
        fflush(stdout);

        for (i=0; i<recordCnt; i++)
        {
            measure[i] = freq[i][j];
        }
        safef(temp_str, sizeof(temp_str), "pbAaDist%c.tab", aaAlphabet[j]);
        calDist(measure,  recordCnt,    51,     0.0, 0.005, temp_str);
    }

    carefulClose(&o1);

    return(0);
}
Ejemplo n.º 28
0
static void printSeqRows(struct sqlConnection *conn, char *query,
	char *subjId, char *emptyMsg)
/* Run query (columns: sequence id, sequence) and print each sequence as a
 * named anchor followed by a <pre> block: a ">id:subjId" header, then the
 * residues 50 per line.  Prints emptyMsg when the query returns no rows. */
{
struct sqlResult *result = sqlMustGetResult(conn, query);
char **fields = sqlNextRow(result);

if (fields == NULL) printf("%s", emptyMsg);

for ( ; fields != NULL; fields = sqlNextRow(result))
    {
    char *id = fields[0];
    char *residues = fields[1];
    int total = strlen(residues);
    int pos;

    hPrintf("<A NAME=\"%s\">\n", id);
    hPrintf("<pre>\n");
    hPrintf("%c%s", '>', id);
    hPrintf("%s%s", ":", subjId);
    for (pos = 0; pos < total; pos++)
        {
        /* line break before every 50th residue, including the first */
        if ((pos % 50) == 0) hPrintf("\n");
        hPrintf("%c", residues[pos]);
        }
    hPrintf("</pre>");
    fflush(stdout);
    }
sqlFreeResult(&result);
}

static void sequencePrint(struct section *section, 
	struct sqlConnection *conn, char *subjId)
/* Print out Sequence section: first the DNA sequences, then the protein
 * sequences, that gisaidXref links to subjId. */
{
char query[256];

printf("<B>DNA Sequences</B><BR>");
sqlSafef(query, sizeof(query), 
      "select dnaSeqId, seq from gisaidXref, dnaSeq where subjId = '%s' and id = dnaSeqId order by dnaSeqId", subjId);
printSeqRows(conn, query, subjId, "<BR>Not available.<BR><BR>");

printf("<B>Protein Sequences</B><BR>");
sqlSafef(query, sizeof(query), 
      "select aaSeqId, seq from gisaidXref, aaSeq where subjId = '%s' and aaSeqId = id order by aaSeqId", subjId);
printSeqRows(conn, query, subjId, "<BR>Not available.<BR>");
return;
}
int main(int argc, char *argv[])
/* Build spXref2.tab from biosql<date>: for every bioentry, every direct
 * link of that entry, and the dbxref row the link points to, write
 *   accession, display id, division, dbxref column 1, dbxref column 2,
 *   bioentry id, biodatabase id
 * to a temporary file, which is then sorted and de-duplicated.
 * argv[1] is the date suffix of the biosql database name.
 * Returns 0 on success, 1 if a file can not be written or the sort fails. */
{
struct sqlConnection *conn, *conn2, *conn3;
char query[256], query2[256], query3[256];
struct sqlResult *sr, *sr2, *sr3;
char **row, **row2, **row3;

FILE *o1;
char *proteinDataDate;
char *dbxref_id;

char *bioentry_id;
char *biodatabase_id;
char *display_id;
char *accession;
char *division;     

char *dbname;
char *extAC;

if (argc != 2) usage();

proteinDataDate = argv[1];
o1 = fopen("temp_spXref2.dat", "w");
if (o1 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output file: temp_spXref2.dat\n");
    return(1);
    }

conn = hAllocConn();
conn2= hAllocConn();
conn3= hAllocConn();

/* snprintf keeps a long argument from running past the query buffers */
snprintf(query2, sizeof(query2), "select * from biosql%s.bioentry;", proteinDataDate);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
while (row2 != NULL)
    {
    bioentry_id 	= row2[0];    
    biodatabase_id 	= row2[1]; 
    display_id		= row2[2];     
    accession		= row2[3];      
    division		= row2[5];
			       
    snprintf(query, sizeof(query),
	    "select * from biosql%s.bioentry_direct_links where source_bioentry_id='%s';",
	    proteinDataDate, bioentry_id);
    sr = sqlMustGetResult(conn, query);
    row = sqlNextRow(sr);
    while (row != NULL)
    	{
    	dbxref_id = row[2];
    
        snprintf(query3, sizeof(query3), "select * from biosql%s.dbxref where dbxref_id=%s;",
		proteinDataDate, dbxref_id);
	sr3  = sqlMustGetResult(conn3, query3);
    	row3 = sqlNextRow(sr3);

	if (row3 != NULL)
	    {
	    dbname 	= row3[1];
	    extAC 	= row3[2];
			
	    fprintf(o1, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", accession, display_id, division,
		    dbname, extAC,bioentry_id,biodatabase_id);
	    }
	else
	    {
	    /* a link that points at no dbxref row: report it and move on */
	    fprintf(stderr, "dbxref_id %s (bioentry %s) not found in dbxref, skipped\n",
		    dbxref_id, bioentry_id);
	    }
			
    	sqlFreeResult(&sr3);
	row = sqlNextRow(sr);
	}
    sqlFreeResult(&sr);
    row2 = sqlNextRow(sr2);
    }

fclose(o1);
sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
hFreeConn(&conn3);

/* keep the temporary file when sorting fails, so nothing is lost */
if (system("cat temp_spXref2.dat | sort |uniq > spXref2.tab") != 0)
    {
    fprintf(stderr, "ERROR: sorting temp_spXref2.dat into spXref2.tab failed\n");
    return(1);
    }
if (system("rm temp_spXref2.dat") != 0)
    fprintf(stderr, "warning: could not remove temp_spXref2.dat\n");
return(0);
}
int main(int argc, char *argv[])
/* Build keggPathway.tab (kgID, locusID, mapID) and keggMapDesc.tab
 * (mapID, description).  For every row of <db>Temp.locus2Ref0, the mRNA
 * accessions of that locus in <db>Temp.locus2Acc0 are looked up; for each
 * accession (version suffix removed) that is a knownGene name, every
 * <db>Temp.keggList row of the locus is written out.  Both outputs are
 * sorted and de-duplicated.
 * argv[1] is the genome database name. */
{
struct sqlConnection *conn, *conn2, *conn3;
char query[256], query2[256], query3[256];
struct sqlResult *sr, *sr2, *sr3;
char **row, **row2, **row3;

char *chp;
FILE *o1, *o2;

char *locusID;	/* LocusLink ID */
char *gbAC;		/* GenBank accession.version */
char *locusID2;	/* LocusLink ID */
char *dbName; 
char cond_str[200];
char *kgID;
char *mapID;
char *desc;

if (argc != 2) usage();
dbName = argv[1];

conn = hAllocConn(dbName);
conn2= hAllocConn(dbName);
conn3= hAllocConn(dbName);

o1 = fopen("j.dat",  "w");
o2 = fopen("jj.dat", "w");
if (o1 == NULL || o2 == NULL)
    {
    fprintf(stderr, "ERROR: Can not open output files j.dat / jj.dat\n");
    return(1);
    }
    
snprintf(query2, sizeof(query2), "select * from %sTemp.locus2Ref0;", dbName);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
while (row2 != NULL)
    {
    locusID2 	= row2[0];
    
    snprintf(query, sizeof(query),
		   "select * from %sTemp.locus2Acc0 where locusID=%s and seqType='m';", 
		   dbName, locusID2);
    sr = sqlMustGetResult(conn, query);
    row = sqlNextRow(sr);
    while (row != NULL)
    	{
	locusID 	= row[0];
	gbAC 		= row[1];
	
	/* drop the ".version" suffix; knownGene names carry none */
	chp = strstr(gbAC, ".");
	if (chp != NULL) *chp = '\0';
    
	snprintf(cond_str, sizeof(cond_str), "name='%s'", gbAC);
        kgID = sqlGetField(dbName, "knownGene", "name", cond_str);
	if (kgID != NULL)
	    {
            snprintf(query3, sizeof(query3),
		    "select * from %sTemp.keggList where locusID = '%s'", dbName, locusID);
            sr3 = sqlGetResult(conn3, query3);
            /* the loop condition is the only place the row advances, so
             * every keggList row is written */
            while ((row3 = sqlNextRow(sr3)) != NULL)
                {
                mapID   = row3[1];
		desc    = row3[2];
		fprintf(o1, "%s\t%s\t%s\n", kgID, locusID, mapID);
		fprintf(o2, "%s\t%s\n", mapID, desc);
                }
            sqlFreeResult(&sr3);
	    }
	row = sqlNextRow(sr);
	}
    sqlFreeResult(&sr);
    row2 = sqlNextRow(sr2);
    }
sqlFreeResult(&sr2);

fclose(o1);
fclose(o2);
hFreeConn(&conn);
hFreeConn(&conn2);
hFreeConn(&conn3);

mustSystem("cat j.dat|sort|uniq >keggPathway.tab");
mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}