struct slName *getTablesForField(struct sqlConnection *conn,
                                 char *splitPrefix, char *table, char *splitSuffix)
/* Return the list of table names that hold a field.  When a split prefix or
 * suffix is supplied, the database is asked for every table matching
 * prefix+table+suffix, in the order the query returns them.  If that yields
 * nothing, or no prefix/suffix was given, the result is a single-element list
 * naming table when it exists, and NULL otherwise. */
{
    struct slName *tableList = NULL;

    if (!(splitPrefix == NULL && splitSuffix == NULL))
    {
        char pattern[256];
        struct sqlResult *result;
        char **fields;

        safef(pattern, sizeof(pattern), "show tables like '%s%s%s'",
              emptyForNull(splitPrefix), table, emptyForNull(splitSuffix));
        result = sqlGetResult(conn, pattern);
        for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
        {
            struct slName *match = slNameNew(fields[0]);
            slAddHead(&tableList, match);
        }
        sqlFreeResult(&result);
        /* Names were pushed on the head; flip back to query order. */
        slReverse(&tableList);
    }

    if (tableList != NULL)
        return tableList;

    /* Fall back to the unsplit table itself. */
    if (sqlTableExists(conn, table))
        tableList = slNameNew(table);
    return tableList;
}
Example #2
0
struct composite *makeCompositeList(struct encode2Manifest *manList, struct hash *metaHash)
/* Group every entry on manList into composites and return the composite list,
 * ordered by first appearance.  An entry belongs to the composite named by its
 * "composite" tag; entries without that tag are grouped under a name built as
 * "comp" followed by the lab and dataType tags (each empty when missing).
 * Each composite's manRefList references its entries in manList order. */
{
struct composite *compList = NULL;
struct hash *nameToComp = hashNew(0);
struct encode2Manifest *man = manList;
while (man != NULL)
    {
    char compName[256];
    char *tagged = tagVal(man, metaHash, "composite");
    if (tagged == NULL)
        {
        char *lab = emptyForNull(tagVal(man, metaHash, "lab"));
        char *dataType = emptyForNull(tagVal(man, metaHash, "dataType"));
        safef(compName, sizeof(compName), "comp%s%s", lab, dataType);
        }
    else
        {
        safef(compName, sizeof(compName), "%s", tagged);
        }

    /* Reuse the composite if this name was seen before, else create it. */
    struct composite *comp = hashFindVal(nameToComp, compName);
    if (comp == NULL)
        {
        AllocVar(comp);
        comp->name = cloneString(compName);
        hashAdd(nameToComp, compName, comp);
        slAddTail(&compList, comp);
        }

    slAddTail(&comp->manRefList, slRefNew(man));
    man = man->next;
    }
/* The hash was only a lookup aid; the composites live on in compList. */
hashFree(&nameToComp);
return compList;
}
Example #3
0
void writeSeriesList(char *fileName, struct series *list)
/* Save list to fileName, one tab-separated line per element: a running id
 * that starts at 1, then name, dataType and grantee.  NULL dataType or
 * grantee values are written as empty fields. */
{
FILE *out = mustOpen(fileName, "w");
struct series *el;
int nextId = 1;
for (el = list; el != NULL; el = el->next, ++nextId)
    {
    char *dataType = emptyForNull(el->dataType);
    char *grantee = emptyForNull(el->grantee);
    fprintf(out, "%d\t%s\t%s\t%s\n", nextId, el->name, dataType, grantee);
    }
carefulClose(&out);
}
Example #4
0
void printLeafTrackList(FILE *f, char *indent, struct view *view, struct composite *comp,
    struct slName *varList, struct taggedFile *tfList, char *type)
/* Write one trackDb stanza to f for each file on tfList whose manifest
 * outputType equals the view name.  Every stanza line is prefixed with indent.
 * varList names the tags used for subGroups and for building the labels; tags
 * a file does not have are skipped.  Each stanza consumes the next value of
 * the trackId counter.  The comp parameter is not used here. */
{
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    struct slName *var;
    char *val;

    if (!sameString(tf->manifest->outputType, view->name))
        continue;

    fprintf(f, "%strack t%d\n", indent, ++trackId);
    fprintf(f, "%sparent %s\n", indent, view->trackName);
    fprintf(f, "%stype %s\n", indent, type);

    /* subGroups: the view plus every tag=value pair present on this file. */
    fprintf(f, "%ssubGroups view=%s", indent, view->name);
    for (var = varList; var != NULL; var = var->next)
        {
        val = metaTagValFindVal(tf->tagList, var->name);
        if (val == NULL)
            continue;
        fprintf(f, " %s=%s", var->name, val);
        }
    fprintf(f, "\n");

    /* shortLabel: just the values, space separated. */
    fprintf(f, "%sshortLabel", indent);
    for (var = varList; var != NULL; var = var->next)
        {
        val = metaTagValFindVal(tf->tagList, var->name);
        if (val == NULL)
            continue;
        fprintf(f, " %s", val);
        }
    fprintf(f, "\n");

    /* longLabel: lab and data type, then comma separated "name value" pairs. */
    char *lab = emptyForNull(metaTagValFindVal(tf->tagList, "lab"));
    char *dataType = emptyForNull(metaTagValFindVal(tf->tagList, "dataType"));
    fprintf(f, "%slongLabel %s %s", indent, lab, dataType);
    int pairCount = 0;
    for (var = varList; var != NULL; var = var->next)
        {
        val = metaTagValFindVal(tf->tagList, var->name);
        if (val == NULL)
            continue;
        if (pairCount > 0)
            fprintf(f, ",");
        ++pairCount;
        fprintf(f, " %s %s", var->name, val);
        }
    fprintf(f, "\n");

    fprintf(f, "%sbigDataUrl %s\n", indent, tf->manifest->fileName);
    fprintf(f, "\n");
    }
}
Example #5
0
void edwMakePlateFileNameAndPath(int edwFileId, char *submitFileName,
    char licensePlate[edwMaxPlateSize], char edwFile[PATH_LEN], char serverPath[PATH_LEN])
/* Fill in licensePlate, edwFile and serverPath for file edwFileId.
 * edwFile is the time-based directory, the license plate, and the suffix
 * carried over from submitFileName; serverPath is edwFile under edwRootDir.
 * The upload directory is created on disk if it is not already there. */
{
/* Carry over the submitted suffix, widened to a second suffix when one is found. */
char *nameEnd = submitFileName + strlen(submitFileName);
char *suffix = lastMatchCharExcept(submitFileName, nameEnd, '.', '/');
char *outerSuffix = NULL;
if (suffix != NULL)
    outerSuffix = lastMatchCharExcept(submitFileName, suffix, '.', '/');
if (outerSuffix != NULL)
    suffix = outerSuffix;
suffix = emptyForNull(suffix);

/* The license plate is the base of the stored file name. */
edwMakeLicensePlate(edwLicensePlatePrefix, edwFileId, licensePlate, edwMaxPlateSize);

/* Directory is derived from the current time; make sure it exists under the root. */
char edwDir[PATH_LEN];
char uploadDir[PATH_LEN];
edwDirForTime(edwNow(), edwDir);
safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir);
makeDirsOnPath(uploadDir);

/* Relative name first, then the full path built from it. */
safef(edwFile, PATH_LEN, "%s%s%s", edwDir, licensePlate, suffix);
safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile);
}
Example #6
0
struct dyString *readAndReplaceTableName(char *fileName, char *table)
/* Read a SQL file into a string.  Lines before the first real line are
 * dropped, and that first real line must start with the words
 * "create table" followed by a table name.  The table name is replaced
 * with table, and the rest of the file is copied verbatim.  Aborts with
 * an error message when the file does not have that shape. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct dyString *sql = dyStringNew(0);
    char *rest = NULL;
    char *keyword, *oldName;

    if (!lineFileNextReal(lf, &rest))
        errAbort("No real lines in %s\n", fileName);

    /* Validate the "CREATE TABLE <name>" prefix word by word. */
    keyword = nextWord(&rest);
    if (!sameWord(keyword, "create"))
        errAbort("Expecting first word in file to be CREATE. Got %s", keyword);
    keyword = nextWord(&rest);
    if (!(keyword != NULL && sameWord(keyword, "table")))
        errAbort("Expecting second word in file to be table. Got %s", emptyForNull(keyword));
    oldName = nextWord(&rest);
    if (oldName == NULL)
        errAbort("Expecting table name on same line as CREATE TABLE");

    /* Emit the statement start with the new name, then whatever followed the old one. */
    sqlDyStringPrintf(sql, "CREATE TABLE %s ", table);
    if (rest != NULL)
        dyStringAppend(sql, rest);
    dyStringAppendC(sql, '\n');

    for (;;)
    {
        if (!lineFileNext(lf, &rest, NULL))
            break;
        dyStringAppend(sql, rest);
        dyStringAppendC(sql, '\n');
    }
    lineFileClose(&lf);
    return sql;
}
Example #7
0
void transformJobTable(struct sqlConnection *conn, struct edwAnalysisJob *jobList, char *outName)
/* Convert each edwAnalysisJob on jobList into an eapJob record and write it
 * in tab-separated form to the file outName.  The command line and stderr
 * text are escaped for tab files; cpusRequested is raised to at least 1; a
 * job without a parasolId gets its pid, formatted as a string, instead.
 * The conn parameter is not used here. */
{
FILE *f = mustOpen(outName, "w");
struct edwAnalysisJob *job;
for (job = jobList; job != NULL; job = job->next)
     {
     struct eapJob j = {0};
     j.id = job->id;
     /* Check before the pointer is handed to the escaper, not after. */
     assert(job->commandLine);
     j.commandLine = sqlEscapeTabFileString(job->commandLine);
     j.startTime = job->startTime;
     j.endTime = job->endTime;
     j.stderr = sqlEscapeTabFileString(emptyForNull(job->stderr));
     j.returnCode = job->returnCode;
     j.cpusRequested = max(1, job->cpusRequested);
     /* buf must outlive the eapJobTabOut call since j.parasolId may point at it. */
     char buf[16];
     if (isEmpty(job->parasolId))
	 {
	 safef(buf, sizeof(buf), "%d", job->pid);
	 j.parasolId = buf;
	 }
     else
         j.parasolId = job->parasolId;
     eapJobTabOut(&j, f);
     /* Only the two escaped strings were allocated here; the rest is borrowed from job. */
     freez(&j.commandLine);
     freez(&j.stderr);
     }
carefulClose(&f);
}
Example #8
0
static void showTableDataRows(struct fieldedTable *table, int pageSize, int maxLenField,
    struct hash *tagOutputWrappers, void *wrapperContext)
/* Render up to pageSize data rows of table as HTML <TR> rows on stdout.
 * Numeric columns get right-aligned cells.  When maxLenField is positive,
 * values longer than it are shortened to maxLenField characters, ending in
 * "..." when the limit is at least 3.  If tagOutputWrappers has an entry for
 * a field name, that function prints the (non-empty) cell instead of the
 * default plain output; it receives both the full and the shortened value
 * along with wrapperContext. */
{
int count = 0;
struct fieldedRow *row;
int fieldCount = table->fieldCount;
/* A variable length array must have a positive size, even for an empty table. */
boolean isNum[fieldCount > 0 ? fieldCount : 1];
int i;
for (i=0; i<fieldCount; ++i)
    isNum[i] = fieldedTableColumnIsNumeric(table, i);

/* Scratch space for shortened values; only written when maxLenField > 0. */
char shortVal[maxLenField > 0 ? maxLenField+1 : 1];

for (row = table->rowList; row != NULL; row = row->next)
    {
    if (++count > pageSize)
         break;
    printf("<TR>\n");
    int fieldIx = 0;
    for (fieldIx=0; fieldIx<fieldCount; ++fieldIx)
	{
	char *longVal = emptyForNull(row->row[fieldIx]);
	char *val = longVal;
	int valLen = strlen(val);
	if (maxLenField > 0 && maxLenField < valLen)
	    {
	    /* Reserve room for the ellipsis when the limit allows it.  Limits
	     * below 3 cannot hold one, so they get a bare prefix rather than a
	     * negative copy length. */
	    boolean useEllipsis = (maxLenField >= 3);
	    int keep = useEllipsis ? maxLenField-3 : maxLenField;
	    memcpy(shortVal, val, keep);
	    shortVal[keep] = 0;
	    if (useEllipsis)
		strcat(shortVal, "...");
	    val = shortVal;
	    }
	if (isNum[fieldIx])
	    webPrintLinkCellRightStart();
	else
	    webPrintLinkCellStart();
	boolean printed = FALSE;
	if (tagOutputWrappers != NULL && !isEmpty(val))
	    {
	    char *field = table->fields[fieldIx];
	    webTableOutputWrapperType *printer = hashFindVal(tagOutputWrappers, field);
	    if (printer != NULL)
		{
		printer(table, row, field, longVal, val, wrapperContext);
		printed = TRUE;
		}
	    }
	if (!printed)
	    printf("%s", val);
	webPrintLinkCellEnd();
	}
    printf("</TR>\n");
    }
}
Example #9
0
struct raField *raFieldNew(char *name, char *val, struct lm *lm)
/* Return a new raField allocated from lm, holding lm-owned copies of name and
 * val.  The value is passed through skipLeadingSpaces first, and a NULL
 * result is stored as the empty string. */
{
struct raField *field;
char *trimmed = skipLeadingSpaces(val);
lmAllocVar(lm, field);
field->name = lmCloneString(lm, name);
field->val = lmCloneString(lm, emptyForNull(trimmed));
return field;
}
static struct hash *getCoveredTables(struct joiner *joiner, char *db,
                                     struct sqlConnection *conn)
/* Build a hash of the tables in database db that the joiner accounts for:
 * the ones it says to ignore, plus every table named by a field of one of
 * its identifier sets.  Only entries whose database list includes db count.
 * Table specs are expanded through addTablesLike. */
{
    struct hash *covered = hashNew(0);
    struct joinerIgnore *ig;
    struct joinerSet *js;

    /* Pass 1: tables the joiner file explicitly ignores. */
    for (ig = joiner->tablesIgnored; ig != NULL; ig = ig->next)
    {
        struct slName *ignored;
        if (!slNameInList(ig->dbList, db))
            continue;
        for (ignored = ig->tableList; ignored != NULL; ignored = ignored->next)
        {
            verbose(3,"ignoreTable: '%s'\n", ignored->name);
            addTablesLike(covered, conn, ignored->name);
        }
    }

    /* Pass 2: tables referenced by identifier fields, including split tables. */
    for (js = joiner->jsList; js != NULL; js = js->next)
    {
        struct joinerField *jf;
        for (jf = js->fieldList; jf != NULL; jf = jf->next)
        {
            char pattern[512];
            if (!slNameInList(jf->dbList, db))
                continue;
            safef(pattern, sizeof(pattern), "%s%s%s",
                  emptyForNull(jf->splitPrefix), jf->table,
                  emptyForNull(jf->splitSuffix));
            addTablesLike(covered, conn, pattern);
            verbose(4,"ident: '%s', table: '%s'\n", js->name, pattern);
        }
    }
    return covered;
}
Example #11
0
static void writeInfo(FILE *infoFh, struct gffGroup *group)
/* Write one tab-separated row describing a GTF group to the info file.
 * Gene id, protein id, gene name and transcript name are gathered from the
 * group's lines via saveName; any that remain unset are written as empty. */
{
char *geneId = NULL;
char *proteinId = NULL;
char *geneName = NULL;
char *transcriptName = NULL;

/* Offer every line's names to saveName, which decides what gets kept. */
struct gffLine *line = group->lineList;
while (line != NULL)
    {
    saveName(&geneId, line->geneId);
    saveName(&proteinId, line->proteinId);
    saveName(&geneName, line->geneName);
    saveName(&transcriptName, line->transcriptName);
    line = line->next;
    }

char *outGeneId = emptyForNull(geneId);
char *outProteinId = emptyForNull(proteinId);
char *outGeneName = emptyForNull(geneName);
char *outTranscriptName = emptyForNull(transcriptName);
fprintf(infoFh, "%s\t%s\t%s\t%s\t%d\t%d\t%c\t%s\t%s\t%s\n",
        group->name, outGeneId, group->source,
        group->seq, group->start, group->end, group->strand,
        outProteinId, outGeneName, outTranscriptName);
}
Example #12
0
static struct rqlEval rqlEvalArrayIx(struct rqlParse *p, void *record, RqlEvalLookup lookup,
	struct lm *lm)
/* Evaluate an indexed-array parse node.  The first child is the array
 * expression and the second is the index expression; the result is a string
 * obtained from lmCloneSomeWord on the array string and the index, or the
 * empty string when that returns NULL. */
{
struct rqlParse *arrayNode = p->children;
struct rqlParse *indexNode = arrayNode->next;
struct rqlEval arrayVal = rqlLocalEval(arrayNode, record, lookup, lm);
struct rqlEval indexVal = rqlLocalEval(indexNode, record, lookup, lm);
char *word = lmCloneSomeWord(lm, arrayVal.val.s, indexVal.val.i);
struct rqlEval result;
result.type = rqlTypeString;
result.val.s = emptyForNull(word);
return result;
}
Example #13
0
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza)
/* Print the fields selected by rql for stanza, in the format named by clOut:
 *   "ra"   - leaf stanzas only; "name<tab>value" lines for fields that have a
 *            value, then a blank line.
 *   "tab"  - leaf stanzas only; one line of tab-separated values, empty where
 *            a field has no value.
 *   "tags" - any stanza; only values found by tagFindLocalVal, each line
 *            indented by depth tabs, then a blank line.
 * Any other format aborts with an error. */
{
char *format = clOut;
struct slName *field;

if (sameString(format, "ra"))
    {
    if (stanza->children != NULL)
        return;
    for (field = rql->fieldList; field != NULL; field = field->next)
        {
        char *val = tagFindVal(stanza, field->name);
        if (val == NULL)
            continue;
        printf("%s\t%s\n", field->name, val);
        }
    printf("\n");
    return;
    }

if (sameString(format, "tab"))
    {
    if (stanza->children != NULL)
        return;
    for (field = rql->fieldList; field != NULL; field = field->next)
        {
        /* No separator before the first column, a tab before each later one. */
        char *connector = (field == rql->fieldList) ? "" : "\t";
        char *val = emptyForNull(tagFindVal(stanza, field->name));
        printf("%s%s", connector, val);
        }
    printf("\n");
    return;
    }

if (sameString(format, "tags"))
    {
    for (field = rql->fieldList; field != NULL; field = field->next)
        {
        char *val = tagFindLocalVal(stanza, field->name);
        if (val == NULL)
            continue;
        repeatCharOut(stdout, '\t', depth);
        printf("%s\t%s\n", field->name, val);
        }
    printf("\n");
    return;
    }

errAbort("Unrecognized format %s", format);
}
Example #14
0
void getUrl(struct sqlConnection *conn)
/* Print the form controls that ask for the URL of a validated manifest file:
 * a text field pre-filled with any "url" CGI value, a submit button, an
 * "update" check box, the submitting user's email and a log-out button.
 * The user is looked up from userEmail via edwMustGetUserFromEmail first. */
{
edwMustGetUserFromEmail(conn, userEmail);

/* URL prompt and entry field, keeping whatever was typed previously. */
printf("Please enter a URL for a validated manifest file:<BR>");
printf("URL ");
char *previousUrl = cgiOptionalString("url");
cgiMakeTextVar("url", emptyForNull(previousUrl), 80);
cgiMakeButton("submitUrl", "submit");
printf("<BR>\n");

/* Update option, unchecked by default. */
cgiMakeCheckBox("update", FALSE);
printf(" Update information associated with files that have already been uploaded.");

/* Who is submitting, and a way to switch users. */
printf("<BR>Submission by %s", userEmail);
edwPrintLogOutButton();
}
Example #15
0
static void refLinkUpdate(struct sqlConnection *conn, struct gbStatus* status)
/* Update the refLink table for the current entry.  A new entry (GB_NEW) adds
 * a row; an entry whose metadata changed (GB_META_CHG) modifies the row keyed
 * by mrnaAcc.  Gene and product names come from the current ra fields, are
 * empty when missing, and are SQL-escaped before use.  The conn parameter is
 * not used here. */
{
int geneId;
char *gen = emptyForNull(raFieldCurVal("gen"));
char *pro = emptyForNull(raFieldCurVal("pro"));

/* alloca() must not be called inside a function-call argument list, so the
 * escape buffers are reserved in statements of their own.
 * NOTE(review): 2*len+1 presumably covers every character being escaped -
 * confirm against sqlEscapeString2. */
char *genBuf = alloca(2*strlen(gen)+1);
char *proBuf = alloca(2*strlen(pro)+1);
gen = sqlEscapeString2(genBuf, gen);
pro = sqlEscapeString2(proBuf, pro);

/* can either have locus id (old locus link db) or gene id, or both,
 * in which case the geneId is used */
geneId = (raGeneId != 0) ? raGeneId : raLocusLinkId;

if (status->stateChg & GB_NEW)
    sqlUpdaterAddRow(refLinkUpd, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u",
                     gen, pro, raAcc, raProtAcc, raFieldCurId("gen"),
                     raFieldCurId("pro"), geneId, raOmimId);
else if (status->stateChg & GB_META_CHG)
    sqlUpdaterModRow(refLinkUpd, 1, "name='%s', product='%s', protAcc='%s', "
                     "geneName=%u, prodName=%u, locusLinkId=%u, "
                     "omimId=%u where mrnaAcc='%s'",
                     gen, pro, raProtAcc, raFieldCurId("gen"),
                     raFieldCurId("pro"), geneId, raOmimId, raAcc);
}
Example #16
0
struct customTrack *chromGraphParser(char *genomeDb, struct customPp *cpp,
	char *formatType, char *markerType, char *columnLabels,
	char *name, char *description, struct hash *settings,
	boolean report)
/* Parse out a chromGraph file (not including any track lines).
 * formatType, markerType and columnLabels may each be given as their
 * "guess" constant, in which case they are worked out from a preview of
 * the first lines of input.  Returns a list with one customTrack per
 * labeled file produced by the parse, or NULL if the input has no preview
 * lines.  Aborts if the format or marker type cannot be determined.
 * The "formatType", "markerType" and "columnLabels" entries are removed
 * from settings as a side effect. */
{
/* Remember whether the caller supplied explicit limits; checked per track below. */
char *minVal = hashFindVal(settings, "minVal");
char *maxVal = hashFindVal(settings, "maxVal");

/* Get first lines of track.  If track empty then
 * might as well return NULL here. */
struct slName *preview = getPreview(cpp, 10);
if (preview == NULL)
    return NULL;

/* Figure out format type - scanning preview if it isn't well defined. */
struct sqlConnection *conn = hAllocConn(genomeDb);
int colCount;

if (sameString(formatType, cgfFormatGuess))
    {
    /* The colCount filled in here is overwritten by countColumns below. */
    if (!figureOutFormat(preview, &formatType, &colCount))
	errAbort("Can't figure out format for chromGraph track %s", 
		emptyForNull(name));
    }
hashMayRemove(settings, "formatType");

/* Now that we know format can count columns and determine how to
 * chop up lines. */
colCount = countColumns(preview, formatType);
Chopper chopper = getChopper(formatType);

/* Figure out marker type - scanning marker column of preview if it isn't
 * well defined. */
if (sameString(markerType, cgfMarkerGuess))
    {
    markerType = guessMarkerType(preview, chopper, conn, colCount);
    if (markerType == NULL)
	errAbort("Can't figure out marker column type for chromGraph track %s",
		emptyForNull(name));
    }
hashMayRemove(settings, "markerType");

/* Figure out if columns are labeled in file, using preview if needed. */
if (sameString(columnLabels, cgfColLabelGuess))
    {
    if (firstRowConsistentWithData(preview->name, chopper, colCount))
	columnLabels = cgfColLabelNumbered;
    else
	columnLabels = cgfColLabelFirstRow;
    }
hashMayRemove(settings, "columnLabels");
/* Done with the preview; hand it back to cpp before the full parse. */
returnPreview(cpp, &preview);
boolean labelsInData = sameString(columnLabels, cgfColLabelFirstRow);

/* Load data into list of labeled temp files. */
struct labeledFile *fileEl, *fileList;
fileList = parseToLabeledFiles(cpp, colCount, formatType, markerType,
    labelsInData, conn, report);
saveLabeledFileList(fileList);

/* Create a customTrack for each element in file list. */
struct customTrack *outputTracks = NULL;
for (fileEl = fileList; fileEl != NULL; fileEl = fileEl->next)
    {
    struct customTrack *track;
    AllocVar(track);
    struct trackDb *tdb = customTrackTdbDefault();
    track->tdb = tdb;

    /* Figure out short and long names and type*/
    char shortLabel[128];
    char longLabel[512];
    /* A missing name/description is taken from the first track's default
     * tdb labels.  The parameters themselves are overwritten, so the same
     * values are reused for every later file in the list. */
    if (name == NULL)
        name = track->tdb->shortLabel;
    if (description == NULL)
        description = track->tdb->longLabel;
    /* Append the file's label when there are several columns or the labels came from the data. */
    if (colCount > 1 || labelsInData)
        {
	safef(shortLabel, sizeof(shortLabel), "%s %s", name, fileEl->label);
	safef(longLabel, sizeof(longLabel), "%s %s", description, fileEl->label);
	}
    else
        {
	safef(shortLabel, sizeof(shortLabel), "%s", name);
	safef(longLabel, sizeof(longLabel), "%s", description);
	}
    tdb->shortLabel = cloneString(shortLabel);
    tdb->longLabel = cloneString(longLabel);
    tdb->type = "chromGraph";
    tdb->track = customTrackTableFromLabel(tdb->shortLabel);
    tdb->table = cloneString(tdb->track);
    track->dbTableName = NULL;

    /* Create settings */
    struct dyString *dy = dyStringNew(0);
    /* NOTE(review): the string returned by hashToRaString is not freed here -
     * confirm whether the caller owns it. */
    dyStringAppend(dy, hashToRaString(settings));
    dyStringPrintf(dy, "binaryFile %s\n", fileEl->fileName);
    dyStringPrintf(dy, "type %s\n", tdb->type);
    dyStringPrintf(dy, "tdbType %s\n", tdb->type); /* Needed if outside factory */
    /* Only add the file's own min/max when settings did not already have them. */
    if (minVal == NULL)
        dyStringPrintf(dy, "minVal %g\n", fileEl->minVal);
    if (maxVal == NULL)
        dyStringPrintf(dy, "maxVal %g\n", fileEl->maxVal);
    tdb->settings = dyStringCannibalize(&dy);
    
    /* Add to list. */
    slAddHead(&outputTracks, track);
    }
hFreeConn(&conn);
/* Tracks were added at the head, so reverse to match file list order. */
slReverse(&outputTracks);
return outputTracks;
}
Example #17
0
void writeMdbListAsResults(struct mdbObj *mdbList, char *fileName)
/* Write selected fields in list in tab-separated result format to file.
 * One line is written per object that has an objType other than "composite"
 * and has an expId: a running id starting at 1, then experiment, replicate,
 * view, objType, file name, md5sum, table name and the three dates, with
 * missing values left empty.  Objects lacking an expId produce a warning
 * instead, except for the wgEncodeUmassWengTfbsValid validation files.
 * File name and md5sum values are cut off in place at their first comma. */
{
FILE *f = mustOpen(fileName, "w");
struct mdbObj *mdb;
int id = 0;
for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
    {
    char *experiment = NULL;
    char *replicate = NULL;
    char *objType = NULL;
    char *dataFile = NULL;	/* Named so as not to shadow the fileName parameter. */
    char *md5sum = NULL;
    char *tableName = NULL;
    char *view = NULL;
    char *dateSubmitted = NULL;
    char *dateResubmitted = NULL;
    char *dateUnrestricted = NULL;
    struct mdbVar *v;
    for (v = mdb->vars; v != NULL; v = v->next)
	{
	char *var = v->var, *val = v->val;
	if (sameString(var, "expId"))
	    experiment = val;
	else if (sameString(var, "replicate"))
	    replicate = val;
	else if (sameString(var, "objType"))
	    objType = val;
	else if (sameString(var, "fileName"))
	    {
	    dataFile = val;
	    /* Do surgery on file name, cutting off second comma separated index file. */
	    char *comma = strchr(dataFile, ',');
	    if (comma != NULL)
	        *comma = 0;
	    }
	else if (sameString(var, "md5sum"))
	    {
	    md5sum = val;
	    /* Same surgery on the checksum itself.  Searching the file name
	     * here would leave md5sum untrimmed, and would pass NULL to strchr
	     * whenever md5sum comes before fileName in the var list. */
	    char *comma = strchr(md5sum, ',');
	    if (comma != NULL)
	        *comma = 0;
	    }
	else if (sameString(var, "tableName"))
	    tableName = val;
	else if (sameString(var, "view"))
	    view = val;
	else if (sameString(var, "dateSubmitted"))
	    dateSubmitted = val;
	else if (sameString(var, "dateResubmitted"))
	    dateResubmitted = val;
	else if (sameString(var, "dateUnrestricted"))
	    dateUnrestricted = val;
	}
    if (objType != NULL && !sameString(objType, "composite"))
	{
	if (experiment != NULL)
	    {
	    fprintf(f, "%d\t", ++id);
	    fprintf(f, "%s\t", emptyForNull(experiment));
	    fprintf(f, "%s\t", emptyForNull(replicate));
	    fprintf(f, "%s\t", emptyForNull(view));
	    fprintf(f, "%s\t", emptyForNull(objType));
	    fprintf(f, "%s\t", emptyForNull(dataFile));
	    fprintf(f, "%s\t", emptyForNull(md5sum));
	    fprintf(f, "%s\t", emptyForNull(tableName));
	    fprintf(f, "%s\t", emptyForNull(dateSubmitted));
	    fprintf(f, "%s\t", emptyForNull(dateResubmitted));
	    fprintf(f, "%s\n", emptyForNull(dateUnrestricted));
	    }
	else
	    {
	    if (!startsWith("wgEncodeUmassWengTfbsValid", mdb->obj))	// These validation files not associated with encode experiment
		warn("No experiment for %s", mdb->obj);
	    }
	}
    }
carefulClose(&f);
}
static void synonymPrint(struct section *section, 
	struct sqlConnection *conn, char *id)
/* Print the alternate identifiers known for gene id as HTML: aliases, the
 * genome-specific gene ID line, a RefSeq accession or representative mRNA
 * link, the UniProt protein link(s) with display IDs, and finally the CCDS
 * output.  RGD genes are handed to rgdGene2SynonymPrint instead.  Aborts if
 * the protein accession has no display ID in the UniProt database. */
{
char *protAcc = getSwissProtAcc(conn, spConn, id);
char *spDisplayId;
char *refSeqAcc = "";
char *mrnaAcc = "";
char *oldDisplayId;
char condStr[255];
char *kgProteinID;
char *parAcc; /* parent accession of a variant splice protein */
char *chp;

if (isRgdGene(conn))
    {
    rgdGene2SynonymPrint(section,conn, id);
    return;
    }
if (sqlTablesExist(conn, "kgAlias"))
    printAlias(id, conn);
if (sameWord(genome, "Zebrafish"))
    {
    char *xrefTable = "ensXRefZfish";
    char *geneIdCol = "ensGeneId";
    /* get Gene Symbol and RefSeq accession from Zebrafish-specific */
    /* cross-reference table */
    printGeneSymbol(id, xrefTable, geneIdCol, conn);
    refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn);
    hPrintf("<B>ENSEMBL ID:</B> %s", id);
    }
else
    {
    char query[256];
    char *toRefTable = genomeOptionalSetting("knownToRef");
    if (toRefTable != NULL && sqlTableExists(conn, toRefTable))
        {
	safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable,
		id);
	refSeqAcc = emptyForNull(sqlQuickString(conn, query));
	}
    if (sqlTableExists(conn, "kgXref"))
	{
	safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id);
	mrnaAcc = emptyForNull(sqlQuickString(conn, query));
	}
    if (sameWord(genome, "C. elegans"))
	hPrintf("<B>WormBase ID:</B> %s<BR>", id);
    else
	hPrintf("<B>UCSC ID:</B> %s<BR>", id);
    }
    
/* Prefer the RefSeq accession; fall back to the representative mRNA. */
if (refSeqAcc[0] != 0)
    {
    hPrintf("<B>RefSeq Accession: </B> <A HREF=\"");
    printOurRefseqUrl(stdout, refSeqAcc);
    hPrintf("\">%s</A><BR>\n", refSeqAcc);
    }
else if (mrnaAcc[0] != 0)
    {
    safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc);
    if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL)
        {
    	hPrintf("<B>Representative RNA: </B> <A HREF=\"");
    	printOurMrnaUrl(stdout, mrnaAcc);
    	hPrintf("\">%s</A><BR>\n", mrnaAcc);
    	}
    else
    /* do not show URL link if it is not found in gbCdnaInfo */
    	{
    	hPrintf("<B>Representative RNA: %s </B>", mrnaAcc);
    	}
    }
if (protAcc != NULL)
    {
    /* Stays empty unless knownGene supplies a protein ID for the cart
     * position.  A literal is enough here; nothing frees this value. */
    kgProteinID = "";
    if (hTableExists(sqlGetDatabase(conn), "knownGene")
        && (isNotEmpty(cartOptionalString(cart, hggChrom)) &&
	      differentWord(cartOptionalString(cart, hggChrom),"none")))
    	{
	char *foundId;
	/* NOTE(review): this condition is built from cart strings without
	 * escaping - confirm they are validated before reaching this point. */
    	safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", 
	        id, cartOptionalString(cart, hggChrom), 
    	        cartOptionalString(cart, hggStart), 
		cartOptionalString(cart, hggEnd));
    	foundId = sqlGetField(database, "knownGene", "proteinID", condStr);
	/* No matching row gives NULL, which must not reach strstr below. */
	if (foundId != NULL)
	    kgProteinID = foundId;
    	}

    hPrintf("<B>Protein: ");
    if (strstr(kgProteinID, "-") != NULL)
        {
	/* The parent accession is the part before the dash. */
	parAcc = cloneString(kgProteinID);
	chp = strstr(parAcc, "-");
	*chp = '\0';
	
        /* show variant splice protein and the UniProt link here */
	hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
	    "TARGET=_blank>%s</A></B>, splice isoform of ",
	    kgProteinID, kgProteinID);
        hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
	    "TARGET=_blank>%s</A></B>\n",
	    parAcc, parAcc);
	}
    else
        {
        hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" "
	    "TARGET=_blank>%s</A></B>\n",
	    protAcc, protAcc);
	}
    /* show SWISS-PROT display ID if it is different than the accession ID */
    /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */
    spDisplayId = spAnyAccToId(spConn, protAcc);
    if (spDisplayId == NULL) 
    	{
	errAbort("<br>%s seems to no longer be a valid protein ID in our latest UniProtKB DB.", protAcc);
	}
	
    if (strstr(spDisplayId, protAcc) == NULL)
	{
	hPrintf(" (aka %s", spDisplayId);
	/* show once if the new and old displayId are the same */
 	oldDisplayId = oldSpDisplayId(spDisplayId);
	if (oldDisplayId != NULL)
 	    {
            if (!sameWord(spDisplayId, oldDisplayId)
                && !sameWord(protAcc, oldDisplayId))
	    	{
	    	hPrintf(" or %s", oldDisplayId);
	    	}
	    }
	hPrintf(")<BR>\n");
	}
    }
printCcds(id, conn);

}
Example #19
0
static void bigBedClick(char *fileName, struct trackDb *tdb,
                     char *item, int start, int end, int bedSize)
/* Handle click in generic bigBed track.  Opens fileName, queries the current
 * chromosome (cart variable "c") around start-end, and prints details for
 * each interval whose start and end match exactly and, when bedSize > 3,
 * whose name equals item.  A bedSize of 0 means "use the field count defined
 * in the file", and also turns on the custom URL output.  Items scoring
 * below the "scoreFilter" setting are skipped.  Prints a "No item" message
 * when no interval matched. */
{
boolean showUrl = FALSE;
char *chrom = cartString(cart, "c");

/* Open bigBed file and get interval list. */
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct lm *lm = lmInit(0);
int ivStart = start, ivEnd = end;
if (start == end)
    {
    // item is an insertion; expand the search range from 0 bases to 2 so we catch it:
    ivStart = max(0, start-1);
    ivEnd++;
    }
struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, ivStart, ivEnd, 0, lm);

/* Get bedSize if it's not already defined. */
if (bedSize == 0)
    {
    bedSize = bbi->definedFieldCount;
    showUrl = TRUE;
    }


char *scoreFilter = cartOrTdbString(cart, tdb, "scoreFilter", NULL);
int minScore = 0;
if (scoreFilter)
    minScore = atoi(scoreFilter);

/* Find particular item in list - matching start, and item if possible. */
boolean found = FALSE;
/* NOTE(review): firstTime is never cleared, so the <BR> below is printed
 * for every matching item - confirm whether that is intended. */
boolean firstTime = TRUE;
struct bigBedInterval *bb;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    if (!(bb->start == start && bb->end == end))
	continue;
    if (bedSize > 3)
	{
	char *name = cloneFirstWordByTab(bb->rest);
	boolean match = sameString(name, item);
	freez(&name);
	if (!match)
	    continue;
	}

    found = TRUE;
    if (firstTime)
	printf("<BR>\n");
    int seq1Seq2Fields = 0;
    // check for seq1 and seq2 in columns 7+8 (eg, pairedTagAlign)
    boolean seq1Seq2 = sameOk(trackDbSetting(tdb, BASE_COLOR_USE_SEQUENCE), "seq1Seq2");
    if (seq1Seq2 && bedSize == 6)
	seq1Seq2Fields = 2;
    char *fields[bedSize+seq1Seq2Fields];
    char startBuf[16], endBuf[16];
    /* Private copy of the extra fields, taken before bigBedIntervalToRow is
     * run on the interval; freed at the end of this iteration. */
    char *rest = cloneString(bb->rest);
    int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
                                           bedSize+seq1Seq2Fields);
    if (bbFieldCount != bedSize+seq1Seq2Fields)
        {
        errAbort("Disagreement between trackDb field count (%d) and %s fieldCount (%d)",
		bedSize, fileName, bbFieldCount);
	}
    struct bed *bed = bedLoadN(fields, bedSize);
    if (bedSize >= 6 && scoreFilter && bed->score < minScore)
	{
	freez(&rest);
	continue;
	}
    if (showUrl && (bedSize >= 4))
        printCustomUrl(tdb, item, TRUE);
    bedPrintPos(bed, bedSize, tdb);

    // display seq1 and seq2
    if (seq1Seq2 && bedSize+seq1Seq2Fields == 8)
        printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>"
	       "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]);
    else if (isNotEmpty(rest))
	{
	char *restFields[256];
	int restCount = chopTabs(rest, restFields);
	/* rest starts after chrom/start/end, so this many of its fields are BED fields. */
	int restBedFields = bedSize - 3;
	if (restCount > restBedFields)
	    {
            if (0 == extraFieldsPrint(tdb,NULL,restFields + restBedFields,restCount - restBedFields))
                {
                int i;
                char label[20];
                safef(label, sizeof(label), "nonBedFieldsLabel");
                printf("<B>%s&nbsp;</B>",
                       trackDbSettingOrDefault(tdb, label, "Non-BED fields:"));
                /* Tab goes between printed fields, so test against the first
                 * printed index rather than 0. */
                for (i = restBedFields;  i < restCount;  i++)
                    printf("%s%s", (i > restBedFields ? "\t" : ""), restFields[i]);
                printf("<BR>\n");
                }
	    }
	}
    /* restFields pointed into rest; nothing uses either past this point. */
    freez(&rest);
    if (isCustomTrack(tdb->track))
	{
	time_t timep = bbiUpdateTime(bbi);
	printBbiUpdateTime(&timep);
	}

    }

if (!found)
    {
    printf("No item %s starting at %d\n", emptyForNull(item), start);
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
Example #20
0
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile, char *loc2refFile, 
	char *pepFile, char *mim2locFile)
/* hgRefSeqMrna - Load refSeq mRNA alignments and other info into the
 * refGene, refLink, refPep, refMrna, productName and geneName tables.
 *   database    - database handed to hgStartUpdate
 *   faFile      - mRNA sequence file, passed to writeSeqTable with the refMrna tab file
 *   raFile      - .ra records; the acc and siz tags are required, while
 *                 gen, cds, ngi and pro are optional
 *   pslFile     - alignments; each one that passes the checks below becomes
 *                 a row of the refGene tab file
 *   loc2refFile - tab-separated, at least 5 columns; column 0 is read as the
 *                 locusLink id, column 1 as the mRNA accession and column 4
 *                 as the protein accession.  Lines starting with '#' are skipped.
 *   pepFile     - peptide sequence file, passed to writeSeqTable with the refPep tab file
 *   mim2locFile - two columns: OMIM id, then the locusLink id it belongs to
 * Tab files are created in the current directory.  Sequence tab files are
 * only written, and tab files only loaded, when clTest is not set; the
 * refLink/refGene/refPep/refMrna tables are emptied in either case. */
{
struct lineFile *lf;
struct hash *raHash, *rsiHash = newHash(0);
struct hash *loc2mimHash = newHash(0);
struct refSeqInfo *rsiList = NULL, *rsi;
char *s, *line, *row[5];
int wordCount, dotMod = 0;
int noLocCount = 0;
int rsiCount = 0;
int noProtCount = 0;
struct psl *psl;
struct sqlConnection *conn = hgStartUpdate(database);
struct hash *productHash = loadNameTable(conn, "productName", 16);
struct hash *geneHash = loadNameTable(conn, "geneName", 16);
char *kgName = "refGene";

FILE *kgTab = hgCreateTabFile(".", kgName);
FILE *productTab = hgCreateTabFile(".", "productName");
FILE *geneTab = hgCreateTabFile(".", "geneName");
FILE *refLinkTab = hgCreateTabFile(".", "refLink");
FILE *refPepTab = hgCreateTabFile(".", "refPep");
FILE *refMrnaTab = hgCreateTabFile(".", "refMrna");

struct exon *exonList = NULL, *exon;
char *answer;
char cond_str[200];

/* Make refLink and the other tables if they don't exist already,
 * and empty them so the load below starts from scratch. */
sqlMaybeMakeTable(conn, "refLink", refLinkTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refLink");
sqlMaybeMakeTable(conn, "refGene", refGeneTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refGene");
sqlMaybeMakeTable(conn, "refPep", refPepTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refPep");
sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refMrna");

/* Scan through locus link to omim ID file and put in hash,
 * keyed by locusLink id (column 1) with the OMIM id (column 0) as value. */
    {
    char *mimRow[2];

    printf("Scanning %s\n", mim2locFile);
    lf = lineFileOpen(mim2locFile, TRUE);
    while (lineFileRow(lf, mimRow))
        hashAdd(loc2mimHash, mimRow[1], intToPt(atoi(mimRow[0])));
    lineFileClose(&lf);
    }

/* Scan through .ra file and make up start of refSeqInfo
 * objects in hash and list. */
printf("Scanning %s\n", raFile);
lf = lineFileOpen(raFile, TRUE);
while ((raHash = hashNextRa(lf)) != NULL)
    {
    if (clDots > 0 && ++dotMod == clDots)
        {
        dotMod = 0;
        dotOut();
        }
    AllocVar(rsi);
    slAddHead(&rsiList, rsi);

    /* Accession and size are mandatory. */
    if ((s = hashFindVal(raHash, "acc")) == NULL)
        errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName);
    rsi->mrnaAcc = cloneString(s);
    if ((s = hashFindVal(raHash, "siz")) == NULL)
        errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName);
    rsi->size = atoi(s);

    /* Gene name is optional; geneName stays NULL when absent. */
    if ((s = hashFindVal(raHash, "gen")) != NULL)
        rsi->geneName = cloneString(s);

    /* Without a cds tag only cdsEnd is set, to the full mRNA size. */
    if ((s = hashFindVal(raHash, "cds")) != NULL)
        parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd);
    else
        rsi->cdsEnd = rsi->size;
    if ((s = hashFindVal(raHash, "ngi")) != NULL)
        rsi->ngi = atoi(s);

    rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName);
    s = hashFindVal(raHash, "pro");
    if (s != NULL)
        rsi->productName = cloneString(s);
    rsi->productNameId = putInNameTable(productHash, productTab, s);
    hashAdd(rsiHash, rsi->mrnaAcc, rsi);

    freeHashAndVals(&raHash);
    }
lineFileClose(&lf);
if (clDots) printf("\n");

/* Scan through loc2ref filling in some gaps in rsi. */
printf("Scanning %s\n", loc2refFile);
lf = lineFileOpen(loc2refFile, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    char *mrnaAcc;

    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, row);
    if (wordCount < 5)
        errAbort("Expecting at least 5 tab-separated words line %d of %s",
                lf->lineIx, lf->fileName);
    mrnaAcc = accWithoutSuffix(row[1]);

    /* Accessions are expected to look like XX_nnnn.  Check the length first
     * so that a one or two character name is reported instead of being
     * indexed past its terminator. */
    if (mrnaAcc[0] == '\0' || mrnaAcc[1] == '\0' || mrnaAcc[2] != '_')
        warn("%s is an odd name line %d of %s",
                mrnaAcc, lf->lineIx, lf->fileName);
    if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL)
        {
        rsi->locusLinkId = lineFileNeedNum(lf, row, 0);
        rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0]));
        rsi->proteinAcc = cloneString(accWithoutSuffix(row[4]));
        }
    }
lineFileClose(&lf);

/* Report how many seem to be missing from loc2ref file. 
 * Write out refLink file. */
printf("Writing %s\n", "refLink.tab");
for (rsi = rsiList; rsi != NULL; rsi = rsi->next)
    {
    ++rsiCount;
    if (rsi->locusLinkId == 0)
        ++noLocCount;
    if (rsi->proteinAcc == NULL)
        ++noProtCount;
    fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n",
        emptyForNull(rsi->geneName), 
        emptyForNull(rsi->productName),
        emptyForNull(rsi->mrnaAcc), 
        emptyForNull(rsi->proteinAcc),
        rsi->geneNameId, rsi->productNameId, 
        rsi->locusLinkId, rsi->omimId);
    }
if (noLocCount) 
    printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount);
if (noProtCount)
    printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount);

/* Process alignments and write them out as genes.  An alignment is written
 * only if its query is known from the .ra file and has a displayID in the
 * spXref2 table of proteinDB. 
 * NOTE(review): psl, gp and answer are not released inside this loop;
 * confirm whether pslFree/genePredFree/freeMem calls are wanted here. */
lf = pslFileOpen(pslFile);
dotMod = 0;
while ((psl = pslNext(lf)) != NULL)
    {
    rsi = hashFindVal(rsiHash, psl->qName);
    if (rsi == NULL)
        {
        fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName);
        fflush(stderr);
        continue;
        }

    if (clDots > 0 && ++dotMod == clDots)
        {
        dotMod = 0;
        dotOut();
        }

    sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName);
    answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str);
    if (answer == NULL)
        {
        fprintf(stderr, "%s NOT FOUND.\n", psl->qName);
        fflush(stderr);
        continue;
        }

    struct genePred *gp = NULL;
    exonList = pslToExonList(psl);
    fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t",
        psl->qName, psl->tName, psl->strand[0], psl->tStart, psl->tEnd);

    /* CDS bounds come from the genePred conversion; exon coordinates
     * come from exonList. */
    gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize);
    if (!gp)
        errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName);

    fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd);
    fprintf(kgTab, "%d\t", slCount(exonList));

    fflush(kgTab);

    for (exon = exonList; exon != NULL; exon = exon->next)
        fprintf(kgTab, "%d,", exon->start);
    fprintf(kgTab, "\t");

    for (exon = exonList; exon != NULL; exon = exon->next)
        fprintf(kgTab, "%d,", exon->end);
    fprintf(kgTab, "\n");
    slFreeList(&exonList);
    }
/* Close the psl file like every other input scanned above. */
lineFileClose(&lf);

if (clDots) printf("\n");

if (!clTest)
    {
    writeSeqTable(pepFile, refPepTab, FALSE, TRUE);
    writeSeqTable(faFile, refMrnaTab, FALSE, FALSE);
    }

carefulClose(&kgTab);
carefulClose(&productTab);
carefulClose(&geneTab);
carefulClose(&refLinkTab);
carefulClose(&refPepTab);
carefulClose(&refMrnaTab);

if (!clTest)
    {
    printf("Loading database with %s\n", kgName);
    fflush(stdout);
    
    hgLoadTabFile(conn, ".", kgName, NULL);

    printf("Loading database with %s\n", "productName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "productName", NULL);
    
    printf("Loading database with %s\n", "geneName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "geneName", NULL);
    
    printf("Loading database with %s\n", "refLink");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refLink", NULL);
    
    printf("Loading database with %s\n", "refPep");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refPep", NULL);
    
    printf("Loading database with %s\n", "refMrna");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refMrna", NULL);
    }
}
Example #21
0
void txGeneFromBed(char *inBed, char *inPicks, char *ucscFa, char *uniProtFa, char *refPepFa, char *outKg)
/* txGeneFromBed - Convert from bed to knownGenes format table (genePred + uniProt ID). 
 *   inBed     - 12 column bed file of transcripts
 *   inPicks   - tab-separated cdsPick rows, looked up by bed name
 *   ucscFa    - UCSC protein sequences, looked up by bed name
 *   uniProtFa - protein sequences looked up by the pick's swissProt and uniProt ids
 *   refPepFa  - protein sequences looked up by the pick's refProt id
 *   outKg     - output file; one outputKg record per bed item
 * Beds without a coding region (thickStart >= thickEnd) are written with an
 * empty protein accession. */
{
/* Load protein sequence into hashes */
struct hash *uniProtHash = faReadAllIntoHash(uniProtFa, dnaUpper);
struct hash *ucscProtHash = faReadAllIntoHash(ucscFa, dnaUpper);
struct hash *refProtHash =faReadAllIntoHash(refPepFa, dnaUpper);

/* Load picks into hash.  We don't use cdsPicksLoadAll because empty fields
 * cause that autoSql-generated routine problems. */
struct hash *pickHash = newHash(18);
struct cdsPick *pick;
struct lineFile *lf = lineFileOpen(inPicks, TRUE);
char *row[CDSPICK_NUM_COLS];
while (lineFileRowTab(lf, row))
    {
    pick = cdsPickLoad(row);
    hashAdd(pickHash, pick->name, pick);
    }
/* Done with the picks file; release it before the remaining work. */
lineFileClose(&lf);

/* Load in bed */
struct bed *bed, *bedList = bedLoadNAll(inBed, 12);

/* Do reformatting and write output. */
FILE *f = mustOpen(outKg, "w");
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    char *protAcc = NULL;
    if (bed->thickStart < bed->thickEnd)
	{
	/* Coding transcript: a pick and a UCSC protein must exist for it. */
        pick = hashMustFindVal(pickHash, bed->name);
	struct dnaSeq *spSeq = NULL, *uniSeq = NULL, *refPep = NULL, *ucscSeq;
	ucscSeq = hashMustFindVal(ucscProtHash, bed->name);
	if (pick->swissProt[0])
	    spSeq = hashMustFindVal(uniProtHash, pick->swissProt);
	if (pick->uniProt[0])
	    uniSeq = hashMustFindVal(uniProtHash, pick->uniProt);
	if (pick->refProt[0])
	    refPep = hashMustFindVal(refProtHash, pick->refProt);

	/* First we look for an exact match between the ucsc protein and
	 * something from swissProt/uniProt, then RefSeq, in that order. */
	if (spSeq != NULL && sameString(ucscSeq->dna, spSeq->dna))
	    protAcc = pick->swissProt;
	if (protAcc == NULL && uniSeq != NULL && sameString(ucscSeq->dna, uniSeq->dna))
	    protAcc = pick->uniProt;
	if (protAcc == NULL && refPep != NULL && sameString(ucscSeq->dna, refPep->dna))
	    {
	    /* Work on a copy so chopSuffix does not alter the pick itself. */
	    protAcc = cloneString(pick->refProt);
	    chopSuffix(protAcc);
	    }

	/* No exact match: fall back to the uniProt id if there is one,
	 * otherwise to the refProt id without its suffix. */
	if (protAcc == NULL)
	    {
	    if (pick->uniProt[0])
	        protAcc = pick->uniProt;
	    else 
		{
	        protAcc = cloneString(pick->refProt);
		chopSuffix(protAcc);
		}
	    }
	}
    outputKg(bed, emptyForNull(protAcc), f);
    }
carefulClose(&f);
}