struct tenFields *parseTenFields(char *fileName)
/* Return list of partially parsed lines from GFF. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct tenFields *tfList = NULL, *tf;
char *words[11];
int wordCount, i;

while ((wordCount = lineFileChopNextTab(lf, words, ArraySize(words))) != 0)
    {
    if (wordCount != 10)
        errAbort("Expecting 10 tab separated fields got %d line %d of %s",
		wordCount, lf->lineIx, lf->fileName);
    AllocVar(tf);
    for (i=0; i<10; ++i)
	{
	char *s = words[i];
	if (i == 0 && sameString(s, "17")) /* 17 is a synonym for mitochondria */
	    s = cloneString("M");
	if (i != 6)
	    cgiDecode(s, s, strlen(s));
	tf->fields[i] = cloneString(s);
	}
    tf->lineIx = lf->lineIx;
    slAddHead(&tfList, tf);
    }
lineFileClose(&lf);
slReverse(&tfList);
return tfList;
}
void noteIds(struct tenFields *tfList, char *inGff, 
	FILE *cdsFile, FILE *otherFile)
/* Look for cases where tenth field is of form
 * ID=XXX;Note=YYY.  In these cases move XXX to
 * the ninth field, and store the ID, the 
 * third (type) field, and the YYY in f */
{
struct tenFields *tf;
struct hash *uniqHash = newHash(19);
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    char *s = tf->fields[9];
    if (startsWith("ID=", s))
        {
	char *id = s+3, *note = "";
	char *e = strchr(s, ';');
	if (!hashLookup(uniqHash, id))
	    {
	    hashAdd(uniqHash, id, NULL);
	    if (e != NULL)
		{
		*e++ = 0;
		s = stringIn("Note=", e);
		if (s != NULL)
		    {
		    note = s+5;
		    cgiDecode(note, note, strlen(note));
		    }
		}
	    }
	tf->fields[8] = id;
	}
    }
hashFree(&uniqHash);
}
void doDocIdReport(struct cart *theCart)
{
char *docId = cartString(theCart, "docId");
cartWebStart(cart, database, "ENCODE DCC:  Validation report for docId %s",docId);
struct sqlConnection *conn = sqlConnect(database);
char query[10 * 1024];
struct sqlResult *sr;
char **row;
boolean beenHere = FALSE;

printf("<a href=docIdView?db=%s> Return </a><BR>", database);
safef(query, sizeof query, "select * from %s where ix=%s", docIdTable,docId);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (beenHere)
        errAbort("found more than one record with ix value %s in table %s", 
            docId, docIdTable);
    beenHere = TRUE;

    struct docIdSub *docIdSub = docIdSubLoad(row);
    cgiDecode(docIdSub->valReport, docIdSub->valReport, strlen(docIdSub->valReport));
    //printf("tempFile is %s\n<BR>", tempFile);
    printf("<pre>%s", docIdSub->valReport);
    }

cartWebEnd();
}
void docIdReport(char *database)
/* docIdReport - generate report from docIdSub table. */
{
struct sqlConnection *conn = sqlConnect(database);
struct docIdSub *docIdSub;
char query[10 * 1024];
struct sqlResult *sr;
char **row;
safef(query, sizeof query, "select * from %s", docIdTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    docIdSub = docIdSubLoad(row);
    verbose(2, "ix %d\n", docIdSub->ix);
    verbose(2, "submitDate %s\n", docIdSub->submitDate);
    verbose(2, "md5sum %s\n", docIdSub->md5sum);
    verbose(2, "valReport %s\n", docIdSub->valReport);
    verbose(2, "metaData %s\n", docIdSub->metaData);
    verbose(2, "submitPath %s\n", docIdSub->submitPath);
    verbose(2, "submitter %s\n", docIdSub->submitter);

    cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData));
    char *tempFile = "temp";
    FILE *f = mustOpen(tempFile, "w");
    fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f);
    fclose(f);
    boolean validated;
    struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated);
    printf("%s %s %s %s %s\n", docIdDecorate(docIdSub->ix),  mdbObjFindValue(mdbObj, "dataType"), mdbObjFindValue(mdbObj, "view"), mdbObjFindValue(mdbObj, "cell"), mdbObjFindValue(mdbObj, "lab"));
    }

sqlFreeResult(&sr);
sqlDisconnect(&conn);
}
void doDocIdMeta(struct cart *theCart)
{
char *docId = cartString(theCart, "docId");
cartWebStart(cart, database, "ENCODE DCC:  Metadata for docId %s",docId);
struct sqlConnection *conn = sqlConnect(database);
char query[10 * 1024];
struct sqlResult *sr;
char **row;
struct tempName tn;
trashDirFile(&tn, "docId", "meta", ".txt");
char *tempFile = tn.forCgi;
boolean beenHere = FALSE;

printf("<a href=docIdView?db=%s> Return </a><BR>", database);
safef(query, sizeof query, "select * from %s where ix=%s", docIdTable,docId);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (beenHere)
        errAbort("found more than one record with ix value %s in table %s", 
            docId, docIdTable);
    beenHere = TRUE;

    struct docIdSub *docIdSub = docIdSubLoad(row);
    cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData));
    //printf("tempFile is %s\n<BR>", tempFile);
    FILE *f = mustOpen(tempFile, "w");
    fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f);
    fclose(f);
    boolean validated;
    struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated);
    unlink(tempFile);

    if (mdbObj->next)
        errAbort("got more than one metaObj from metaData");

    for(; mdbObj; mdbObj = mdbObj->next)
        {
        printf("<BR>object: %s\n", mdbObj->obj);
        struct mdbVar *vars = mdbObj->vars;
        for(; vars; vars = vars->next)
            {
            printf("<BR>%s %s\n", vars->var, vars->val);
            }
        }
    }

cartWebEnd();
}
void doStandard(struct cart *theCart)
{
cart = theCart;
cartWebStart(cart, database, "ENCODE DCC Submissions");
struct sqlConnection *conn = sqlConnect(database);
struct docIdSub *docIdSub;
char query[10 * 1024];
struct sqlResult *sr;
char **row;
struct tempName tn;
trashDirFile(&tn, "docId", "meta", ".txt");
char *tempFile = tn.forCgi;
//printf("tempFile is %s\n<BR>", tempFile);

    // <Data type> <Cell Type> <Key Metadata> <View>
printf("<table border=1><tr>");
printf("<th>dataType</th>");
printf("<th>cell type</th>");
printf("<th>metadata</th>");
printf("<th>view</th>");
printf("<th>fileType</th>");
printf("<th>file</th>");
printf("<th>lab</th>");
printf("<th>assembly</th>");
printf("<th>subId</th>");
printf("<th>val-report</th>");
printf("</tr>\n");
safef(query, sizeof query, "select * from %s", docIdTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    docIdSub = docIdSubLoad(row);
    verbose(2, "ix %d\n", docIdSub->ix);
    verbose(2, "submitDate %s\n", docIdSub->submitDate);
    verbose(2, "md5sum %s\n", docIdSub->md5sum);
    verbose(2, "valReport %s\n", docIdSub->valReport);
    verbose(2, "metaData %s\n", docIdSub->metaData);
    verbose(2, "submitPath %s\n", docIdSub->submitPath);
    verbose(2, "submitter %s\n", docIdSub->submitter);

    cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData));
    //printf("tempFile %s\n", tempFile);
    FILE *f = mustOpen(tempFile, "w");
    fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f);
    fclose(f);
    boolean validated;
    struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated);
    unlink(tempFile);

    // <Data type> <Cell Type> <Key Metadata> <View>
    char *docIdType = mdbObjFindValue(mdbObj, "type");
    char *docIdComposite = mdbObjFindValue(mdbObj, "composite");
    char buffer[10 * 1024];
    safef(buffer, sizeof buffer, "%d", docIdSub->ix);
    if (sameString(database, "encpipeline_beta"))
        docIdDir = docIdDirBeta;

    printf("<tr>");
    printf("<td>%s</td> ",   mdbObjFindValue(mdbObj, "dataType"));
    printf("<td>%s</td> ",   mdbObjFindValue(mdbObj, "cell"));
    struct dyString *str = newDyString(100);
    addValue(str,  mdbObjFindValue(mdbObj, "antibody"));
    addValue(str,  mdbObjFindValue(mdbObj, "treatment"));
    addValue(str,  mdbObjFindValue(mdbObj, "rnaExtract"));
    addValue(str,  mdbObjFindValue(mdbObj, "localization"));
    printf("<td>%s<a href=docIdView?docId=%s&db=%s&meta=\"\"> ...</a></td>", str->string,buffer, database);
    freeDyString(&str);
        
    printf("<td>%s</td> ",   mdbObjFindValue(mdbObj, "view"));
    printf("<td>%s</td> ",   mdbObjFindValue(mdbObj, "type"));
    printf("<td><a href=%s> %s</a></td>", 
        docIdGetPath(buffer, docIdDir, docIdType, NULL) , 
        docIdDecorate(docIdComposite,docIdSub->ix));
    char *lab = mdbObjFindValue(mdbObj, "lab");
    char *subId = mdbObjFindValue(mdbObj, "subId");
    printf("<td><a href=docIdView?docId=%s&db=%s&lab=\"%s\"> %s</a></td>",buffer, database, subId, lab);
    printf("<td>%s</td> ",   mdbObjFindValue(mdbObj, "assembly"));
    printf("<td>%s</td> ",   subId);
    printf("<td><a href=docIdView?docId=%s&db=%s&report=\"\"> report</a></td>", buffer, database);
    printf("</tr>\n");
    }

printf("</table>");
sqlFreeResult(&sr);
sqlDisconnect(&conn);
cartWebEnd();
}
void docIdTidy(char *database, char *docIdDir)
/* docIdTidy - tidy up the docId library by compressing and md5suming where appropriate. */
{
char query[10 * 1024];
struct sqlResult *sr;
char **row;
struct sqlConnection *conn = sqlConnect(database);
struct tempName tn;
trashDirFile(&tn, "docId", "meta", ".txt");
char *tempFile = tn.forCgi;
struct toDoList *toDoList = NULL;

safef(query, sizeof query, "select * from %s", docIdTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct docIdSub *docIdSub = docIdSubLoad(row);

    cgiDecode(docIdSub->metaData, docIdSub->metaData, strlen(docIdSub->metaData));
    FILE *f = mustOpen(tempFile, "w");
    fwrite(docIdSub->metaData, strlen(docIdSub->metaData), 1, f);
    fclose(f);
    boolean validated;
    //printf("metadata is %s\n", docIdSub->metaData);
    struct mdbObj *mdbObj = mdbObjsLoadFromFormattedFile(tempFile, &validated);
    unlink(tempFile);

    char *docIdType = mdbObjFindValue(mdbObj, "type");
    char buffer[10 * 1024];
    safef(buffer, sizeof buffer, "%d", docIdSub->ix);
    char *path = docIdGetPath(buffer, docIdDir, docIdType, NULL);
        //docIdDecorate(docIdSub->ix));
    printf("path %s\n", path);
    struct toDoList *toDoItem = NULL;

    if (!fileIsCompressed(path))
        {
        if (toDoItem == NULL) AllocVar(toDoItem);
        printf("foo\n");
        toDoItem->needs |= NEEDS_COMPRESSION;
        toDoItem->path = path;
        toDoItem->docId = docIdSub->ix;
        }

    printf("docId %d md5sum %s valReport %s\n",docIdSub->ix, docIdSub->md5sum, docIdSub->valReport);
    //if (docIdSub->md5sum == NULL)
    if (sameString(docIdSub->md5sum, ""))
        {
        printf("mdsum\n");
        if (toDoItem == NULL) AllocVar(toDoItem);
        toDoItem->needs |= NEEDS_MD5SUM;
        toDoItem->path = path;
        toDoItem->docId = docIdSub->ix;
        }

    //if (docIdSub->valReport == NULL)
    if (sameString(docIdSub->valReport,""))
        {
        printf("report\n");
        if (toDoItem == NULL) AllocVar(toDoItem);
        toDoItem->needs |= NEEDS_REPORT;
        toDoItem->path = path;
        toDoItem->docId = docIdSub->ix;
        }

    if (toDoItem)
        slAddHead(&toDoList, toDoItem);
    }
sqlFreeResult(&sr);

doCompression(toDoList);
doReports(toDoList);
doMd5Summing(toDoList);
}
Beispiel #8
0
void doGvf(struct trackDb *tdb, char *item)
/* Show details for variants represented as GVF, stored in a bed8Attrs table */
{
struct sqlConnection *conn = hAllocConn(database);
int start = cartInt(cart, "o");
char query[1024];
sqlSafef(query, sizeof(query),
      "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d",
      tdb->table, item, seqName, start);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
if ((row = sqlNextRow(sr)) == NULL)
    errAbort("doGvfDetails: can't find item '%s' in %s at %s:%d", item, database, seqName, start);
int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
struct bed8Attrs *ba = bed8AttrsLoad(row+rowOffset);
bedPrintPos((struct bed *)ba, 3, tdb);
int i = 0;
// Note: this loop modifies ba->attrVals[i], assuming we won't use them again:
for (i = 0;  i < ba->attrCount;  i++)
    {
    // The ID is the bed8Attrs name and has already been displayed:
    if (sameString(ba->attrTags[i], "ID"))
	continue;
    cgiDecode(ba->attrVals[i], ba->attrVals[i], strlen(ba->attrVals[i]));
    char *tag = ba->attrTags[i];
    // User-defined keywords used in dbVar's GVF:
    if (sameString(tag, "var_type")) // This one isn't anymore, but I add it back (hg18.txt).
	tag = "Variant type";
    else if (sameString(tag, "clinical_int"))
	tag = "Clinical interpretation";
    else if (sameString(tag, "var_origin"))
	tag = "Variant origin";
    else if (islower(tag[0]))
	// Uppercase for nice display, assuming user doesn't care which keywords are
	// user-defined vs. GVF standard:
	tag[0] = toupper(tag[0]);
    // GVF standard Start_range and End_range tags (1-based coords):
    if (sameString(tag, "Start_range") || sameString(tag, "End_range"))
	{
	char *copy = cloneString(ba->attrVals[i]);
	char *words[3];
	int wordCount = chopCommas(copy, words);
	if (wordCount == 2 &&
	    (sameString(".", words[0]) || isInteger(words[0])) &&
	    (sameString(".", words[1]) || isInteger(words[1])))
	    {
	    boolean isStartRange = sameString(tag, "Start_range");
	    char *rangeStart = words[0], *rangeEnd = words[1];
	    if (sameString(".", rangeStart))
		rangeStart = "unknown";
	    if (sameString(".", rangeEnd))
		rangeEnd = "unknown";
	    if (isStartRange)
		printf("<B>Start range</B>: outer start %s, inner start %s<BR>\n",
		       rangeStart, rangeEnd);
	    else
		printf("<B>End range</B>: inner end %s, outer end %s<BR>\n",
		       rangeStart, rangeEnd);
	    }
	else
	    // not formatted as expected, just print as-is:
	    printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
	}
    // Parent sounds like mom or dad (as in var_origin)... tweak it too:
    else if (sameString(tag, "Parent"))
	{
	printf("<B>Variant region:</B> "
	       "<A HREF=\"http://www.ncbi.nlm.nih.gov/dbvar/variants/%s/\" "
	       "TARGET=_BLANK>%s</A><BR>\n", ba->attrVals[i], htmlEncode(ba->attrVals[i]));
	}
    else if (sameString(tag, "Name"))
	{
	char *url = trackDbSetting(tdb, "url");
	// Show the Name only if it hasn't already appeared in the URL:
	if (url == NULL || !stringIn("$$", url))
	    printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
	}
    else if (sameWord(tag, "Phenotype_id") && startsWith("HPO:HP:", ba->attrVals[i]))
	{
	subChar(tag, '_', ' ');
	printf("<B>%s</B>: <A HREF=\"http://www.berkeleybop.org/obo/%s\" "
	       "TARGET=_BLANK>%s</A><BR>\n", tag, ba->attrVals[i]+strlen("HPO:"),
	       htmlEncode(ba->attrVals[i]));
	}
    else
	{
	subChar(tag, '_', ' ');
	printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
	}
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
/* printTrackHtml is done in genericClickHandlerPlus. */
}
char *scanSettingsForCT(char *userName, char *sessionName, char *contents,
			int *pLiveCount, int *pExpiredCount)
/* Parse the CGI-encoded session contents into {var,val} pairs and search
 * for custom tracks.  If found, refresh the custom track.  Parsing code 
 * taken from cartParseOverHash. 
 * If any nonexistent custom track files are found, return a SQL update
 * command that will remove those from this session.  We can't just do 
 * the update here because that messes up the caller's query. */
{
int contentLength = strlen(contents);
struct dyString *newContents = dyStringNew(contentLength+1);
struct dyString *oneSetting = dyStringNew(contentLength / 4);
char *updateIfAny = NULL;
char *contentsToChop = cloneString(contents);
char *namePt = contentsToChop;
verbose(3, "Scanning %s %s\n", userName, sessionName);
while (isNotEmpty(namePt))
    {
    char *dataPt = strchr(namePt, '=');
    char *nextNamePt;
    if (dataPt == NULL)
	errAbort("Mangled session content string %s", namePt);
    *dataPt++ = 0;
    nextNamePt = strchr(dataPt, '&');
    if (nextNamePt != NULL)
	*nextNamePt++ = 0;
    dyStringClear(oneSetting);
    dyStringPrintf(oneSetting, "%s=%s%s",
		   namePt, dataPt, (nextNamePt ? "&" : ""));
    if (startsWith(CT_FILE_VAR_PREFIX, namePt))
	{
	boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE;
	cgiDecode(dataPt, dataPt, strlen(dataPt));
	verbose(3, "Found variable %s = %s\n", namePt, dataPt);
	/* If the file does not exist, omit this setting from newContents so 
	 * it doesn't get copied from session to session.  If it does exist,
	 * leave it up to customFactoryTestExistence to parse the file for 
	 * possible customTrash table references, some of which may exist 
	 * and some not. */
	if (! fileExists(dataPt))
	    {
	    verbose(3, "Removing %s from %s %s\n", oneSetting->string,
		    userName, sessionName);
	    thisGotExpiredCT = TRUE;
	    }
	else
	    {
	    char *db = namePt + strlen(CT_FILE_VAR_PREFIX);
	    dyStringAppend(newContents, oneSetting->string);
	    customFactoryTestExistence(db, dataPt,
				       &thisGotLiveCT, &thisGotExpiredCT);
	    }
	if (thisGotLiveCT && pLiveCount != NULL)
	    (*pLiveCount)++;
	if (thisGotExpiredCT && pExpiredCount != NULL)
	    (*pExpiredCount)++;
	if (thisGotExpiredCT)
	    {
	    if (verboseLevel() >= 3)
		verbose(3, "Found expired custom track in %s %s: %s\n",
			userName, sessionName, dataPt);
	    else
		verbose(2, "Found expired custom track: %s\n", dataPt);
	    }
	if (thisGotLiveCT)
	    verbose(4, "Found live custom track: %s\n", dataPt);
	}
    else
	dyStringAppend(newContents, oneSetting->string);
    namePt = nextNamePt;
    }
if (newContents->stringSize != contentLength)
    {
    struct dyString *update = dyStringNew(contentLength*2);
    if (newContents->stringSize > contentLength)
	errAbort("Uh, why is newContents (%d) longer than original (%d)??",
		 newContents->stringSize, contentLength);
    dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable);
    dyStringAppendN(update, newContents->string, newContents->stringSize);
    dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 "
		   "where userName=\"%s\" and sessionName=\"%s\";",
		   userName, sessionName);
    verbose(3, "Removing one or more dead CT file settings from %s %s "
	    "(original length %d, now %d)\n", 
	    userName, sessionName,
	    contentLength, newContents->stringSize);
    updateIfAny = dyStringCannibalize(&update);
    }
dyStringFree(&oneSetting);
dyStringFree(&newContents);
freeMem(contentsToChop);
return updateIfAny;
}