static void doPslMapAli(struct sqlConnection *conn, 
    int taxon, char *db, 
    int fromTaxon, char *fromDb)
{
char cmd[256];

struct dyString *dy = dyStringNew(0);
char path[256];
char dnaPath[256];
char toDb[12];

safef(toDb,sizeof(toDb),"%s", db);
toDb[0]=toupper(toDb[0]);

safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/nib", db);
if (!fileExists(dnaPath))
    {
    safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/%s.2bit", db, db);
    if (!fileExists(dnaPath))
	errAbort("unable to locate nib dir or .2bit for %s: %s", db, dnaPath);
    }
    
safef(path,sizeof(path),"/gbdb/%s/liftOver/%sTo%s.over.chain.gz", fromDb, fromDb, toDb);
if (!fileExists(path))
    errAbort("unable to locate chain file %s",path);

/* get non-bac $db.vgProbes not yet aligned */
getPslMapAli(conn, db, fromTaxon, fromDb, FALSE);
/* get bac $db.vgProbes not yet aligned */
getPslMapAli(conn, db, fromTaxon, fromDb, TRUE);
/* get .fa for pslRecalcMatch use */
getPslMapFa(conn, db, fromTaxon);

/* non-bac */
safef(cmd,sizeof(cmd),
"zcat %s | pslMap -chainMapFile -swapMap  nonBac.psl stdin stdout "
"|  sort -k 14,14 -k 16,16n > unscoredNB.psl"
,path);
verbose(1,"%s\n",cmd); system(cmd);

safef(cmd,sizeof(cmd),
"pslRecalcMatch unscoredNB.psl %s" 
" pslMap.fa nonBac.psl"
,dnaPath);
verbose(1,"%s\n",cmd); system(cmd);

/* bac */
safef(cmd,sizeof(cmd),
"zcat %s | pslMap -chainMapFile -swapMap  bac.psl stdin stdout "
"|  sort -k 14,14 -k 16,16n > unscoredB.psl"
,path);
verbose(1,"%s\n",cmd); system(cmd);

safef(cmd,sizeof(cmd),
"pslRecalcMatch unscoredB.psl %s" 
" pslMap.fa bacTemp.psl"
,dnaPath);
verbose(1,"%s\n",cmd); system(cmd);

safef(cmd,sizeof(cmd),
"pslCDnaFilter -globalNearBest=0.00001 -minCover=0.05"
" bacTemp.psl bac.psl");
verbose(1,"%s\n",cmd); system(cmd);

safef(cmd,sizeof(cmd),"cat bac.psl nonBac.psl > vgPrbPslMap.psl");
verbose(1,"%s\n",cmd); system(cmd);

dyStringFree(&dy);

}
Ejemplo n.º 2
0
struct fullExperiment *getFullExperimentList(struct sqlConnection *conn,
    struct edwExperiment *eeList, char *assembly, struct hash **retHash)
/* Given list of edwExperiments, return list of ones replicated with full file sets on
 * both replicates.  If optional retHash is non-NULL then return a hash full of same 
 * experiments keyed by experiment accession */
{
/* Build up a list of fullExperiments and a hash keyed by name. */
struct hash *hash = hashNew(14);
struct fullExperiment *fullList = NULL;
struct edwExperiment *ee;
for (ee = eeList; ee != NULL; ee = ee->next)
    {
    struct fullExperiment *full = hashFindVal(hash, ee->accession);
    if (full == NULL)
        {
	AllocVar(full);
	full->name = cloneString(ee->accession);
	full->exp = ee;
	slAddHead(&fullList, full);
	hashAdd(hash, full->name, full);
	}
    }
uglyf("Got %d in eeList, %d in fullList, %d in hash\n", slCount(eeList), slCount(fullList), hash->elCount);

/* Build up SQL query to efficiently fetch all good files and valid files from our experiment */
struct dyString *q = dyStringNew(16*1024);
sqlDyStringPrintf(q, "select edwValidFile.*,edwFile.*,eapOutput.* "
		  " from edwValidFile,edwFile,eapOutput "
                  " where edwValidFile.fileId = edwFile.id and edwFile.id = eapOutput.fileId "
		  " and edwFile.deprecated='' and edwFile.errorMessage='' "
		  " and edwValidFile.ucscDb != 'centro.hg19' "
		  " and edwValidFile.ucscDb like '%%%s' and edwValidFile.experiment in ("
		  , assembly);
for (ee = eeList; ee != NULL; ee = ee->next)
    {
    dyStringPrintf(q, "'%s'", ee->accession);
    if (ee->next != NULL)
        dyStringAppendC(q, ',');
    }
dyStringAppendC(q, ')');

/* Loop through this making up vFiles that ultimately are attached to replicates. */
int vCount = 0;
struct sqlResult *sr = sqlGetResult(conn, q->string);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++vCount;
    struct edwValidFile *valid = edwValidFileLoad(row);
    fixOutputType(valid);
    struct edwFile *file = edwFileLoad(row + EDWVALIDFILE_NUM_COLS);
    struct eapOutput *eapOutput = eapOutputLoad(row + EDWVALIDFILE_NUM_COLS + EDWFILE_NUM_COLS);
    struct vFile *vf = vFileNew(file, valid, eapOutput);
    struct fullExperiment *full = hashMustFindVal(hash, valid->experiment);
    struct replicate *rep = findOrMakeReplicate(valid->replicate, &full->repList);
    char *format = valid->format;
    if (sameString(format, "bam"))
        slAddHead(&rep->bamList, vf);
    else if (sameString(format, "bigWig"))
        slAddHead(&rep->bigWigList, vf);
    else if (sameString(format, "narrowPeak") && !sameString(valid->outputType, "replicated_narrowPeak"))
        slAddHead(&rep->narrowList, vf);
    else if (sameString(format, "broadPeak") && !sameString(valid->outputType, "replicated_broadPeak"))
        slAddHead(&rep->broadList, vf);
    }
sqlFreeResult(&sr);
uglyf("Got %d vFiles\n", vCount);

dyStringFree(&q);

/* Free hash or return it, and return list. */
if (retHash == NULL)
    hashFree(&hash);
else
    *retHash = hash;
return fullList;
}
Ejemplo n.º 3
0
static struct bigBedInterval *bigBedIntervalsMatchingName(struct bbiFile *bbi, 
    struct fileOffsetSize *fosList, BbFirstWordMatch matcher, int fieldIx, 
    void *target, struct lm *lm)
/* Return list of intervals inside of sectors of bbiFile defined by fosList where the name 
 * matches target somehow. */
{
struct bigBedInterval *interval, *intervalList = NULL;
struct fileOffsetSize *fos;
boolean isSwapped = bbi->isSwapped;
for (fos = fosList; fos != NULL; fos = fos->next)
    {
    /* Read in raw data */
    udcSeek(bbi->udc, fos->offset);
    char *rawData = needLargeMem(fos->size);
    udcRead(bbi->udc, rawData, fos->size);

    /* Optionally uncompress data, and set data pointer to uncompressed version. */
    char *uncompressedData = NULL;
    char *data = NULL;
    int dataSize = 0;
    if (bbi->uncompressBufSize > 0)
	{
	data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
	dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
	}
    else
	{
        data = rawData;
	dataSize = fos->size;
	}

    /* Set up for "memRead" routines to more or less treat memory block like file */
    char *blockPt = data, *blockEnd = data + dataSize;
    struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here


    /* Read next record into local variables. */
    while (blockPt < blockEnd)
	{
	bits32 chromIx = memReadBits32(&blockPt, isSwapped);
	bits32 s = memReadBits32(&blockPt, isSwapped);
	bits32 e = memReadBits32(&blockPt, isSwapped);
	int c;
	dyStringClear(dy);
	// TODO - can simplify this probably just to for (;;) {if ((c = *blockPt++) == 0) ...
	while ((c = *blockPt++) >= 0)
	    {
	    if (c == 0)
		break;
	    dyStringAppendC(dy, c);
	    }
	if ((*matcher)(dy->string, fieldIx, target))
	    {
	    lmAllocVar(lm, interval);
	    interval->start = s;
	    interval->end = e;
	    interval->rest = cloneString(dy->string);
	    interval->chromId = chromIx;
	    slAddHead(&intervalList, interval);
	    }
	}

    /* Clean up temporary buffers. */
    dyStringFree(&dy);
    freez(&uncompressedData);
    freez(&rawData);
    }
slReverse(&intervalList);
return intervalList;
}
Ejemplo n.º 4
0
char *filterClause(char *db, char *table, char *chrom, char *extraClause)
/* Get filter clause (something to put after 'where')
 * for table */
{
struct sqlConnection *conn = NULL;
char varPrefix[128];
int varPrefixSize, fieldNameSize;
struct hashEl *varList, *var;
struct dyString *dy = NULL;
boolean needAnd = FALSE;
char oldDb[128];
char dbTableBuf[256];
char explicitDb[128];
char splitTable[256];
char explicitDbTable[512];

/* Return just extraClause (which may be NULL) if no filter on us. */
if (! (anyFilter() && filteredOrLinked(db, table)))
    return cloneString(extraClause);
safef(oldDb, sizeof(oldDb), "%s", db);
dbOverrideFromTable(dbTableBuf, &db, &table);
if (!sameString(oldDb, db))
     safef(explicitDb, sizeof(explicitDb), "%s.", db);
else
     explicitDb[0] = 0;

/* Cope with split table and/or custom tracks. */
if (isCustomTrack(table))
    {
    conn = hAllocConn(CUSTOM_TRASH);
    struct customTrack *ct = ctLookupName(table);
    safef(explicitDbTable, sizeof(explicitDbTable), "%s", ct->dbTableName);
    }
else
    {
    conn = hAllocConn(db);
    safef(splitTable, sizeof(splitTable), "%s_%s", chrom, table);
    if (!sqlTableExists(conn, splitTable))
	safef(splitTable, sizeof(splitTable), "%s", table);
    safef(explicitDbTable, sizeof(explicitDbTable), "%s%s",
	  explicitDb, splitTable);
    }

/* Get list of filter variables for this table. */
safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table);
varPrefixSize = strlen(varPrefix);
varList = cartFindPrefix(cart, varPrefix);
if (varList == NULL)
    {
    hFreeConn(&conn);
    return cloneString(extraClause);
    }

/* Create filter clause string, stepping through vars. */
dy = dyStringNew(0);
for (var = varList; var != NULL; var = var->next)
    {
    /* Parse variable name into field and type. */
    char field[64], *s, *type;
    s = var->name + varPrefixSize;
    type = strchr(s, '.');
    if (type == NULL)
        internalErr();
    fieldNameSize = type - s;
    if (fieldNameSize >= sizeof(field))
        internalErr();
    memcpy(field, s, fieldNameSize);
    field[fieldNameSize] = 0;
    sqlCkId(field);
    type += 1;
    /* rawLogic and rawQuery are handled below;
     * filterMaxOutputVar is not really a filter variable and is handled
     * in wiggle.c. */
    if (startsWith("raw", type) || sameString(filterMaxOutputVar, type))
	continue;
    /*	Any other variables that are missing a name:
     *		<varPrefix>..<type>
     *	are illegal
     */
    if (fieldNameSize < 1)
	{
	warn("Missing name in cart variable: %s\n", var->name);
	continue;
	}
    if (sameString(type, filterDdVar))
        {
	char *patVar = filterPatternVarName(db, table, field);
	struct slName *patList = cartOptionalSlNameList(cart, patVar);
	normalizePatList(&patList);
	if (slCount(patList) > 0)
	    {
	    char *ddVal = cartString(cart, var->name);
	    boolean neg = sameString(ddVal, ddOpMenu[1]);
	    char *fieldType = getSqlType(conn, explicitDbTable, field);
	    boolean needOr = FALSE;
	    if (needAnd) dyStringAppend(dy, " and ");
	    needAnd = TRUE;
	    if (neg) dyStringAppend(dy, "not ");
	    boolean composite = (slCount(patList) > 1);
	    if (composite || neg) dyStringAppendC(dy, '(');
	    struct slName *pat;
	    for (pat = patList;  pat != NULL;  pat = pat->next)
		{
		char *sqlPat = sqlLikeFromWild(pat->name);
		if (needOr)
		    dyStringAppend(dy, " OR ");
		needOr = TRUE;
		if (isSqlSetType(fieldType))
		    {
		    sqlDyStringPrintfFrag(dy, "FIND_IN_SET('%s', %s.%s)>0 ",
				   sqlPat, explicitDbTable , field);
		    }
		else
		    {
		    sqlDyStringPrintfFrag(dy, "%s.%s ", explicitDbTable, field);
		    if (sqlWildcardIn(sqlPat))
			dyStringAppend(dy, "like ");
		    else
			dyStringAppend(dy, "= ");
		    sqlDyStringPrintf(dy, "'%s'", sqlPat);
		    }
		freez(&sqlPat);
		}
	    if (composite || neg) dyStringAppendC(dy, ')');
	    }
	}
    else if (sameString(type, filterCmpVar))
        {
	char *patVar = filterPatternVarName(db, table, field);
	char *pat = trimSpaces(cartOptionalString(cart, patVar));
	char *cmpVal = cartString(cart, var->name);
	if (cmpReal(pat, cmpVal))
	    {
	    if (needAnd) dyStringAppend(dy, " and ");
	    needAnd = TRUE;
	    if (sameString(cmpVal, "in range"))
	        {
		char *words[2];
		int wordCount;
		char *dupe = cloneString(pat);

		wordCount = chopString(dupe, ", \t\n", words, ArraySize(words));
		if (wordCount < 2)	/* Fake short input */
		    words[1] = "2000000000";
		if (strchr(pat, '.')) /* Assume floating point */
		    {
		    double a = atof(words[0]), b = atof(words[1]);
		    sqlDyStringPrintfFrag(dy, "%s.%s >= %f && %s.%s <= %f",
		    	explicitDbTable, field, a, explicitDbTable, field, b);
		    }
		else
		    {
		    int a = atoi(words[0]), b = atoi(words[1]);
		    sqlDyStringPrintfFrag(dy, "%s.%s >= %d && %s.%s <= %d",
		    	explicitDbTable, field, a, explicitDbTable, field, b);
		    }
		freez(&dupe);
		}
	    else
	        {
		// cmpVal has been checked already above in cmpReal for legal values.
		sqlDyStringPrintfFrag(dy, "%s.%s %-s ", explicitDbTable, field, cmpVal);
		if (strchr(pat, '.'))	/* Assume floating point. */
		    dyStringPrintf(dy, "%f", atof(pat));
		else
		    dyStringPrintf(dy, "%d", atoi(pat));
		}
	    }
	}
    }
/* Handle rawQuery if any */
    {
    char *varName;
    char *logic, *query;
    varName = filterFieldVarName(db, table, "", filterRawLogicVar);
    logic = cartUsualString(cart, varName, logOpMenu[0]);
    varName = filterFieldVarName(db, table, "", filterRawQueryVar);
    query = trimSpaces(cartOptionalString(cart, varName));
    if (query != NULL && query[0] != 0)
        {
	if (needAnd) dyStringPrintf(dy, " %s ", logic);
	sqlSanityCheckWhere(query, dy);
	}
    }

/* Clean up and return */
hFreeConn(&conn);
hashElFreeList(&varList);
if (dy->stringSize == 0)
    {
    dyStringFree(&dy);
    return cloneString(extraClause);
    }
else
    {
    if (isNotEmpty(extraClause))
	dyStringPrintf(dy, " and %s", extraClause);
    return dyStringCannibalize(&dy);
    }
}
Ejemplo n.º 5
0
void verifyGreatAssemblies()
{
// First read in the assembly name and description information into name lists
struct slName* supportedAssemblies = NULL;
struct lineFile *lf = lineFileOpen(greatData, TRUE);
int fieldCount = 1;
char* row[fieldCount];
int wordCount;
while ((wordCount = lineFileChopTab(lf, row)) != 0)
	{
	if (wordCount != fieldCount)
		errAbort("The %s file is not properly formatted.\n", greatData);
	slNameAddHead(&supportedAssemblies, row[0]);
	}
lineFileClose(&lf);

boolean invalidAssembly = TRUE;
struct slName* currAssembly;
for (currAssembly = supportedAssemblies; currAssembly != NULL; currAssembly = currAssembly->next)
	{
	if (!hDbIsActive(currAssembly->name))
		{
		errAbort("Assembly %s in supported assembly file is not an active assembly.\n", currAssembly->name);
		}
	if (sameOk(database, currAssembly->name))
		{
		invalidAssembly = FALSE;
		break;
		}
	}

if (invalidAssembly)
    {
	slReverse(&supportedAssemblies);
	currAssembly = supportedAssemblies;
	struct dyString* dy = dyStringNew(0);
	addAssemblyToSupportedList(dy, currAssembly->name);

	currAssembly = currAssembly->next;
	while (currAssembly != NULL)
		{
		dyStringAppend(dy, ", ");
		if (currAssembly->next == NULL)
			dyStringAppend(dy, "and ");
		addAssemblyToSupportedList(dy, currAssembly->name);
		currAssembly = currAssembly->next;
		}

    hPrintf("<script type='text/javascript'>\n");
    hPrintf("function logSpecies() {\n");
    hPrintf("try {\n");
    hPrintf("var r = new XMLHttpRequest();\n");
    hPrintf("r.open('GET', 'http://great.stanford.edu/public/cgi-bin/logSpecies.php?species=%s');\n", database);
    hPrintf("r.send(null);\n");
    hPrintf("} catch (err) { }\n");
    hPrintf("}\n");
    hPrintf("window.onload = logSpecies;\n");
    hPrintf("</script>\n");
    errAbort("GREAT only supports the %s assemblies."
    "\nPlease go back and ensure that one of those assemblies is chosen.",
	dyStringContents(dy));
    htmlClose();
	dyStringFree(&dy);
    }

slNameFreeList(&supportedAssemblies);
}
Ejemplo n.º 6
0
boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
				boolean *chunked, int *contentLength)
/* Extract HTTP response header from lf into hdr, tell if it's 
 * "Transfer-Encoding: chunked" or if it has a contentLength. */
{
  struct dyString *header = newDyString(1024);
  char *line;
  int lineSize;

  if (chunked != NULL)
    *chunked = FALSE;
  if (contentLength != NULL)
    *contentLength = -1;
  dyStringClear(header);
  if (lineFileNext(lf, &line, &lineSize))
    {
      if (startsWith("HTTP/", line))
	{
	char *version, *code;
	dyStringAppendN(header, line, lineSize-1);
	dyStringAppendC(header, '\n');
	version = nextWord(&line);
	code = nextWord(&line);
	if (code == NULL)
	    {
	    warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
	    *hdr = cloneString(header->string);
	    dyStringFree(&header);
	    return FALSE;
	    }
	if (!sameString(code, "200"))
	    {
	    warn("%s: Errored HTTP response header: %s %s %s\n", lf->fileName, version, code, line);
	    *hdr = cloneString(header->string);
	    dyStringFree(&header);
	    return FALSE;
	    }
	while (lineFileNext(lf, &line, &lineSize))
	    {
	    /* blank line means end of HTTP header */
	    if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
	        break;
	    if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
	        *chunked = TRUE;
	    dyStringAppendN(header, line, lineSize-1);
	    dyStringAppendC(header, '\n');
	    if (strstr(line, "Content-Length:"))
	      {
		code = nextWord(&line);
		code = nextWord(&line);
		if (contentLength != NULL)
		    *contentLength = atoi(code);
	      }
	    }
	}
      else
	{
	  /* put the line back, don't put it in header/hdr */
	  lineFileReuse(lf);
	  warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
	  *hdr = cloneString(header->string);
	  dyStringFree(&header);
	  return FALSE;
	}
    }
  else
    {
      *hdr = cloneString(header->string);
      dyStringFree(&header);
      return FALSE;
    }

  *hdr = cloneString(header->string);
  dyStringFree(&header);
  return TRUE;
} /* lineFileParseHttpHeader */
Ejemplo n.º 7
0
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
	bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items. */
{
struct bigBedInterval *el, *list = NULL;
int itemCount = 0;
bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
	chrom, start, end, &chromId);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

for (block = blockList; block != NULL; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	while (blockPt < blockEnd)
	    {
	    /* Read next record into local variables. */
	    bits32 chr = memReadBits32(&blockPt, isSwapped);	// Read and discard chromId
	    bits32 s = memReadBits32(&blockPt, isSwapped);
	    bits32 e = memReadBits32(&blockPt, isSwapped);
	    int c;
	    dyStringClear(dy);
	    while ((c = *blockPt++) >= 0)
		{
		if (c == 0)
		    break;
		dyStringAppendC(dy, c);
		}

	    /* If we're actually in range then copy it into a new  element and add to list. */
	    if (chr == chromId && rangeIntersection(s, e, start, end) > 0)
		{
		++itemCount;
		if (maxItems > 0 && itemCount > maxItems)
		    break;

		lmAllocVar(lm, el);
		el->start = s;
		el->end = e;
		if (dy->stringSize > 0)
		    el->rest = lmCloneString(lm, dy->string);
		slAddHead(&list, el);
		}
	    }
	if (maxItems > 0 && itemCount > maxItems)
	    break;
	blockBuf += block->size;
        }
    if (maxItems > 0 && itemCount > maxItems)
        break;
    freez(&mergedBuf);
    }
freeMem(uncompressBuf);
dyStringFree(&dy);
slFreeList(&blockList);
slReverse(&list);
return list;
}
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db)
/* process isPcr results  */
{

struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("mrna.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
boolean more = lineFileNext(lf, &line, &lineSize);
while(more)
    {
    if (line[0] != '>')
	errAbort("unexpected error out of phase\n");
    name = cloneString(line+1);
    verbose(2,"name=%s\n",name);
    dyStringClear(dy);
    while((more=lineFileNext(lf, &line, &lineSize)))
	{
	if (line[0] == '>')
	    {
	    break;
	    }
	dyStringAppend(dy,line);	    
	}
    dna = cloneString(dy->string);

    while(1)
	{
	int oldProbe = 0;
	dyStringClear(dy);
	dyStringPrintf(dy, "select id from vgPrb "
	   "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name);
	oldProbe = sqlQuickNum(conn,dy->string);
	if (oldProbe==0)
	    break;       /* no more records match */
	    
	/* record exists and hasn't already been updated */
	
	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	
	if (vgPrb == 0)
	    {
	    dyStringClear(dy);
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq = '");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " db = '%s',\n", db);
	    dyStringAppend(dy, " state = 'seq'\n");
	    dyStringPrintf(dy, " where id=%d\n", oldProbe);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    verbose(2, "%s\n", dy->string);
	    sqlUpdate(conn, dy->string);
	    }
	else  /* probe seq already exists */ 
	    { 
	    /* just re-map the probe table recs to it */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe);
	    sqlUpdate(conn, dy->string);
	    }
	    
	}    

    freez(&name);
    freez(&dna);
    }
lineFileClose(&lf);

dyStringFree(&dy);
}
static void doAccessionsSeq(struct sqlConnection *conn, int taxon, char *db)
/* get probe seq from Accessions */
{
int rc = 0;
struct dyString *dy = dyStringNew(0);

/* get refSeq accessions and rna */
setTName(conn, taxon, db, "refSeq", "refSeq");
rc = getAccMrnas(conn, taxon, db, "refSeq", "refSeqAli");
verbose(1,"rc = %d = count of refSeq mrna for %s\n",rc,db);
advanceType(conn,taxon,"refSeq","genRef");

/* get refSeq-in-gene.genbank accessions and rna */
setTName(conn, taxon, db, "genRef", "genbank");
rc = getAccMrnas(conn, taxon, db, "genRef", "refSeqAli");
verbose(1,"rc = %d = count of genRef mrna for %s\n",rc,db);
advanceType(conn,taxon,"genRef","genbank");

/* get genbank accessions and rna */
setTName(conn, taxon, db, "genbank", "genbank");
rc = getAccMrnas(conn, taxon, db, "genbank", "all_mrna");
verbose(1,"rc = %d = count of genbank mrna for %s\n",rc,db);
advanceType(conn,taxon,"genbank","flatRef");

/* get gene.name -> refFlat to refSeq accessions and rna */
setTNameMapped(conn, taxon, db, "flatRef", "name", "refFlat", "geneName", "name");
rc = getAccMrnas(conn, taxon, db, "flatRef", "refSeqAli");
verbose(1,"rc = %d = count of flatRef mrna for %s\n",rc,db);
advanceType(conn,taxon,"flatRef","flatAll");

/* get gene.name -> refFlat to all_mrna accessions */
setTNameMapped(conn, taxon, db, "flatAll", "name", "refFlat", "geneName", "name");
rc = getAccMrnas(conn, taxon, db, "flatAll", "all_mrna");
verbose(1,"rc = %d = count of flatAll mrna for %s\n",rc,db);
advanceType(conn,taxon,"flatAll","linkRef");



/* get gene.name -> refLink to refSeq accessions and rna */
setTNameMapped(conn, taxon, db, "linkRef", "name", "refLink", "name", "mrnaAcc");
rc = getAccMrnas(conn, taxon, db, "linkRef", "refSeqAli");
verbose(1,"rc = %d = count of linkRef mrna for %s\n",rc,db);
advanceType(conn,taxon,"linkRef","linkAll");

/* get gene.name -> refLink to all_mrna accessions */
setTNameMapped(conn, taxon, db, "linkAll", "name", "refLink", "name", "mrnaAcc");
rc = getAccMrnas(conn, taxon, db, "linkAll", "all_mrna");
verbose(1,"rc = %d = count of linkAll mrna for %s\n",rc,db);
advanceType(conn,taxon,"linkAll","kgAlRef");



/* get gene.name -> kgAlias to refSeq accessions and rna */
setTNameMapped(conn, taxon, db, "kgAlRef", "name", "kgAlias", "alias", "kgId");
rc = getAccMrnas(conn, taxon, db, "kgAlRef", "refSeqAli");
verbose(1,"rc = %d = count of kgAlRef mrna for %s\n",rc,db);
advanceType(conn,taxon,"kgAlRef","kgAlAll");

/* get gene.name -> kgAlias to all_mrna accessions */
setTNameMapped(conn, taxon, db, "kgAlAll", "name", "kgAlias", "alias", "kgId");
rc = getAccMrnas(conn, taxon, db, "kgAlAll", "all_mrna");
verbose(1,"rc = %d = count of kgAlAll mrna for %s\n",rc,db);
advanceType(conn,taxon,"kgAlAll","gene");

dyStringFree(&dy);
}
static int doBacs(struct sqlConnection *conn, int taxon, char *db)
/* fetch available sequence for bacEndPairs */
{
struct dyString *dy = dyStringNew(0);
struct dnaSeq *chromSeq = NULL;
struct bac *bacs = bacRead(conn, taxon, db);
struct bac *bac = NULL;
char *chrom = cloneString("");
int count = 0;

verbose(1,"bac list read done.\n");

for(bac=bacs;bac;bac=bac->next)
    {
    
    if (differentWord(chrom,bac->chrom))
	{
	verbose(1,"switching to chrom %s\n",bac->chrom);
	dnaSeqFree(&chromSeq); 
	chromSeq = hLoadChrom(bac->chrom,db);
	freez(&chrom);
	chrom = cloneString(bac->chrom);
	}

    char *dna = checkAndFetchBacDna(chromSeq, bac);
    if (sameString(bac->strand,"-"))
	{
	reverseComplement(dna,strlen(dna));
	}


    dyStringClear(dy);
    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe);
    if (sqlQuickNum(conn,dy->string)>0)
	{
	/* record exists and hasn't already been updated */

	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	
	if (vgPrb == 0)
	    {
	    dyStringClear(dy);
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq='");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " tName='%s',\n", bac->chrom);
	    dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart);
	    dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd);
	    dyStringPrintf(dy, " tStrand='%s',\n", bac->strand);
	    dyStringPrintf(dy, " db='%s',\n", db);
	    dyStringPrintf(dy, " state='%s'\n", "seq");
	    dyStringPrintf(dy, " where id=%d\n", bac->probe);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    //verbose(2, "%s\n", dy->string); // the sql string could be quite large
	    sqlUpdate(conn, dy->string);
	    }
	else  /* probe seq already exists */ 
	    { 
	    /* just re-map the probe table recs to it */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe);
	    sqlUpdate(conn, dy->string);
	    }
	    
	++count; 
	
    
	verbose(2,"%d finished bac for probe id %d size %d\n", 
	    count, bac->probe, bac->chromEnd - bac->chromStart);
	}

    freez(&dna);
    }

freez(&chrom);
dnaSeqFree(&chromSeq);

bacFreeList(&bacs);

dyStringFree(&dy);

return count;  
}
static void doPrimers(struct sqlConnection *conn, int taxon, char *db)
/* get probe seq from primers */
{
int rc = 0;
struct dyString *dy = dyStringNew(0);
char cmdLine[256];
char path1[256];
char path2[256];

dyStringClear(dy);
dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'");
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */
    {

    dyStringClear(dy);
    dyStringPrintf(dy, "select qName from %s.all_mrna",db);
    rc = 0;
    rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE);
    safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db);
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    
    verbose(1,"rc = %d = count of mrna for %s\n",rc,db);

    system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date");
    system("ls -l");

    processIsPcr(conn,taxon,db);

    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");

    }
unlink("primers.query");    

/* find any remaining type primersMrna that couldn't be resolved and demote 
 * them to type primersGenome
 */
dyStringClear(dy);
dyStringAppend(dy, "update vgPrb set type='primersGenome'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersMrna'");
sqlUpdate(conn, dy->string);



/* get primers for those probes that did not find mrna isPcr matches 
 * and then do them against the genome instead */
dyStringClear(dy);
dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'");
rc = 0;
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */
    {
    safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db);
    safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
    verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

    safef(cmdLine,sizeof(cmdLine),
	    "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'",
	    getCurrentDir(),db);
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
    verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l");

    processIsPcr(conn,taxon,db);
    
    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");

    }
unlink("primers.query");    

/* find any remaining type primersGenome that couldn't be resolved and demote 
 * them to type refSeq
 */
dyStringClear(dy);
dyStringAppend(dy, "update vgPrb set type='refSeq'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersGenome'");
sqlUpdate(conn, dy->string);

dyStringFree(&dy);
}
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db)
/* process isPcr results  */
{

/* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */

struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
char *word, *end;
char *tName;
int tStart;
int tEnd;
char *tStrand;
int probeid=0;  /* really a vgPrb id */
boolean more = lineFileNext(lf, &line, &lineSize);
while(more)
    {
    if (line[0] != '>')
	errAbort("unexpected error out of phase\n");
    name = cloneString(line);
    verbose(1,"name=%s\n",name);
    dyStringClear(dy);
    while((more=lineFileNext(lf, &line, &lineSize)))
	{
	if (line[0] == '>')
	    {
	    break;
	    }
	dyStringAppend(dy,line);	    
	}
    dna = cloneString(dy->string);
    word = name+1;
    end = strchr(word,':');
    tName = cloneStringZ(word,end-word); 
    word = end+1;
    end = strchr(word,'+');
    tStrand = "+";
    if (!end)
	{
	end = strchr(word,'-');
	tStrand = "-";
	}
    tStart = atoi(word); 
    word = end+1;
    end = strchr(word,' ');
    tEnd = atoi(word); 
    word = end+1;
    end = strchr(word,' ');
    probeid = atoi(word); 

    dyStringClear(dy);
    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid);
    if (sqlQuickNum(conn,dy->string)>0)
	{
	/* record exists and hasn't already been updated */

	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	
	if (vgPrb == 0)
	    {
	    dyStringClear(dy);
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq='");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " tName='%s',\n", tName);
	    dyStringPrintf(dy, " tStart=%d,\n", tStart);
	    dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
	    dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
	    dyStringPrintf(dy, " db='%s',\n", db);
	    dyStringPrintf(dy, " state='%s'\n", "seq");
	    dyStringPrintf(dy, " where id=%d\n", probeid);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    verbose(2, "%s\n", dy->string);
	    sqlUpdate(conn, dy->string);
	    }
	else  /* probe seq already exists */ 
	    { 
	    /* just re-map the probe table recs to it */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid);
	    sqlUpdate(conn, dy->string);
	    }
	}
    
    freez(&tName);
    freez(&name);
    freez(&dna);
    }
lineFileClose(&lf);

dyStringFree(&dy);
}
static void populateMissingVgPrb(struct sqlConnection *conn)
/* populate vgPrb where missing, usually after new records added to visiGene */
{
struct sqlResult *sr;
char **row;
struct dyString *dy = dyStringNew(0);
struct sqlConnection *conn2 = sqlConnect(database);
struct sqlConnection *conn3 = sqlConnect(database);
int probeCount=0, vgPrbCount=0;

dyStringAppend(dy, 
"select p.id,p.gene,antibody,probeType,fPrimer,rPrimer,p.seq,bac,g.taxon"
" from probe p join gene g"
" left join vgPrbMap m on m.probe = p.id"
" where g.id = p.gene"
"   and m.probe is NULL");
sr = sqlGetResult(conn, dy->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    int id = sqlUnsigned(row[0]); 
    /* int gene = sqlUnsigned(row[1]); */
    /* int antibody = sqlUnsigned(row[2]); */
    /* int probeType = sqlUnsigned(row[3]); */
    char *fPrimer = row[4]; 
    char *rPrimer = row[5]; 
    char *seq = row[6]; 
    int bac = sqlUnsigned(row[7]); 
    int taxon = sqlUnsigned(row[8]); 

    char *peType = "none";
    int peProbe = id;
    char *peSeq = seq;
    char *tName = "";
    int tStart = 0;
    int tEnd = 0;
    char *tStrand = " ";
    /*
    char *peGene = "";
    int bacInfo = 0;
    int seqid = 0;
    int pslid = 0;
    */
    char *state = "new";
    char *db = "";
    int vgPrb = 0;

    if (isNotEmpty(seq))
    	{
	peType = "probe";
	state = "seq";
	}
    else if (isNotEmpty(fPrimer) && isNotEmpty(rPrimer))
    	{
	peType = "primersMrna";
	}
    else if (isNotEmpty(fPrimer) && isEmpty(rPrimer))
    	{ /* only have fPrimer, it's probably a comment, not dna seq */
	peType = "refSeq";   /* use accession or gene */
	}
    else if (bac > 0)
    	{
	peType = "bac";   /* use bacEndPairs */
	}
    else	    
    	{
	peType = "refSeq";   /* use accession or gene */
	}

    if (!sameString(peSeq,""))
	{
	vgPrb = findVgPrbBySeq(conn3,peSeq,taxon);
	}

    if (vgPrb == 0)
	{
	dyStringClear(dy);
	dyStringAppend(dy, "insert into vgPrb set");
	dyStringPrintf(dy, " id=default,\n");
	dyStringPrintf(dy, " type='%s',\n", peType);
	dyStringAppend(dy, " seq='");
	dyStringAppend(dy, peSeq);
	dyStringAppend(dy, "',\n");
	dyStringPrintf(dy, " tName='%s',\n", tName);
	dyStringPrintf(dy, " tStart=%d,\n", tStart);
	dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
	dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
	dyStringPrintf(dy, " db='%s',\n", db);
	dyStringPrintf(dy, " taxon='%d',\n", taxon);
	dyStringPrintf(dy, " state='%s'\n", state);
	verbose(2, "%s\n", dy->string);
	sqlUpdate(conn2, dy->string);
	vgPrb = sqlLastAutoId(conn2);
	vgPrbCount++;
	}
	
    dyStringClear(dy);
    dyStringAppend(dy, "insert into vgPrbMap set");
    dyStringPrintf(dy, " probe=%d,\n", peProbe);
    dyStringPrintf(dy, " vgPrb=%d \n", vgPrb);
    verbose(2, "%s\n", dy->string);
    sqlUpdate(conn2, dy->string);

    probeCount++;
	
    }

verbose(1, "# new probe records found = %d, # new vgPrb records added = %d\n", probeCount, vgPrbCount);

dyStringFree(&dy);
    
sqlFreeResult(&sr);

sqlDisconnect(&conn3);
sqlDisconnect(&conn2);

}
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table)
{
int rc = 0;
char cmd[256];
char path[256];
char bedPath[256];
char gbdbPath[256];
char *fname=NULL;
struct dyString *dy = dyStringNew(0);
dyStringClear(dy);
dyStringPrintf(dy, 
"select distinct concat('vgPrb_',e.id), e.seq"
" from vgPrb e join %s.%s v"
" left join %s.seq s on s.acc = v.qName"
" where concat('vgPrb_',e.id) = v.qName"
" and s.acc is NULL"
" order by e.id"
    , db, table, db);
rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE);
verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table);
if (rc > 0)  /* can set any desired minimum */
    {
    safef(bedPath,sizeof(bedPath),"/cluster/data/%s/bed/visiGene/",db);
    if (!fileExists(bedPath))
	{
	safef(cmd,sizeof(cmd),"mkdir %s",bedPath);
	verbose(1,"%s\n",cmd); system(cmd);
	}
    
    safef(gbdbPath,sizeof(gbdbPath),"/gbdb/%s/visiGene/",db);
    if (!fileExists(gbdbPath))
	{
	safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath);
    	verbose(1,"%s\n",cmd); system(cmd);
	}
   
    while(1)
	{
	int i=0;
	safef(path,sizeof(path),"%svgPrbExt_AAAAAA.fa",bedPath);
        char *c = rStringIn("AAAAAA",path);
        srand( (unsigned)time( NULL ) );
        for(i=0;i<6;++i)
            {
            *c++ += (int) 26 * (rand() / (RAND_MAX + 1.0));
            }
	if (!fileExists(path))
	    break;
	}

    
    safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path);
    verbose(1,"%s\n",cmd); system(cmd);
    
    fname = rStringIn("/", path);
    ++fname;
    
    safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);
    
    safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);
    }

dyStringFree(&dy);
}
Ejemplo n.º 15
0
/* write here first, then move to hg/lib/genePred.c */
void printExons(char *snpName, struct genePred *gene, char *chrom, int start, int end, struct dnaSeq *seq)
{
int iExon = 0;
// which exon 
int startExon = 0;
int endExon = 0;
struct dyString *dy = NULL;
char *seqBuffer;
int size;
char *ptr = seq->dna;
// actual nucleotide positions
int exonStart = 0;
int exonEnd = 0;

// arg checking
if (start > end) 
    {
    fprintf(stderr, "error with %s (start exceeds end)\n", gene->name);
    return;
    }

startExon = findExonPos(gene, start);
endExon = findExonPos(gene, end);

// more checking
if (startExon == -1 || endExon == -1) 
    {
    fprintf(stderr, "error with %s (startExon = %d; endExon = %d)\n", gene->name, startExon, endExon);
    return;
    }

// simple case
if (startExon == endExon)
    {
    // printf("simple case; all in one exon\n");
    size = end - start + 1;
    seqBuffer = needMem(size);
    strncpy(seqBuffer, &ptr[start], size);
    printf("> %s %s:%d-%d (%s)\n", gene->name, chrom, start, end, snpName);
    printLines(stdout, seqBuffer, 50);
    freeMem(seqBuffer);
    // *seqBuffer = 0;
    return;
    }

// printf("not simple case; flank in multiple exons\n");
// printf("startExon = %d; endExon = %d\n", startExon, endExon);
// append to dyString
dy = newDyString(512);

// remainder of first exon
exonEnd = gene->exonEnds[startExon-1];
size = exonEnd - start + 1;
seqBuffer = needMem(size);
strncpy(seqBuffer, &ptr[start], size);
dyStringPrintf(dy, "%s", seqBuffer);
freeMem(seqBuffer);

// middle exons
for (iExon = startExon + 1; iExon < endExon; iExon++)
    {
        exonStart = gene->exonStarts[iExon-1];
        exonEnd = gene->exonEnds[iExon-1];
	size = exonEnd - exonStart + 1;
	seqBuffer = needMem(size);
	strncpy(seqBuffer, &ptr[exonStart], size);
        dyStringPrintf(dy, "%s", seqBuffer);
        freeMem(seqBuffer);
    }

// start of last exon
exonStart = gene->exonStarts[endExon-1];
size = end - exonStart + 1;
seqBuffer = needMem(size);
strncpy(seqBuffer, &ptr[exonStart], size);
dyStringPrintf(dy, "%s", seqBuffer);
freeMem(seqBuffer);

printf("> %s %s:%d-%d (%s)\n", gene->name, chrom, start, end, snpName);
printLines(stdout, dy->string, 50);

dyStringFree(&dy);

}
Ejemplo n.º 16
0
static void asdDoQueryChunking(struct annoStreamDb *self, char *minChrom, uint minEnd)
/* Return a sqlResult for a query on table items in position range.
 * If doing a whole genome query, just select all rows from table. */
{
struct annoStreamer *sSelf = &(self->streamer);
boolean hasWhere = FALSE;
struct dyString *query = self->makeBaselineQuery(self, &hasWhere);
if (sSelf->chrom != NULL && self->rowBuf.size > 0 && !self->doNextChunk)
    {
    // We're doing a region query, we already got some rows, and don't need another chunk:
    resetRowBuf(&self->rowBuf);
    self->eof = TRUE;
    }
if (self->useMaxOutRows)
    {
    self->maxOutRows -= self->rowBuf.size;
    if (self->maxOutRows <= 0)
	self->eof = TRUE;
    }
if (self->eof)
    return;
int queryMaxItems = ASD_CHUNK_SIZE;
if (self->useMaxOutRows && self->maxOutRows < queryMaxItems)
    queryMaxItems = self->maxOutRows;
if (self->hasBin)
    {
    // Results will be in bin order, but we can restore chromStart order by
    // accumulating initial coarse-bin items and merge-sorting them with
    // subsequent finest-bin items which will be in chromStart order.
    if (self->doNextChunk && self->mergeBins && !self->gotFinestBin)
	errAbort("annoStreamDb %s: can't continue merge in chunking query; "
		 "increase ASD_CHUNK_SIZE", sSelf->name);
    self->mergeBins = TRUE;
    if (self->qLm == NULL)
	self->qLm = lmInit(0);
    }
if (self->endFieldIndexName != NULL)
    // Don't let mysql use a (chrom, chromEnd) index because that messes up
    // sorting by chromStart.
    sqlDyStringPrintf(query, " IGNORE INDEX (%s) ", self->endFieldIndexName);
if (sSelf->chrom != NULL)
    {
    uint start = sSelf->regionStart;
    if (minChrom)
	{
	if (differentString(minChrom, sSelf->chrom))
	    errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
		     sSelf->name, minChrom, sSelf->chrom);
	if (start < minEnd)
	    start = minEnd;
	}
    if (self->doNextChunk && start < self->nextChunkStart)
	start = self->nextChunkStart;
    sqlDyStringAppend(query, hasWhere ? " and " : " where ");
    sqlDyStringPrintf(query, "%s = '%s' and ", self->chromField, sSelf->chrom);
    if (self->hasBin)
	{
	if (self->doNextChunk && self->gotFinestBin)
	    // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
	    dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
	hAddBinToQuery(start, sSelf->regionEnd, query);
	}
    if (self->doNextChunk)
	sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
    sqlDyStringPrintf(query, "%s < %u and %s > %u ", self->startField, sSelf->regionEnd,
		      self->endField, start);
    if (self->notSorted)
	sqlDyStringPrintf(query, "order by %s ", self->startField);
    sqlDyStringPrintf(query, "limit %d", queryMaxItems);
    bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
    if (self->rowBuf.size == 0)
	self->eof = TRUE;
    }
else
    {
    // Genome-wide query: break it into chrom-by-chrom queries.
    if (self->queryChrom == NULL)
	self->queryChrom = self->chromList;
    else if (!self->doNextChunk)
	{
	self->queryChrom = self->queryChrom->next;
	resetMergeState(self);
	}
    if (minChrom != NULL)
	{
	// Skip chroms that precede minChrom
	while (self->queryChrom != NULL && strcmp(self->queryChrom->name, minChrom) < 0)
	    {
	    self->queryChrom = self->queryChrom->next;
	    self->doNextChunk = FALSE;
	    resetMergeState(self);
	    }
	if (self->hasBin)
	    {
	    self->mergeBins = TRUE;
	    if (self->qLm == NULL)
		self->qLm = lmInit(0);
	    }
	}
    if (self->queryChrom == NULL)
	self->eof = TRUE;
    else
	{
	char *chrom = self->queryChrom->name;
	int start = 0;
	if (minChrom != NULL && sameString(chrom, minChrom))
	    start = minEnd;
	if (self->doNextChunk && start < self->nextChunkStart)
	    start = self->nextChunkStart;
	uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name);
	sqlDyStringAppend(query, hasWhere ? " and " : " where ");
	sqlDyStringPrintf(query, "%s = '%s' ", self->chromField, chrom);
	if (start > 0 || self->doNextChunk)
	    {
	    dyStringAppend(query, "and ");
	    if (self->hasBin)
		{
		if (self->doNextChunk && self->gotFinestBin)
		    // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
		    dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
		hAddBinToQuery(start, end, query);
		}
	    if (self->doNextChunk)
		sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
	    // region end is chromSize, so no need to constrain startField here:
	    sqlDyStringPrintf(query, "%s > %u ", self->endField, start);
	    }
	if (self->notSorted)
	    sqlDyStringPrintf(query, "order by %s ", self->startField);
	dyStringPrintf(query, "limit %d", queryMaxItems);
	bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
	// If there happens to be no items on chrom, try again with the next chrom:
	if (! self->eof && self->rowBuf.size == 0)
	    asdDoQueryChunking(self, minChrom, minEnd);
	}
    }
dyStringFree(&query);
}
Ejemplo n.º 17
0
void syncOneRecord(struct sqlConnection *conn, char *type, struct jsonWrite *json, 
    char *table, long long id)
/* Send over one record and save UUID result to row of table defined by id in idField. */
{
/* Construct dyString for URL */
struct dyString *dyUrl = dyStringNew(0);
dyStringPrintf(dyUrl, "http://%s:%s@%s/%s/", gUserId, gPassword, gHost, type);
verbose(2, "%s\n", dyUrl->string);

/* Construct dyString for http header */
struct dyString *dyHeader = dyStringNew(0);
dyStringPrintf(dyHeader, "Content-length: %d\r\n", json->dy->stringSize);
dyStringPrintf(dyHeader, "Content-type: text/javascript\r\n");

/* Send header and then JSON */
int sd = netOpenHttpExt(dyUrl->string, "POST", dyHeader->string);
mustWriteFd(sd, json->dy->string, json->dy->stringSize);

/* Grab response */
struct dyString *dyText = netSlurpFile(sd);
close(sd);
uglyf("%s\n", dyText->string);

/* Turn it into htmlPage structure - this will also parse out http header */
struct htmlPage *response = htmlPageParse(dyUrl->string, dyText->string);
uglyf("status %s %d\n", response->status->version, response->status->status);

/* If we got bad status abort with hopefully very informative message. */
int status = response->status->status;
if (status != 200 && status != 201)  // HTTP codes
    {
    errAbort("ERROR - Metadatabase returns %d to our post request\n"
	     "POSTED JSON: %s\n"
             "URL: %s\n"
	     "FULL RESPONSE: %s\n",  status, json->dy->string, dyUrl->string, dyText->string);
    }

/* Parse uuid out of json response.  It should look something like
	{
	"status": "success", 
	"@graph": [
	     {
	     "description": "The macs2 peak calling software from Tao Liu.", 
	     "name": "macs2", "title": "macs2", "url": "https://github.com/taoliu/MACS/", 
	     "uuid": "9bda84fd-9872-49e3-9aa0-b71adbf9f31d", 
	     "schema_version": "1", 
	     "source_url": "https://github.com/taoliu/MACS/", 
	     "references": [], 
	     "@id": "/software/9bda84fd-9872-49e3-9aa0-b71adbf9f31d/", 
	     "@type": ["software", "item"], 
	     "aliases": []
	     }
	],     
	"@type": ["result"]
	}
*/
struct jsonElement *jsonRoot = jsonParse(response->htmlText);
struct jsonElement *graph = jsonMustFindNamedField(jsonRoot, "", "@graph");
struct slRef *ref = jsonListVal(graph, "@graph");
assert(slCount(ref) == 1);
struct jsonElement *graphEl = ref->val;
char *uuid = jsonStringField(graphEl, "uuid");
uglyf("Got uuid %s\n", uuid);

/* Save uuid to table */
char query[256];
sqlSafef(query, sizeof(query),
    "update %s set metaUuid='%s' where id=%lld", table, uuid, id);
sqlUpdate(conn, query);

/* Clean up */
dyStringFree(&dyUrl);
dyStringFree(&dyHeader);
dyStringFree(&dyText);
response->fullText = NULL;  // avoid double free of this
htmlPageFree(&response);
}
Ejemplo n.º 18
0
struct tagStorm *idfToStormTop(char *fileName)
/* Convert an idf.txt format file to a tagStorm with a single top-level stanza */
{
/* Create a tag storm with one as yet empty stanza */
struct tagStorm *storm = tagStormNew(fileName);
struct tagStanza *stanza = tagStanzaNew(storm, NULL);

/* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */
char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data";
struct dyString *additionalFileDy = dyStringNew(0);

/* There can be multiple secondary accession tags, so handle these too */
char *secondaryAccessionTag = "idf.Comment_SecondaryAccession";
struct dyString *secondaryAccessionDy = dyStringNew(0);


/* Parse lines from idf file into stanza */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct dyString *dyVal = dyStringNew(0);
while (lineFileNextReal(lf, &line))
    {
    /* Erase trailing tab... */
    eraseTrailingSpaces(line);

    /* Parse line into tab-separated array and make sure it's a reasonable size */
    char *row[256];
    int rowSize = chopTabs(line, row);
    if (rowSize == ArraySize(row))
        errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName);
    if (rowSize < 2)
	continue;

    /* Convert first element to tagName */
    char tagName[256];
    aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName));

    /* Special case where we already are a comma separated list */
    if (sameString(tagName, "idf.Publication_Author_List"))
        {
	tagStanzaAppend(storm, stanza, tagName, row[1]);
	}
    else if (startsWith(additionalFilePrefix, tagName))
        {
	csvEscapeAndAppend(additionalFileDy, row[1]);
	}
    else if (sameString(secondaryAccessionTag, tagName))
        {
	csvEscapeAndAppend(secondaryAccessionDy, row[1]);
	}
    else
	{
	/* Convert rest of elements to possibly comma separated values */
	dyStringClear(dyVal);
	int i;
	for (i=1; i<rowSize; ++i)
	    csvEscapeAndAppend(dyVal, row[i]);
	tagStanzaAppend(storm, stanza, tagName, dyVal->string);
	}
    }
if (additionalFileDy->stringSize != 0)
     tagStanzaAppend(storm, stanza, additionalFilePrefix, additionalFileDy->string);
if (secondaryAccessionDy->stringSize != 0)
     tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string);
dyStringFree(&secondaryAccessionDy);
dyStringFree(&additionalFileDy);
dyStringFree(&dyVal);
lineFileClose(&lf);
return storm;
}
Ejemplo n.º 19
0
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
	       boolean isTabix)
/* Print out selected fields from VCF.  If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
	vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
				   100, maxOut);
    else
	vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			      100, maxOut, TRUE);
    if (vcff == NULL)
	noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output:
    if (allFields && vcff->genotypeCount == 0)
	fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
	{
	fprintf(f, "%s", vcff->headerString);
	if (filter)
	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	if (!allFields)
	    {
	    fprintf(f, "#%s", fieldArray[0]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", fieldArray[i]);
	    fprintf(f, "\n");
	    }
	printedHeader = TRUE;
	}
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
        {
	vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* if we're looking for identifiers, check if this matches */
	    if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;
	    // All fields output: after asFilter'ing, preserve original VCF chrom
	    if (allFields && !sameString(rec->chrom, region->chrom))
		row[0] = rec->chrom;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		{
		fprintf(f, "\t%s", row[columnArray[i]]);
		}
	    fprintf(f, "\n");
	    maxOut --;
	    }
	}
    vcfFileFree(&vcff);
    freeMem(fileName);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
Ejemplo n.º 20
0
void initStep(struct sqlConnection *conn, struct stepInit *init)
/* Create step based on initializer */
{
/* Do a little validation on while counting up inputs and outputs */
int inCount = commaSepCount(init->inputTypes);
int matchCount = commaSepCount(init->inputFormats);
if (inCount != matchCount)
    errAbort("inputTypes has %d elements but inputFormats has %d in step %s", 
	    inCount, matchCount, init->name);
int outCount = commaSepCount(init->outputTypes);
matchCount = commaSepCount(init->outputFormats);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputFormats has %d in step %s", 
	    outCount, matchCount, init->name);
matchCount = commaSepCount(init->outputNamesInTempDir);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", 
	    outCount, matchCount, init->name);

struct dyString *query = dyStringNew(0);
dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name);
int existingCount = sqlQuickNum(conn, query->string);
if (existingCount > 0)
    {
    warn("%s already exists in eapStep", init->name);
    dyStringFree(&query);
    return;
    }

/* Parse out software part and make sure that all pieces are there. */
char **softwareArray;
int softwareCount;
sqlStringDynamicArray(init->software, &softwareArray, &softwareCount);
unsigned softwareIds[softwareCount];
int i;
for (i=0; i<softwareCount; ++i)
    {
    char *name = softwareArray[i];
    dyStringClear(query);
    dyStringPrintf(query, "select id from eapSoftware where name='%s'", name);
    unsigned softwareId = sqlQuickNum(conn, query->string);
    if (softwareId == 0)
        errAbort("Software %s doesn't exist by that name in eapSoftware", name);
    softwareIds[i] = softwareId;
    }

/* Make step record. */
dyStringClear(query);
dyStringAppend(query,
	"insert eapStep (name,cpusRequested,"
        " inCount,inputTypes,inputFormats,"
	" outCount,outputNamesInTempDir,outputTypes,outputFormats)"
	" values (");
dyStringPrintf(query, "'%s',", init->name);
dyStringPrintf(query, "%d,", init->cpusRequested);
dyStringPrintf(query, "%d,", inCount);
dyStringPrintf(query, "'%s',", init->inputTypes);
dyStringPrintf(query, "'%s',", init->inputFormats);
dyStringPrintf(query, "%d,", outCount);
dyStringPrintf(query, "'%s',", init->outputNamesInTempDir);
dyStringPrintf(query, "'%s',", init->outputTypes);
dyStringPrintf(query, "'%s'", init->outputFormats);
dyStringPrintf(query, ")");
sqlUpdate(conn, query->string);

/* Make software/step associations. */
for (i=0; i<softwareCount; ++i)
    {
    dyStringClear(query);
    dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')",
	    init->name, softwareArray[i]);
    sqlUpdate(conn, query->string);
    }

/* Force step version stuff to be made right away */
eapCurrentStepVersion(conn, init->name);

/* Clean up. */
dyStringFree(&query);
freez(&softwareArray[0]);
freez(&softwareArray);
}
struct mafAli *hgMafFrag(
	char *database,     /* Database, must already have hSetDb to this */
	char *track, 	    /* Name of MAF track */
	char *chrom, 	    /* Chromosome (in database genome) */
	int start, int end, /* start/end in chromosome */
	char strand, 	    /* Chromosome strand. */
	char *outName, 	    /* Optional name to use in first component */
	struct slName *orderList /* Optional order of organisms. */
	)
/* mafFrag- Extract maf sequences for a region from database.
 * This creates a somewhat unusual MAF that extends from start
 * to end whether or not there are actually alignments.  Where
 * there are no alignments (or alignments missing a species)
 * a . character fills in.   The score is always zero, and
 * the sources just indicate the species.  You can mafFree this
 * as normal. */
{
int chromSize = hChromSize(database, chrom);
struct sqlConnection *conn = hAllocConn(database);
struct dnaSeq *native = hChromSeq(database, chrom, start, end);
struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
char masterSrc[128];
struct hash *orgHash = newHash(10);
struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
int curPos = start, symCount = 0;
struct slName *name;
int order = 0;

/* Check that the mafs are really copacetic, the particular
 * subtype we think is in the database that this (relatively)
 * simple code can handle. */
safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
mafCheckFirstComponentSrc(mafList, masterSrc);
mafCheckFirstComponentStrand(mafList, '+');
slSort(&mafList, mafCmp);

/* Prebuild organisms if possible from input orderList. */
for (name = orderList; name != NULL; name = name->next)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, name->name, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    org->order = order++;
    if (nativeOrg == NULL)
        nativeOrg = org;
    }
if (orderList == NULL)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, database, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    if (nativeOrg == NULL)
        nativeOrg = org;
    }

/* Go through all mafs in window, mostly building up
 * org->dy strings. */
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    struct mafComp *mc, *mcMaster = maf->components;
    struct mafAli *subMaf = NULL;
    order = 0;
    if (curPos < mcMaster->start)
	{
	fillInMissing(nativeOrg, orgList, native, start,
		curPos, mcMaster->start);
	symCount += mcMaster->start - curPos;
	}
    if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
    						    * backtracking */
	{
	if (mafNeedSubset(maf, masterSrc, curPos, end))
	    {
	    subMaf = mafSubset(maf, masterSrc, curPos, end);
	    if (subMaf == NULL)
	        continue;
	    }
	else
	    subMaf = maf;
	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
	    {
	    /* Extract name up to dot into 'orgName' */
	    char buf[128], *e, *orgName;

	    if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */
		continue;

	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */

	    e = strchr(mc->src, '.');
	    if (e == NULL)
		orgName = mc->src;
	    else
		{
		int len = e - mc->src;
		if (len >= sizeof(buf))
		    errAbort("organism/database name %s too long", mc->src);
		memcpy(buf, mc->src, len);
		buf[len] = 0;
		orgName = buf;
		}

	    /* Look up dyString corresponding to  org, and create a
	     * new one if necessary. */
	    org = hashFindVal(orgHash, orgName);
	    if (org == NULL)
		{
		if (orderList != NULL)
		   errAbort("%s is not in orderList", orgName);
		AllocVar(org);
		slAddHead(&orgList, org);
		hashAddSaveName(orgHash, orgName, org, &org->name);
		org->dy = dyStringNew(native->size*1.5);
		dyStringAppendMultiC(org->dy, '.', symCount);
		if (nativeOrg == NULL)
		    nativeOrg = org;
		}
	    if (orderList == NULL && order > org->order)
		org->order = order;
	    org->hit = TRUE;

	    /* Fill it up with alignment. */
	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
	    }
	for (org = orgList; org != NULL; org = org->next)
	    {
	    if (!org->hit)
		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
	    org->hit = FALSE;
	    }
	symCount += subMaf->textSize;
	curPos = mcMaster->start + mcMaster->size;
	if (subMaf != maf)
	    mafAliFree(&subMaf);
	}
    }
if (curPos < end)
    {
    fillInMissing(nativeOrg, orgList, native, start, curPos, end);
    symCount += end - curPos;
    }
mafAliFreeList(&mafList);

slSort(&orgList, oneOrgCmp);
if (strand == '-')
    {
    for (org = orgList; org != NULL; org = org->next)
	reverseComplement(org->dy->string, org->dy->stringSize);
    }

/* Construct our maf */
AllocVar(maf);
maf->textSize = symCount;
for (org = orgList; org != NULL; org = org->next)
    {
    struct mafComp *mc;
    AllocVar(mc);
    if (org == orgList)
        {
	if (outName != NULL)
	    {
	    mc->src = cloneString(outName);
	    mc->srcSize = native->size;
	    mc->strand = '+';
	    mc->start = 0;
	    mc->size = native->size;
	    }
	else
	    {
	    mc->src = cloneString(masterSrc);
	    mc->srcSize = chromSize;
	    mc->strand = strand;
	    if (strand == '-')
	       reverseIntRange(&start, &end, chromSize);
	    mc->start = start;
	    mc->size = end-start;
	    }
	}
    else
        {
	int size = countAlpha(org->dy->string);
	mc->src = cloneString(org->name);
	mc->srcSize = size;
	mc->strand = '+';
	mc->start = 0;
	mc->size = size;
	}
    mc->text = cloneString(org->dy->string);
    dyStringFree(&org->dy);
    slAddHead(&maf->components, mc);
    }
slReverse(&maf->components);

slFreeList(&orgList);
freeHash(&orgHash);
hFreeConn(&conn);
return maf;
}
Ejemplo n.º 22
0
struct axt *pslToAxt(struct psl *psl, struct hash *qHash, char *tNibDir, 
	struct dlList *fileCache)
{
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL;
struct dyString *q = newDyString(16*1024);
struct dyString *t = newDyString(16*1024);
int blockIx;
int qs, ts ;
int lastQ = 0, lastT = 0, size;
int qOffset = 0;
int tOffset = 0;
struct axt *axt = NULL;
boolean qIsNib = FALSE;
boolean tIsNib = FALSE;
int cnt = 0;
//struct dnaSeq *tSeq = NULL;
struct nibInfo *tNib = NULL;

struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);
   // hGenBankGetMrna(psl->qName, NULL);
/*
freeDnaSeq(&qSeq);
freez(&qName);
assert(mrnaList != NULL);
for (mrna = mrnaList; mrna != NULL ; mrna = mrna->next)
    {
    assert(mrna != NULL);
    cnt++;
    if (sameString(mrna->name, psl->qName))
        {
        qSeq = cloneDnaSeq(mrna);
        assert(qSeq != NULL);
        break;
        }
    }
    */
if (qSeq == NULL)
    {
    warn("mrna sequence data not found %s, searched %d sequences\n",psl->qName,cnt);
    dyStringFree(&q);
    dyStringFree(&t);
    dnaSeqFree(&tSeq);
    dnaSeqFree(&qSeq);
    return NULL;
    }
if (qSeq->size != psl->qSize)
    {
    warn("sequence %s aligned is different size %d from mrna.fa file %d \n",psl->qName,psl->qSize,qSeq->size);
    dyStringFree(&q);
    dyStringFree(&t);
    dnaSeqFree(&tSeq);
    dnaSeqFree(&qSeq);
    return NULL;
    }
qName = cloneString(psl->qName);
if (qIsNib && psl->strand[0] == '-')
    qOffset = psl->qSize - psl->qEnd;
else
    qOffset = 0;
verbose(5,"qString len = %d qOffset = %d\n",qSeq->size,qOffset);
if (tName == NULL || !sameString(tName, psl->tName) || tIsNib)
    {
    freeDnaSeq(&tSeq);
    freez(&tName);
    tName = cloneString(psl->tName);
    tNib = nibInfoFromCache(nibHash, tNibDir, tName);
    assert(tNib !=NULL);
    tSeq = nibInfoLoadStrand(tNib, psl->tStart, psl->tEnd, '+');
    assert(tSeq !=NULL);
    tOffset = psl->tStart;
    //readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart, 
//	tHash, fileCache, &tSeq, &tOffset, &tIsNib);
    }
verbose(4,"strand t %s \n",psl->strand);
if (tSeq != NULL)
    verbose(5,"tString len = %d tOffset = %d\n",tSeq->size,tOffset);
else
    errAbort("tSeq is NULL\n");
if (psl->strand[0] == '-')
    reverseComplement(qSeq->dna, qSeq->size);
//if (strlen(psl->strand) > 1 )
//    if (psl->strand[1] == '-')
//        reverseComplement(tSeq->dna, tSeq->size);
for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
    {
    qs = psl->qStarts[blockIx] - qOffset;
    ts = psl->tStarts[blockIx] - tOffset;

    if (blockIx != 0)
        {
	int qGap, tGap, minGap;
	qGap = qs - lastQ;
	tGap = ts - lastT;
	minGap = min(qGap, tGap);
	if (minGap > 0)
	    {
	    writeGap(q, qGap, qSeq->dna + lastQ, t, tGap, tSeq->dna + lastT);
	    }
	else if (qGap > 0)
	    {
	    writeInsert(q, t, qSeq->dna + lastQ, qGap);
	    }
	else if (tGap > 0)
	    {
	    writeInsert(t, q, tSeq->dna + lastT, tGap);
	    }
	}
    size = psl->blockSizes[blockIx];
    assert(qSeq != NULL);
    dyStringAppendN(q, qSeq->dna + qs, size);
    lastQ = qs + size;
    dyStringAppendN(t, tSeq->dna + ts, size);
    lastT = ts + size;
    }

if (strlen(q->string) != strlen(t->string))
    warn("Symbol count(t) %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n",
    	(int)strlen(t->string), (int)strlen(q->string), psl->tName, psl->tStart, psl->qName, t->string, q->string);
if (psl->strand[0] == '-')
    {
    reverseComplement(q->string, q->stringSize);
    reverseComplement(t->string, t->stringSize);
    }
axt = axtCreate(q->string, t->string, min(q->stringSize,t->stringSize), psl);
dyStringFree(&q);
dyStringFree(&t);
//dnaSeqFree(&tSeq);
dnaSeqFree(&qSeq);
if (qIsNib)
    freez(&qName);
//if (tIsNib)
//    freez(&tName);
return axt;
}
Ejemplo n.º 23
0
struct rqlStatement *rqlStatementParse(struct lineFile *lf)
/* Parse an RQL statement out of text */
{
struct tokenizer *tkz = tokenizerOnLineFile(lf);
tkz->uncommentShell = TRUE;
tkz->uncommentC = TRUE;
tkz->leaveQuotes = TRUE;
struct rqlStatement *rql;
AllocVar(rql);
rql->command = cloneString(tokenizerMustHaveNext(tkz));
if (sameString(rql->command, "select"))
    {
    struct dyString *buf = dyStringNew(0);
    struct slName *list = NULL;
    char *tok = rqlParseFieldSpec(tkz, buf);
    /* Look for count(*) as special case. */
    boolean countOnly = FALSE;
    if (sameString(tok, "count"))
        {
	char *paren = tokenizerNext(tkz);
	if (paren[0] == '(')
	    {
	    while ((paren = tokenizerMustHaveNext(tkz)) != NULL)
	        {
		if (paren[0] == ')')
		    break;
		}
	    countOnly = TRUE;
	    freez(&rql->command);
	    rql->command = cloneString("count");
	    }
	else
	    {
	    tokenizerReuse(tkz);
	    }
	}
    if (!countOnly)
	{
	list = slNameNew(tok);
	for (;;)
	    {
	    /* Parse out comma-separated field list. */
	    char *comma = tokenizerNext(tkz);
	    if (comma == NULL || comma[0] != ',')
		{
		tokenizerReuse(tkz);
		break;
		}
	    slNameAddHead(&list, rqlParseFieldSpec(tkz, buf));
	    }
	slReverse(&list);
	rql->fieldList = list;
	}
    dyStringFree(&buf);
    }
else if (sameString(rql->command, "count"))
    {
    /* No parameters to count. */
    }
else
    errAbort("Unknown RQL command '%s line %d of %s\n", rql->command, lf->lineIx, lf->fileName);
    
char *from = tokenizerNext(tkz);
if (from != NULL)
    {
    if (sameString(from, "from"))
        {
	for (;;)
	    {
	    struct dyString *buf = dyStringNew(0);
	    char *table = rqlParseFieldSpec(tkz, buf);
	    slNameAddTail(&rql->tableList, table);
	    char *comma = tokenizerNext(tkz);
	    if (comma == NULL)
	        break;
	    if (comma[0] != ',')
	        {
		tokenizerReuse(tkz);
		break;
		}
	    dyStringFree(&buf);
	    }
	}
    else
	{
	errAbort("missing 'from' clause in %s\n", rql->command);
	}
    }

/* Parse where clause. */
char *where = tokenizerNext(tkz);
if (where != NULL)
    {
    if (!sameString(where, "where"))
	{
        tokenizerReuse(tkz);
	}
    else
        {
	rql->whereClause = rqlParseExpression(tkz);
	rqlParseVarsUsed(rql->whereClause, &rql->whereVarList);
	}
    }

/* Parse limit clause. */
char *limit = tokenizerNext(tkz);
rql->limit = -1;	
if (limit != NULL)
    {
    if (!sameString(limit, "limit"))
        errAbort("Unknown clause '%s' line %d of %s", limit, lf->lineIx, lf->fileName);
    char *count = tokenizerMustHaveNext(tkz);
    if (!isdigit(count[0]))
       errAbort("Expecting number after limit, got %s line %d of %s", 
       	count, lf->lineIx, lf->fileName);
    rql->limit = atoi(count);
    }

/* Check that are at end of statement. */
char *extra = tokenizerNext(tkz);
if (extra != NULL)
    errAbort("Extra stuff starting with '%s' past end of statement line %d of %s", 
    	extra, lf->lineIx, lf->fileName);
return rql;
}
Ejemplo n.º 24
0
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;

if (nibHash == NULL)
    nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
    {
    struct misMatch *misMatchList = NULL;
    struct binKeeper *bk = NULL;
    struct binElement *el, *elist = NULL;
    struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
    struct misMatch *mf = NULL;
    ps = pseudoGeneLinkLoad(row);
    tmpName[0] = cloneString(ps->name);
    chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName));
    verbose(2,"name %s %s:%d-%d\n",
            ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
    /* get expressed retro from hash */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        rPsl = el->val;
        verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
        }
    /* find mrnas that overlap parent gene */
    bk = hashFindVal(mrnaHash, ps->gChrom);
    elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        pPsl = el->val;
        verbose(2,"parent %s %s:%d %d,%d\n",
                pPsl->qName, pPsl->tName,pPsl->tStart,
                pPsl->match, pPsl->misMatch);
        }
    /* find self chain */
    bk = hashFindVal(chainHash, ps->chrom);
    elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
    slSort(&elist, chainCmpScoreDesc);
    for (el = elist; el != NULL ; el = el->next)
        {
        struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
        int qs = chain->qStart;
        int qe = chain->qEnd;
        int id = chain->id;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        if (!sameString(chain->qName , ps->gChrom) || 
                !positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
            {
            verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd,
                ps->gChrom,ps->gStart,ps->gEnd);
            continue;
            }
        verbose(2,"chain id %d %4.0f",chain->id, chain->score);
        chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, 
            &subChain,  &retChainToFree);
        if (subChain != NULL)
            chain = subChain;
        chainSubsetOnQ(chain, ps->gStart, ps->gEnd, 
            &subChain,  &retChainToFree2);
        if (subChain != NULL)
            chain = subChain;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        verbose(2," %s:%d-%d %s:%d-%d ", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd);
        if (subChain != NULL)
            verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
                    subChain->qName, subChain->qStart, subChain->qEnd, 
                    subChain->tName,subChain->tStart,subChain->tEnd);

	qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
	tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
	tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
	qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
	axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
	    maxGap, BIGNUM);
        verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
        for (axt = axtList; axt != NULL ; axt = axt->next)
            {
            addMisMatch(&misMatchList, axt, chain->qSize);
            }
        verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
        chainFree(&retChainToFree);
        chainFree(&retChainToFree2);
        break;
        }
    /* create axt of each expressed retroGene to parent gene */
        /* get alignment for each mrna overlapping retroGene */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
    {
    char queryName[512];
    char axtName[512];
    char pslName[512];
    safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
    safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
    safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
    op = fopen(pslName,"w");
    for (el = elist ; el != NULL ; el = el->next)
        {
        psl = el->val;
        pslOutput(psl, op, '\t','\n');
        qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);

        if (qSeq != NULL)
            slAddHead(&seqList, qSeq);
        else
            errAbort("seq %s not found \n", psl->qName);
        }
    fclose(op);
    faWriteAll(queryName, seqList);
    safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
    ret = system(cmd);
    if (ret != 0)
        errAbort("ret is %d %s\n",ret,cmd);
    verbose(2, "ret is %d %s\n",ret,cmd);
    af = lineFileOpen(axtName, TRUE);
    while ((axt = axtRead(af)) != NULL)
        slAddHead(&mAxt, axt);
    lineFileClose(&af);
    }
    slReverse(&mAxt);
    /* for each parent/retro pair, count bases matching retro and parent better */
    for (el = elist; el != NULL ; el = el->next)
        {
        int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
        struct dyString *parentMatch = newDyString(16*1024);
        struct dyString *retroMatch = newDyString(16*1024);
        mPsl = el->val;

        if (mAxt != NULL)
            {
            verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
                    mPsl->qName, mPsl->tName,mPsl->tStart,
                    mPsl->match, mPsl->misMatch, 
                    mAxt->qName);
            assert(sameString(mPsl->qName, mAxt->qName));
            for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
                {
                int j = mAxt->tStart - mPsl->tStart;
                verbose(5, "listLen = %d\n",slCount(&misMatchList));
                if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
                    {
                    if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
                                i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], 
                                mf->parentBase, mf->parentLoc);
                        dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
                        scoreRetro++;
                        }
                    else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
                        scoreParent++;
                        }
                    else
                        {
                        verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        scoreNeither++;
                        }
                    }
                }
            verbose(2,"final score %s parent %d retro %d  neither %d\n",
                    mPsl->qName, scoreParent, scoreRetro, scoreNeither);
            fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
                    ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, 
                    mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, 
                    scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
            mAxt = mAxt->next;
            }
        dyStringFree(&parentMatch);
        dyStringFree(&retroMatch);
        }
    }
}
void configPageSetTrackVis(int vis)
/* Do config page after setting track visibility. If vis is -2, then visibility
 * is unchanged.  If -1 then set visibility to default, otherwise it should
 * be tvHide, tvDense, etc. */
{
struct dyString *title = dyStringNew(0);
char *groupTarget = NULL;
struct track *trackList =  NULL;
struct track *ideoTrack = NULL;
struct group *groupList = NULL;

withPriorityOverride = cartUsualBoolean(cart, configPriorityOverride, FALSE);

/* Get track list and group them. */
ctList = customTracksParseCart(database, cart, &browserLines, &ctFileName);
trackList = getTrackList(&groupList, vis);

/* The ideogram for some reason is considered a track.
 * We don't really want to process it as one though, so
 * we see if it's there, and if necessary remove it. */
ideoTrack = chromIdeoTrack(trackList);
if (ideoTrack != NULL)
    removeTrackFromGroup(ideoTrack);

/* Fetch group to change on if any from CGI,
 * and remove var so it doesn't get used again. */
groupTarget = cloneString(cartUsualString(cart, configGroupTarget, ""));
cartRemove(cart, configGroupTarget);
if (sameString(groupTarget, "none"))
    freez(&groupTarget);

dyStringPrintf(title, "Configure Image");


hPrintf("<FORM ACTION=\"%s\" NAME=\"mainForm\" METHOD=%s>\n", hgTracksName(),
	cartUsualString(cart, "formMethod", "POST"));
webStartWrapperDetailedNoArgs(cart, database, "", title->string, FALSE, FALSE, FALSE, FALSE);
cartSaveSession(cart);

hPrintf("<INPUT TYPE=HIDDEN NAME=\"hgTracksConfigPage\" VALUE=\"\">");
/* do not want all the submit buttons named the same thing, this one is: */
cgiMakeButton("topSubmit", "submit");

// 3 column table
hPrintf("<TABLE style=\"border:0px; \">\n");
hPrintf("<TR><TD>image width:");
hPrintf("<TD style=\"text-align: right\">");
hIntVar("pix", tl.picWidth, 4);
hPrintf("<TD>pixels</TR>");

hPrintf("<TR><TD>label area width:");
hPrintf("<TD style=\"text-align: right\">");
hIntVar("hgt.labelWidth", leftLabelWidthChars, 2);
hPrintf("<TD>characters<TD></TR>");

hPrintf("<TR><TD>text size:");
hPrintf("<TD style=\"text-align: right\">");
textSizeDropDown();
hPrintf("<TD>");
if (trackLayoutInclFontExtras())
    {
    char *defaultStyle = cartUsualString(cart, "fontType", "medium");
    cartMakeRadioButton(cart, "fontType", "medium", defaultStyle);
    hPrintf("&nbsp;medium&nbsp;");
    cartMakeRadioButton(cart, "fontType", "fixed", defaultStyle);
    hPrintf("&nbsp;fixed&nbsp;");
    cartMakeRadioButton(cart, "fontType", "bold", defaultStyle);
    hPrintf("&nbsp;bold&nbsp;");
    hPrintf("&nbsp;");
    }
hPrintf("<TR><BR>");
hTableStart();
if (ideoTrack != NULL)
    {
    hPrintf("<TR><TD>");
    hCheckBox("ideogram", cartUsualBoolean(cart, "ideogram", TRUE));
    hPrintf("</TD><TD>");
    hPrintf("Display chromosome ideogram above main graphic");
    hPrintf("</TD></TR>\n");
    }
hPrintf("<TR><TD>");
hCheckBox("guidelines", cartUsualBoolean(cart, "guidelines", TRUE));
hPrintf("</TD><TD>");
hPrintf("Show light blue vertical guidelines");
hPrintf("</TD></TR>\n");

hPrintf("<TR><TD>");
hCheckBox("leftLabels", cartUsualBoolean(cart, "leftLabels", TRUE));
hPrintf("</TD><TD>");
hPrintf("Display labels to the left of items in tracks");
hPrintf("</TD></TR>\n");

hPrintf("<TR><TD>");
hCheckBox("centerLabels", cartUsualBoolean(cart, "centerLabels", TRUE));
hPrintf("</TD><TD>");
hPrintf("Display description above each track");
hPrintf("</TD></TR>\n");

hPrintf("<TR><TD>");
hCheckBox("trackControlsOnMain", cartUsualBoolean(cart, "trackControlsOnMain", TRUE));
hPrintf("</TD><TD>");
hPrintf("Show track controls under main graphic");
hPrintf("</TD></TR>\n");

hPrintf("<TR><TD>");
hCheckBox("nextItemArrows", cartUsualBoolean(cart, "nextItemArrows", FALSE));
hPrintf("</TD><TD>");
hPrintf("Next/previous item navigation");
hPrintf("</TD></TR>\n");

hPrintf("<TR><TD>");
hCheckBox("nextExonArrows", cartUsualBoolean(cart, "nextExonArrows", TRUE));
hPrintf("</TD><TD>");
hPrintf("Next/previous exon navigation");
hPrintf("</TD></TR>\n");

#ifdef PRIORITY_CHANGES_IN_CONFIG_UI
hPrintf("<TR><TD>");
char *javascript="onClick=\"document.mainForm.hgTracksConfigPage.value='configure';document.mainForm.submit();\"";
hCheckBoxJS(configPriorityOverride,
	cartUsualBoolean(cart, configPriorityOverride , FALSE), javascript);
hPrintf("</TD><TD>");
hPrintf("Enable track re-ordering");
hPrintf("</TD></TR>\n");
#endif///def PRIORITY_CHANGES_IN_CONFIG_UI

hPrintf("<TR><TD>");
hCheckBox("enableAdvancedJavascript", advancedJavascriptFeaturesEnabled(cart));
hPrintf("</TD><TD>");
hPrintf("Enable advanced javascript features");
hPrintf("</TD></TR>\n");


hTableEnd();

char *freeze = hFreezeFromDb(database);
char buf[128];
if (stringIn(database, freeze))
    safef(buf, sizeof buf, "Configure Tracks on %s %s: %s %s",
	  organization, browserName, organism, freeze);
else
    safef(buf, sizeof buf, "Configure Tracks on %s %s: %s %s (%s)",
	  organization, browserName, organism, freeze, database);
webNewSection(buf);
hPrintf("Tracks: ");
if(isSearchTracksSupported(database,cart))
    {
    cgiMakeButtonWithMsg(TRACK_SEARCH, TRACK_SEARCH_BUTTON,TRACK_SEARCH_HINT);
    hPrintf(" ");
    }
cgiMakeButtonWithMsg(configHideAll, "hide all","Hide all tracks in this genome assembly");
hPrintf(" ");
cgiMakeButtonWithMsg(configShowAll, "show all","Show all tracks in this genome assembly");
hPrintf(" ");
cgiMakeButtonWithMsg(configDefaultAll, "default","Display only default tracks");
hPrintf("&nbsp;&nbsp;&nbsp;Groups:  ");
hButtonWithOnClick("hgt.collapseGroups", "collapse all", "Collapse all track groups", "return setAllTrackGroupVisibility(false)");
hPrintf(" ");
hButtonWithOnClick("hgt.expandGroups", "expand all", "Expand all track groups", "return setAllTrackGroupVisibility(true)");
hPrintf("<P STYLE=\"margin-top:5;\">Control track and group visibility more selectively below.<P>");
trackConfig(trackList, groupList, groupTarget, vis);

dyStringFree(&title);
freez(&groupTarget);
webEndSectionTables();
hPrintf("</FORM>");
}
Ejemplo n.º 26
0
static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn,
	char *sourceTable, char *inputTrackTable, 
	struct slName *fieldList, boolean invert, char *vocab)
/* Put out a lines in an html table that shows assayed sources that have hits in this
 * cluster, or if invert is set, that have misses. */
{
char *vocabFile = NULL;
if (vocab)
    {
    vocabFile = cloneFirstWord(vocab);
    }

/* Make the monster SQL query to get all assays*/
struct dyString *query = dyStringNew(0);
sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable, 
	inputTrackTable);
struct slName *field;
for (field = fieldList; field != NULL; field = field->next)
    sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name);
sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable);
sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable);
sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable);

boolean encodeStanford = FALSE;
if (startsWith("encode3", sourceTable) || startsWith("encode4", sourceTable))
    encodeStanford = TRUE;

int displayNo = 0;
int fieldCount = slCount(fieldList);
struct sqlResult *sr = sqlGetResult(conn, query->string);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    int sourceId = sqlUnsigned(row[0]);
    boolean hit = FALSE;
    int i;
    double signal = 0.0;
    for (i=0; i<cluster->expCount; i++)
        {
        if (cluster->expNums[i] == sourceId)
            {
            hit = TRUE;
            signal = cluster->expScores[i];
            break;
            }
        }
    if (hit ^ invert)
        {
	printf("</TR><TR>\n");
	webPrintIntCell(++displayNo);
	if (!invert)
	    webPrintDoubleCell(signal);
	webPrintLinkCell(row[1]);
	int i = 0;
        // find position of CV metadata in field list
        int offset = 3;
        struct slName *field = fieldList;
	for (i=0; i<fieldCount && field != NULL; ++i, field = field->next)
	    {
	    char *fieldVal = row[i+offset];
	    if (vocab)
	        {
                char *link = cloneString(factorSourceVocabLink(vocabFile, field->name, fieldVal));
		webPrintLinkCell(link);
		}
	    else
		webPrintLinkCell(fieldVal);
	    }
        char *table = row[2];
        if (encodeStanford)
            {
            char *file = stringIn("ENCFF", table);
            if (!file)
                webPrintLinkCell(table);
            else
                {
                webPrintLinkCellStart();
                printf("<A target='_blank'"
                        "href='https://www.encodeproject.org/files/%s'>%s</A>", file, file);
                webPrintLinkCellEnd();
               } 
            }
        else
            printMetadataForTable(table);
	}
    }
sqlFreeResult(&sr);
freez(&vocabFile);
dyStringFree(&query);
}
Ejemplo n.º 27
0
void submitRefToFiles(struct sqlConnection *conn, struct sqlConnection *conn2, struct sqlConnection *connSp,
    char *ref, char *fileRoot, char *inJax)
/* Create a .ra and a .tab file for given reference. */
{
/* Initially the tab file will have some duplicate lines, so
 * write to temp file, and then filter. */
char raName[PATH_LEN], tabName[PATH_LEN], capName[PATH_LEN];
FILE *ra = NULL, *tab = NULL, *cap = NULL;
struct dyString *query = dyStringNew(0);
struct sqlResult *sr;
char **row;
char *pubMed;
struct slName *list, *el;
boolean gotAny = FALSE;
struct hash *uniqImageHash = newHash(0);
struct hash *captionHash = newHash(0);
int imageWidth = 0, imageHeight = 0;
char path[PATH_LEN];
struct dyString *caption = dyStringNew(0);
struct dyString *copyright = dyStringNew(0);
struct dyString *probeNotes = dyStringNew(0);
boolean lookedForCopyright = FALSE;
	
safef(raName, sizeof(raName), "%s.ra", fileRoot);
safef(tabName, sizeof(tabName), "%s.tab", fileRoot);
safef(capName, sizeof(capName), "%s.txt", fileRoot);
tab = mustOpen(tabName, "w");
cap = mustOpen(capName, "w");


sqlDyStringPrintf(query, "select authors,journal,title,year from BIB_Refs where ");
sqlDyStringPrintf(query, "_Refs_key = '%s'", ref);
sr = sqlGetResultVerbose(conn, query->string);
row = sqlNextRow(sr);
if (row == NULL)
    errAbort("Can't find _Refs_key %s in BIB_Refs", ref);

/* Make ra file with stuff common to whole submission set. */
ra = mustOpen(raName, "w");
fprintf(ra, "submissionSource MGI\n");
fprintf(ra, "acknowledgement Thanks to the Gene Expression Database group at "
            "Mouse Genome Informatics (MGI) for collecting, annotating and sharing "
	    "this image. The MGI images were last updated in VisiGene on March 28, 2006. "
	    "Additional and more up to date annotations and images may be available "
	    "directly at <A HREF='http://www.informatics.jax.org' target='_blank'>MGI.</A>\n");
fprintf(ra, "submitSet jax%s\n", ref);
fprintf(ra, "taxon 10090\n");	/* Mus musculus taxon */
fprintf(ra, "fullDir http://hgwdev.gi.ucsc.edu/visiGene/full/inSitu/Mouse/jax\n");
fprintf(ra, "thumbDir http://hgwdev.gi.ucsc.edu/visiGene/200/inSitu/Mouse/jax\n");
fprintf(ra, "setUrl http://www.informatics.jax.org/\n");
fprintf(ra, "itemUrl http://www.informatics.jax.org/searches/image.cgi?%%s\n");
fprintf(ra, "abUrl http://www.informatics.jax.org/searches/antibody.cgi?%%s\n");
fprintf(ra, "journal %s\n", row[1]);
fprintf(ra, "publication %s\n", row[2]);
fprintf(ra, "year %s\n", row[3]);

/* The contributor (author) list is in format Kent WJ; Haussler DH; format in
 * Jackson.  We convert it to Kent W.J.,Haussler D.H., format for visiGene. */
fprintf(ra, "contributor ");
list = charSepToSlNames(row[0], ';');
for (el = list; el != NULL; el = el->next)
    {
    char *lastName = skipLeadingSpaces(el->name);
    char *initials = strrchr(lastName, ' ');
    if (initials == NULL)
	initials = "";
    else
	*initials++ = 0;
    fprintf(ra, "%s", lastName);
    if (initials[0] != 0)
	{
	char c;
	fprintf(ra, " ");
	while ((c = *initials++) != 0)
	    fprintf(ra, "%c.", c);
	}
    fprintf(ra, ",");
    }
fprintf(ra, "\n");
slNameFreeList(&list);
sqlFreeResult(&sr);

/* Add in link to PubMed record on publication. */
dyStringClear(query);
sqlDyStringPrintf(query,
   "select ACC_Accession.accID from ACC_Accession,ACC_LogicalDB "
   "where ACC_Accession._Object_key = %s "
   "and ACC_Accession._LogicalDB_key = ACC_LogicalDB._LogicalDB_key "
   "and ACC_LogicalDB.name = 'PubMed'", ref);
pubMed = sqlQuickStringVerbose(conn, query->string);
if (pubMed != NULL)
    fprintf(ra, "pubUrl https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=%s\n", pubMed);
freez(&pubMed);

dyStringClear(query);
sqlDyStringPrintf(query, 
	"select distinct MRK_Marker.symbol as gene,"
               "GXD_Specimen.sex as sex,"
	       "GXD_Specimen.age as age,"
	       "GXD_Specimen.ageMin as ageMin,"
	       "GXD_Specimen.ageMax as ageMax,"
	       "IMG_ImagePane.paneLabel as paneLabel,"
	       "ACC_Accession.numericPart as fileKey,"
	       "IMG_Image._Image_key as imageKey,"
	       "GXD_Assay._ProbePrep_key as probePrepKey,"
	       "GXD_Assay._AntibodyPrep_key as antibodyPrepKey,"
	       "GXD_Assay._ReporterGene_key as reporterGeneKey,"
	       "GXD_FixationMethod.fixation as fixation,"
	       "GXD_EmbeddingMethod.embeddingMethod as embedding,"
	       "GXD_Assay._Assay_key as assayKey,"
	       "GXD_Specimen.hybridization as sliceType,"
	       "GXD_Specimen._Genotype_key as genotypeKey,"
	       "IMG_ImagePane._ImagePane_key as imagePaneKey\n"
	"from MRK_Marker,"
	     "GXD_Assay,"
	     "GXD_Specimen,"
	     "GXD_InSituResult,"
	     "GXD_InSituResultImage,"
	     "GXD_FixationMethod,"
	     "GXD_EmbeddingMethod,"
	     "IMG_ImagePane,"
	     "IMG_Image,"
	     "ACC_Accession\n"
	"where MRK_Marker._Marker_key = GXD_Assay._Marker_key "
	  "and GXD_Assay._Assay_key = GXD_Specimen._Assay_key "
	  "and GXD_Specimen._Specimen_key = GXD_InSituResult._Specimen_key "
	  "and GXD_InSituResult._Result_key = GXD_InSituResultImage._Result_key "
	  "and GXD_InSituResultImage._ImagePane_key = IMG_ImagePane._ImagePane_key "
	  "and GXD_FixationMethod._Fixation_key = GXD_Specimen._Fixation_key "
	  "and GXD_EmbeddingMethod._Embedding_key = GXD_Specimen._Embedding_key "
	  "and IMG_ImagePane._Image_key = IMG_Image._Image_key "
	  "and IMG_Image._Image_key = ACC_Accession._Object_key "
	  "and ACC_Accession.prefixPart = 'PIX:' "
	  "and GXD_Assay._ImagePane_key is NULL "
	);
sqlDyStringPrintf(query, "and GXD_Assay._Refs_key = '%s'", ref);
sr = sqlGetResultVerbose(conn, query->string);

fprintf(tab, "#");
fprintf(tab, "gene\t");
fprintf(tab, "probeColor\t");
fprintf(tab, "sex\t");
fprintf(tab, "age\t");
fprintf(tab, "ageMin\t");
fprintf(tab, "ageMax\t");
fprintf(tab, "paneLabel\t");
fprintf(tab, "fileName\t");
fprintf(tab, "submitId\t");
fprintf(tab, "fPrimer\t");
fprintf(tab, "rPrimer\t");
fprintf(tab, "abName\t");
fprintf(tab, "abTaxon\t");
fprintf(tab, "abSubmitId\t");
fprintf(tab, "fixation\t");
fprintf(tab, "embedding\t");
fprintf(tab, "bodyPart\t");
fprintf(tab, "sliceType\t");
fprintf(tab, "genotype\t");
fprintf(tab, "strain\t");
fprintf(tab, "priority\t");
fprintf(tab, "captionId\t");
fprintf(tab, "imageWidth\t");
fprintf(tab, "imageHeight\n");

while ((row = sqlNextRow(sr)) != NULL)
    {
    char *gene = row[0];
    char *sex = row[1];
    char *age = row[2];
    char *ageMin = row[3];
    char *ageMax = row[4];
    char *paneLabel = row[5];
    char *fileKey = row[6];
    char *imageKey = row[7];
    char *probePrepKey = row[8];
    char *antibodyPrepKey = row[9];
    char *reporterGeneKey = row[10];
    char *fixation = row[11];
    char *embedding = row[12];
    char *assayKey = row[13];
    char *sliceType = row[14];
    char *genotypeKey = row[15];
    char *imagePaneKey = row[16];
    double calcAge = -1;
    char *probeColor = "";
    char *bodyPart = "";
    char *abName = NULL;
    char *rPrimer = NULL, *fPrimer = NULL;
    char *genotype = NULL;
    char *strain = NULL;
    char *priority = NULL;
    char abTaxon[32];
    char *captionId = "";
    char *abSubmitId = NULL;

    verbose(3, "   "); dumpRow(row, 16);

    if (age == NULL)
        continue;

    if (!lookedForCopyright)
	{
	struct sqlResult *sr = NULL;
	char **row;
	lookedForCopyright = TRUE;

	dyStringClear(query);
	sqlDyStringPrintf(query, 
	     "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType "
	     "where MGI_Note._Object_key = %s "
	     "and ACC_MGIType.name = 'Image' "
	     "and ACC_MGIType._MGIType_key = MGI_Note._MGIType_key "
	     "and MGI_NoteType.noteType='Copyright' "
	     "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key "
	     "and MGI_Note._Note_key = MGI_NoteChunk._Note_key "
	     "order by sequenceNum"
	     , imageKey);
	sr = sqlGetResultVerbose(conn2, query->string);
	while ((row = sqlNextRow(sr)) != NULL)
	   dyStringAppend(copyright, row[0]);
	sqlFreeResult(&sr);

	verbose(2,"imageKey=%s\n",imageKey);

	if (copyright->stringSize != 0)
	    {
	    fprintf(ra, "copyright %s\n", copyright->string);
	    }
	}

    /* Massage sex */
        {
	if (sameString(sex, "Male"))
	    sex = "male";
	else if (sameString(sex, "Female"))
	    sex = "female";
	else
	    sex = "";
	}

    /* Massage age */
	{
	char *embryoPat = "embryonic day ";
	char *newbornPat = "postnatal newborn";
	char *dayPat = "postnatal day ";
	char *weekPat = "postnatal week ";
	char *adultPat = "postnatal adult";
	double calcMinAge = atof(ageMin);
	double calcMaxAge = atof(ageMax);
	double mouseBirthAge = 21.0;
	//double mouseAdultAge = 63.0;	/* Relative to conception, not birth */

	if (age[0] == 0)
	    {
	    warn("age null, ageMin %s, ageMax %s\n", ageMin, ageMax);
	    calcAge = (calcMinAge + calcMaxAge) * 0.5;
	    }
	else if (startsWith(embryoPat, age))
	    calcAge = atof(age+strlen(embryoPat));
	else if (sameString(newbornPat, age))
	    calcAge = mouseBirthAge;
	else if (startsWith(dayPat, age))
	    calcAge = atof(age+strlen(dayPat)) + mouseBirthAge;
        else if (startsWith(weekPat, age))
	    calcAge = 7.0 * atof(age+strlen(weekPat)) + mouseBirthAge;
	else if (sameString(adultPat, age) && calcMaxAge - calcMinAge > 1000 
		&& calcMinAge < 365)
	    calcAge = 365;	/* Most adult mice are relatively young */
	else
	    {
	    warn("Calculating age from %s", age);
	    calcAge = (calcMinAge + calcMaxAge) * 0.5;
	    }
	if (calcAge < calcMinAge)
	    calcAge = calcMinAge;
	if (calcAge > calcMaxAge)
	    calcAge = calcMaxAge;
	}
    
    /* Massage probeColor */
        {
	if (!isStrNull(reporterGeneKey))
	    {
	    /* Fixme: make sure that reporterGene's end up in probeType table. */
	    char *name = NULL;
	    dyStringClear(query);
	    sqlDyStringPrintf(query, 
	    	"select term from VOC_Term where _Term_key = %s", 
	    	reporterGeneKey);
	    name = sqlQuickStringVerbose(conn2, query->string);
	    if (name == NULL)
	        warn("Can't find _ReporterGene_key %s in VOC_Term", 
			reporterGeneKey);
	    else if (sameString(name, "GFP"))
	        probeColor = "green";
	    else if (sameString(name, "lacZ"))
	        probeColor = "blue";
	    else 
	        warn("Don't know color of reporter gene %s", name);
	    freez(&name);
	    }
	if (!isStrNull(probePrepKey))
	    {
	    char *name = NULL;
	    dyStringClear(query);
	    sqlDyStringPrintf(query, 
	      "select GXD_VisualizationMethod.visualization "
	      "from GXD_VisualizationMethod,GXD_ProbePrep "
	      "where GXD_ProbePrep._ProbePrep_key = %s "
	      "and GXD_ProbePrep._Visualization_key = GXD_VisualizationMethod._Visualization_key"
	      , probePrepKey);
	    name = sqlQuickStringVerbose(conn2, query->string);
	    if (name == NULL)
	        warn("Can't find visualization from _ProbePrep_key %s", probePrepKey);
	    probeColor = colorFromLabel(name, gene);
	    freez(&name);
	    if (probeColor[0] == 0)
	        {
		dyStringClear(query);
		sqlDyStringPrintf(query, 
			"select GXD_Label.label from GXD_Label,GXD_ProbePrep "
		        "where GXD_ProbePrep._ProbePrep_key = %s " 
			"and GXD_ProbePrep._Label_key = GXD_Label._Label_key"
		        , probePrepKey);
		name = sqlQuickStringVerbose(conn2, query->string);
		if (name == NULL)
		    warn("Can't find label from _ProbePrep_key %s", 
		    	probePrepKey);
		probeColor = colorFromLabel(name, gene);
		}
	    freez(&name);
	    }
	if (!isStrNull(antibodyPrepKey) && probeColor[0] == 0 )
	    {
	    char *name = NULL;
	    dyStringClear(query);
	    sqlDyStringPrintf(query, 
		  "select GXD_Label.label from GXD_Label,GXD_AntibodyPrep "
		  "where GXD_AntibodyPrep._AntibodyPrep_key = %s "
		  "and GXD_AntibodyPrep._Label_key = GXD_Label._Label_key"
		  , antibodyPrepKey);
	    name = sqlQuickStringVerbose(conn2, query->string);
	    if (name == NULL)
		warn("Can't find label from _AntibodyPrep_key %s", antibodyPrepKey);
	    probeColor = colorFromLabel(name, gene);
	    freez(&name);
	    }
	}

    /* Get abName, abTaxon, abSubmitId */
    abTaxon[0] = 0;
    if (!isStrNull(antibodyPrepKey))
        {
	struct sqlResult *sr = NULL;
	int orgKey = 0;
	char **row;
	dyStringClear(query);
	sqlDyStringPrintf(query, 
		"select antibodyName,_Organism_key,GXD_Antibody._Antibody_key "
		"from GXD_AntibodyPrep,GXD_Antibody "
		"where GXD_AntibodyPrep._AntibodyPrep_key = %s "
		"and GXD_AntibodyPrep._Antibody_key = GXD_Antibody._Antibody_key"
		, antibodyPrepKey);
	sr = sqlGetResultVerbose(conn2, query->string);
	row = sqlNextRow(sr);
	if (row != NULL)
	    {
	    abName = cloneString(row[0]);
	    orgKey = atoi(row[1]);
	    abSubmitId = cloneString(row[2]);
	    }
	sqlFreeResult(&sr);

	if (orgKey > 0)
	    {
	    char *latinName = NULL, *commonName = NULL;
	    int spTaxon = 0;
	    dyStringClear(query);
	    sqlDyStringPrintf(query, "select latinName from MGI_Organism "
	                          "where _Organism_key = %d", orgKey);
	    latinName = sqlQuickStringVerbose(conn2, query->string);
	    if (latinName != NULL 
		&& !sameString(latinName, "Not Specified")
		&& !sameString(latinName, "Not Applicable"))
		{
		char *e = strchr(latinName, '/');
		if (e != NULL) 
		   *e = 0;	/* Chop off / and after. */
		spTaxon = spBinomialToTaxon(connSp, latinName);
		}
	    else
	        {
		dyStringClear(query);
		sqlDyStringPrintf(query, "select commonName from MGI_Organism "
	                          "where _Organism_key = %d", orgKey);
		commonName = sqlQuickStringVerbose(conn2, query->string);
		if (commonName != NULL 
		    && !sameString(commonName, "Not Applicable")
		    && !sameString(commonName, "Not Specified"))
		    {
		    spTaxon = spCommonToTaxon(connSp, commonName);
		    }
		}
	    if (spTaxon != 0)
	        safef(abTaxon, sizeof(abTaxon), "%d", spTaxon);
	    freez(&latinName);
	    freez(&commonName);
	    }
	}
    if (abName == NULL)
        abName = cloneString("");
    if (abSubmitId == NULL)
        abSubmitId = cloneString("");

    /* Get rPrimer, lPrimer */
    if (!isStrNull(probePrepKey))
        {
	struct sqlResult *sr = NULL;
	char **row;
	dyStringClear(query);
	sqlDyStringPrintf(query,
	    "select primer1sequence,primer2sequence "
	    "from PRB_Probe,GXD_ProbePrep "
	    "where PRB_Probe._Probe_key = GXD_ProbePrep._Probe_key "
	    "and GXD_ProbePrep._ProbePrep_key = %s"
	    , probePrepKey);
	sr = sqlGetResultVerbose(conn2, query->string);
	row = sqlNextRow(sr);
	if (row != NULL)
	    {
	    fPrimer = cloneString(row[0]);
	    rPrimer = cloneString(row[1]);
	    }
	sqlFreeResult(&sr);
	}

    /* Note Jackson database actually stores the primers very
     * erratically.  In all the cases I can find for in situs
     * the primers are actually stored in free text in the PRB_Notes
     * e.g.  ... primers CGCGGATCCAGGGGAAACAGAAGGGCTGCG and CCCAAGCTTAGACTGTACAGGCTGAGCC ...
     */
    if (fPrimer == NULL || fPrimer[0]==0)
        {
	struct sqlResult *sr = NULL;
	char **row;
	dyStringClear(query);
	sqlDyStringPrintf(query,
	    "select PRB_Notes.note from GXD_ProbePrep, PRB_Notes"
	    " where GXD_ProbePrep._ProbePrep_key = %s"
	    "  and GXD_ProbePrep._Probe_key = PRB_Notes._Probe_key"
	    " order by PRB_Notes.sequenceNum"
	    , probePrepKey);
	sr = sqlGetResultVerbose(conn2, query->string);
	dyStringClear(probeNotes);
	while ((row = sqlNextRow(sr)) != NULL)
	   dyStringAppend(probeNotes, row[0]);
	sqlFreeResult(&sr);

	if (probeNotes->stringSize > 0)
	    {
	    char f[256];
	    char r[256];
	    int i = 0;
	    char *s = strstr(probeNotes->string," primers ");
	    if (s)
		{
		s += strlen(" primers ");
		i = 0;
		while (strchr("ACGT",*s) && (i<sizeof(f)))
		    f[i++] = *s++;
		f[i]=0;
		if (strstr(s," and ")==s)
		    {
		    s += strlen(" and ");
		    i = 0;
    		    while (strchr("ACGT",*s) && (i<sizeof(r)))
    			r[i++] = *s++;
    		    r[i]=0;
		    if (strlen(f) >= 10 && strlen(r) >= 10)
			{
			fPrimer = cloneString(f);
			rPrimer = cloneString(r);
			}
		    else
			{
			verbose(1, "bad primer parse:_ProbePrep_key=%s fPrimer=[%s], rPrimer=[%s]\n",
			    probePrepKey,f,r);
			}
		    }
		}
	    }
	}
	
    if (fPrimer == NULL)
        fPrimer = cloneString("");
    if (rPrimer == NULL)
        rPrimer = cloneString("");

    fixation = blankOutUnknown(fixation);
    embedding = blankOutUnknown(embedding);

    /* Massage body part and slice type.  We only handle whole mounts. */
    if (sameString(sliceType, "whole mount"))
	{
	bodyPart = "whole";
	priority = "100";
	}
    else
	{
        sliceType = "";
	priority = "1000";
	}

    genotypeAndStrainFromKey(genotypeKey, conn2, &genotype, &strain);

    if (isStrNull(paneLabel))
	paneLabel = cloneString("");	  /* trying to suppress nulls in output */
    stripChar(paneLabel, '"');	/* Get rid of a difficult quote to process. */
    
    /* Fetch image dimensions from file. */
    imageWidth=0;
    imageHeight=0;
    safef(path, sizeof(path), "%s/%s.jpg", inJax, fileKey);
    if (fileExists(path))
    	jpegSize(path,&imageWidth,&imageHeight);  /* will errAbort if no valid .jpeg exists */
    else
	warn("Picture Missing! %s ",path);
    
    /* Deal caption if any.  Most of the work only happens the
     * first time see the image. */
    if (!hashLookup(uniqImageHash, imageKey))
        {
	struct sqlResult *sr = NULL;
	char **row;
	hashAdd(uniqImageHash, imageKey, NULL);
	dyStringClear(caption);
	dyStringClear(query);
	sqlDyStringPrintf(query, 
	     "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType "
	     "where MGI_Note._Object_key = %s "
	     "and ACC_MGIType.name = 'Image' "
	     "and ACC_MGIType._MGIType_key = MGI_Note._MGIType_key "
	     "and MGI_NoteType.noteType='Caption' "
	     "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key "
	     "and MGI_Note._Note_key = MGI_NoteChunk._Note_key "
	     "order by sequenceNum"
	     , imageKey);
	sr = sqlGetResultVerbose(conn2, query->string);
	while ((row = sqlNextRow(sr)) != NULL)
	   dyStringAppend(caption, row[0]);
	sqlFreeResult(&sr);

	if (caption->stringSize > 0)
	    {
	    subChar(caption->string, '\t', ' ');
	    subChar(caption->string, '\n', ' ');
	    fprintf(cap, "%s\t%s\n", imageKey, caption->string);
	    hashAdd(captionHash, imageKey, imageKey);
	    }
	}
    if (hashLookup(captionHash, imageKey))
        captionId = imageKey;
    else
        captionId = "";

    fprintf(tab, "%s\t", gene);
    fprintf(tab, "%s\t", probeColor);
    fprintf(tab, "%s\t", sex);
    fprintf(tab, "%3.2f\t", calcAge);
    fprintf(tab, "%s\t", ageMin);
    fprintf(tab, "%s\t", ageMax);
    fprintf(tab, "%s\t", paneLabel);   /* may have to change NULL to empty string or "0" ? */
    fprintf(tab, "%s.jpg\t", fileKey);
    fprintf(tab, "%s\t", imageKey);
    fprintf(tab, "%s\t", fPrimer);
    fprintf(tab, "%s\t", rPrimer);
    fprintf(tab, "%s\t", abName);
    fprintf(tab, "%s\t", abTaxon);
    fprintf(tab, "%s\t", abSubmitId);
    fprintf(tab, "%s\t", fixation);
    fprintf(tab, "%s\t", embedding);
    fprintf(tab, "%s\t", bodyPart);
    fprintf(tab, "%s\t", sliceType);
    fprintf(tab, "%s\t", genotype);
    fprintf(tab, "%s\t", strain);
    fprintf(tab, "%s\t", priority);
    fprintf(tab, "%s\t", captionId);
    fprintf(tab, "%d\t", imageWidth);
    fprintf(tab, "%d\n", imageHeight);

    printExpression(tab,  conn2,  imagePaneKey, assayKey);
    gotAny = TRUE;
    freez(&genotype);
    freez(&abName);
    freez(&abSubmitId);
    freez(&rPrimer);
    freez(&fPrimer);
    }
sqlFreeResult(&sr);

carefulClose(&ra);
carefulClose(&tab);
carefulClose(&cap);

if (!gotAny)
    {
    remove(raName);
    remove(capName);
    remove(tabName);
    }
dyStringFree(&probeNotes);
dyStringFree(&copyright);
dyStringFree(&caption);
dyStringFree(&query);
hashFree(&uniqImageHash);
hashFree(&captionHash);

}
Ejemplo n.º 28
0
char *gsS3Upload(char *s3UploadUrl, char *inputFileName, off_t contentLength, char *base64Md5, char *hexMd5, char *contentType, boolean progress, char *fileName)
/* call s3 upload */
{
// S3 UPLOAD  to Amazon Storage

struct dyString *reqExtra = newDyString(256);
dyStringPrintf(reqExtra, "Content-Length: %lld\r\n", (long long)contentLength);
dyStringPrintf(reqExtra, "Content-MD5: %s\r\n", base64Md5);
dyStringPrintf(reqExtra, "Content-Type: %s\r\n", contentType);

int sd = netOpenHttpExt(s3UploadUrl, "PUT", reqExtra->string);
if (sd < 0)
    errAbort("failed to open socket for [%s]", s3UploadUrl);


unsigned char buffer[S3UPBUFSIZE];
int bufRead = 0;
FILE *f = mustOpen(inputFileName,"rb");
off_t totalUploaded = 0;
int lastPctUploaded = -1;
// upload the file contents
while ((bufRead = fread(&buffer, 1, S3UPBUFSIZE, f)) > 0) 
    {
    int bufWrite = 0;
    while (bufWrite < bufRead)
	{
	int socketWrite = write(sd, buffer + bufWrite, bufRead - bufWrite);
	if (socketWrite == -1)
	    {
	    if (errno == 32) // broken pipe often happens when the ssh connection shuts down or has errors.
		{
		warn("broken pipe, S3 server closed the ssh connection.");
		break;
		}
	    errnoAbort("error writing to socket for GenomeSpace upload");
	    }
	bufWrite += socketWrite;
	}
    if (errno == 32)
	break;
    totalUploaded += bufRead;
    int pctUploaded = 100.0*totalUploaded/contentLength;
    if (progress && (pctUploaded != lastPctUploaded))
	{

	char nicenumber[1024]="";
	sprintWithGreekByte(nicenumber, sizeof(nicenumber), contentLength);

	// Various global flags must be reset to draw a fresh html output page.
	webHeadAlreadyOutputed = FALSE;
	webInTextMode = FALSE;
	includedResourceFiles = NULL;
	htmlWarnBoxSetUpAlready=FALSE;

	htmlOpen("Uploading Output to GenomeSpace");

	printf("Name: %s<br>\n", fileName);
	printf("Size: %s<br>\n", nicenumber);
	printf("Progress: %0d%%<br>\n", pctUploaded);
	printf("<br>\n");

	printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>\n"
	    "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" >"
	    "<INPUT TYPE=SUBMIT NAME=\"Refresh\" VALUE=\"Refresh\" onclick='window.location=window.location;return false;' >"
	    "</FORM>\n"
	    , hgtaDoMainPage);
	puts("<script type=\"text/javascript\">");
	puts("<!--");
	puts("setTimeout(\"location = location;\",5000);");
	puts("-->");
	puts("</script>");

	htmlClose();
	fflush(stdout);

	lastPctUploaded = pctUploaded;

	}
    }

carefulClose(&f);

char *responseCode = NULL;
char *s3UploadResponse = parseResponse(sd, &responseCode);
if (!sameString(responseCode, "200 OK"))
    errAbort("Amazon S3 Response: %s", responseCode);

dyStringFree(&reqExtra);

return s3UploadResponse;

}
Ejemplo n.º 29
0
void testDbFilters(struct htmlPage *dbPage, char *org, char *db, 
	char *accColumn, struct slName *geneList)
/* Test filter that returns just geneList. */
{
struct slName *gene;
int rowCount;
char accFilter[256];

/* Start out with filter page. */
struct htmlPage *page = quickSubmit(dbPage, NULL, org, db, accColumn, NULL,
	"accOneFilterPage", advFilterVarName, "on");
verbose(1, "testFilters %s %s\n", org, db);
if (page == NULL)
    return;

/* Set up to filter exactly one gene. */
safef(accFilter, sizeof(accFilter), "near.as.%s.wild", accColumn);
    {
    htmlPageSetVar(page, NULL, accFilter, geneList->name);
    htmlPageSetVar(page, NULL, searchVarName, geneList->name);
    serialSubmit(&page, NULL, org, db, accColumn, geneList->name,
	    "accOneFilterSubmit", "Submit", "on");
    if (page == NULL)
	return;

    /* Make sure really got one gene. */
    rowCount = nearCountUniqAccRows(page);
    if (rowCount != 1)
	{
	qaStatusSoftError(nearTestList->status, 
	    "Acc exact filter returned %d items", rowCount);
	}
    }

/* Set up filter for all genes in list. */
    {
    struct dyString *dy = newDyString(0);
    int geneCount = slCount(geneList);
    for (gene = geneList; gene != NULL; gene = gene->next)
	dyStringPrintf(dy, "%s ", gene->name);
    htmlPageSetVar(page, NULL, accFilter, dy->string);
    htmlPageSetVar(page, NULL, countVarName, "all");  /* despite 3 genes requested, must see all if many dupes */
    serialSubmit(&page, NULL, org, db, accColumn, dy->string,
	    "accMultiFilterSubmit", "Submit", "on");
    dyStringFree(&dy);
    if (page == NULL)
	return;
    rowCount = nearCountUniqAccRows(page);
    if (rowCount != geneCount)
	{
	qaStatusSoftError(nearTestList->status, 
	    "Acc multi filter expecting %d, got %d items", geneCount, rowCount);
	}
    }

/* Set up filter for wildcard in list. */
    {
    struct dyString *dy = newDyString(0);
    char len = strlen(geneList->name);
    dyStringAppendN(dy, geneList->name, len-1);
    dyStringAppendC(dy, '*');
    htmlPageSetVar(page, NULL, accFilter, dy->string);
    serialSubmit(&page, NULL, org, db, accColumn, dy->string,
	    "accWildFilterSubmit", "Submit", "on");
    dyStringFree(&dy);
    if (page == NULL)
	return;
    rowCount = nearCountRows(page);
    if (rowCount < 1)
	{
	qaStatusSoftError(nearTestList->status, 
	    "Acc wild filter no match");
	}
    }


/* Clear out advanced filters. */
    {
    htmlPageFree(&page);
    page = quickSubmit(dbPage, NULL, org, db, NULL, NULL,
	"advFilterClear", advFilterClearVarName, "on");
    }
htmlPageFree(&page);
}
static void doBlat(struct sqlConnection *conn, int taxon, char *db)
/* place probe seq from non-BAC with blat that have no alignments yet */
{
int rc = 0;
char *blatSpec=NULL;
char cmdLine[256];
char path1[256];
char path2[256];
struct dyString *dy = dyStringNew(0);

/* (non-BACs needing alignment) */
dyStringClear(dy);
dyStringPrintf(dy, 
    "select concat(\"vgPrb_\",e.id), e.seq"
    " from vgPrb e, vgPrbAli a"
    " where e.id = a.vgPrb"
    " and a.db = '%s'"
    " and a.status = 'new'"
    " and e.taxon = %d"
    " and e.type <> 'bac'"
    " and e.seq <> ''"
    " order by e.id"
    , db, taxon);
//restore: 
rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE);
verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon);

if (rc == 0) 
    {
    unlink("blat.fa");
    system("rm -f blatNearBest.psl; touch blatNearBest.psl");  /* make empty file */
    return;
    }

/* make .ooc and blat on kolossus */

safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db);
safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
//restore: 
verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

safef(cmdLine,sizeof(cmdLine),
"ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11"
" -repMatch=1024 %s.2bit /dev/null /dev/null'",
    getCurrentDir(),db);
//restore: 
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

safef(cmdLine,sizeof(cmdLine),
	"ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'",
	getCurrentDir(),db);
//restore: 
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

/* using blat even with -fastMap was way too slow - took over a day,
 * so instead I will make a procedure to write a fake psl for the BACs
 * which you will see called below */

safef(path2,sizeof(path2),"%s.2bit",db);
verbose(1,"rm %s\n",path2); unlink(path2); 

safef(path2,sizeof(path2),"11.ooc");
verbose(1,"rm %s\n",path2); unlink(path2); 


/* skip psl header and sort on query name */
safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > blatS.psl");
verbose(1,"cmdLine=[%s]\n",cmdLine);
system(cmdLine); 

/* keep near best within 5% of the best */
safef(cmdLine,sizeof(cmdLine), 
    "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50"
    " blatS.psl blatNearBest.psl");
verbose(1,"cmdLine=[%s]\n",cmdLine);
system(cmdLine); 

unlink("blat.fa");
unlink("blat.psl");
unlink("blatS.psl");

freez(&blatSpec);
dyStringFree(&dy);
}