Beispiel #1
0
struct dyString *lineFileSlurpHttpBody(struct lineFile *lf,
				       boolean chunked, int contentLength)
/* Return a dyString that contains the http response body in lf.  Handle
 * chunk-encoding and content-length:
 *   - chunked: read hex-sized chunks until a zero-size chunk (or a read
 *     or parse failure), then discard footer lines; a following line that
 *     starts with "HTTP/" is put back with lineFileReuse.
 *   - contentLength >= 0: read lines until at least contentLength bytes
 *     have been consumed or input ends.
 *   - otherwise: read lines until input ends.
 * Each line read is appended without its last byte and followed by '\n'.
 * Problems in the chunked stream are reported with warn(), not fatal. */
{
  struct dyString *body = newDyString(64*1024);
  char *line;
  int lineSize;

  dyStringClear(body);
  if (chunked)
    {
      /* Handle "Transfer-Encoding: chunked" body */
      /* Procedure from RFC2068 section 19.4.6 */
      char *csword;
      unsigned chunkSize = 0;
      unsigned size;
      do
	{
	  /* Read line that has chunk size (in hex) as first word. */
	  if (! lineFileNext(lf, &line, NULL))
	    break;
	  csword = nextWord(&line);
	  /* Guard against a size line that yields no word at all before
	   * handing the pointer to sscanf. */
	  if (csword == NULL || sscanf(csword, "%x", &chunkSize) < 1)
	    {
	      warn("%s: chunked transfer-encoding chunk size parse error.\n",
		   lf->fileName);
	      break;
	    }
	  /* If chunk size is 0, read in a blank line & then we're done. */
	  if (chunkSize == 0)
	    {
	      /* A failed read must not leave us testing the previous line. */
	      if (! lineFileNext(lf, &line, NULL))
		line = NULL;
	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
		     lf->fileName, (line == NULL ? "(end of input)" : line));

	      break;
	    }
	  /* Read (and save) lines until we have read in chunk. */
	  for (size = 0;  size < chunkSize;  size += lineSize)
	    {
	      if (! lineFileNext(lf, &line, &lineSize))
		break;
	      dyStringAppendN(body, line, lineSize-1);
	      dyStringAppendC(body, '\n');
	    }
	  /* Read blank line - or extra CRLF inserted in the middle of the
	   * current line, in which case we need to trim it. */
	  if (size > chunkSize)
	    {
	      /* Overshot the chunk: drop the surplus bytes from the end. */
	      body->stringSize -= (size - chunkSize);
	      body->string[body->stringSize] = 0;
	    }
	  else if (size == chunkSize)
	    {
	      if (! lineFileNext(lf, &line, NULL))
		line = NULL;
	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
		     lf->fileName, (line == NULL ? "(end of input)" : line));
	    }
	} while (chunkSize > 0);
      /* Try to read in next line.  If it's an HTTP header, put it back. */
      /* If there is a next line but it's not an HTTP header, it's a footer. */
      if (lineFileNext(lf, &line, NULL))
	{
	  if (startsWith("HTTP/", line))
	    lineFileReuse(lf);
	  else
	    {
	      /* Got a footer -- keep reading until blank line */
	      warn("%s: chunked transfer-encoding: got footer %s, discarding it.\n",
		   lf->fileName, line);
	      while (lineFileNext(lf, &line, NULL))
		{
		  if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
		    break;
		  warn("discarding footer line: %s\n", line);
		}
	    }
	}
    }
  else if (contentLength >= 0)
    {
      /* Read in known length */
      int size;
      for (size = 0;  size < contentLength;  size += lineSize)
	{
	  if (! lineFileNext(lf, &line, &lineSize))
	    break;
	  dyStringAppendN(body, line, lineSize-1);
	  dyStringAppendC(body, '\n');
	}
    }
  else
    {
      /* Read in to end of file (assume it's not a persistent connection) */
      while (lineFileNext(lf, &line, &lineSize))
	{
	  dyStringAppendN(body, line, lineSize-1);
	  dyStringAppendC(body, '\n');
	}
    }

  return(body);
} /* lineFileSlurpHttpBody */
Beispiel #2
0
void initStep(struct sqlConnection *conn, struct stepInit *init)
/* Create step based on initializer.
 *   conn - open connection used for every query and update below
 *   init - description of the step: name, cpusRequested, comma separated
 *          input/output type, format and temp-name lists, software list
 * Aborts if the comma separated lists disagree in length or if a named
 * software is not found in eapSoftware.  If a step of this name is already
 * in eapStep it only warns and returns without changing anything.
 * NOTE(review): values from init are placed into the SQL text unescaped;
 * presumably init comes from trusted built-in tables - confirm. */
{
/* Do a little validation on while counting up inputs and outputs */
int inCount = commaSepCount(init->inputTypes);
int matchCount = commaSepCount(init->inputFormats);
if (inCount != matchCount)
    errAbort("inputTypes has %d elements but inputFormats has %d in step %s", 
	    inCount, matchCount, init->name);
int outCount = commaSepCount(init->outputTypes);
matchCount = commaSepCount(init->outputFormats);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputFormats has %d in step %s", 
	    outCount, matchCount, init->name);
matchCount = commaSepCount(init->outputNamesInTempDir);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", 
	    outCount, matchCount, init->name);

/* Nothing to do if the step is already there. */
struct dyString *query = dyStringNew(0);
dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name);
int existingCount = sqlQuickNum(conn, query->string);
if (existingCount > 0)
    {
    warn("%s already exists in eapStep", init->name);
    dyStringFree(&query);
    return;
    }

/* Parse out software part and make sure that all pieces are there. 
 * The looked-up ids are only needed for the existence check, so they are
 * not stored (the former variable length array could have had size 0). */
char **softwareArray;
int softwareCount;
sqlStringDynamicArray(init->software, &softwareArray, &softwareCount);
int i;
for (i=0; i<softwareCount; ++i)
    {
    char *name = softwareArray[i];
    dyStringClear(query);
    dyStringPrintf(query, "select id from eapSoftware where name='%s'", name);
    if (sqlQuickNum(conn, query->string) == 0)
        errAbort("Software %s doesn't exist by that name in eapSoftware", name);
    }

/* Make step record. */
dyStringClear(query);
dyStringAppend(query,
	"insert eapStep (name,cpusRequested,"
        " inCount,inputTypes,inputFormats,"
	" outCount,outputNamesInTempDir,outputTypes,outputFormats)"
	" values (");
dyStringPrintf(query, "'%s',", init->name);
dyStringPrintf(query, "%d,", init->cpusRequested);
dyStringPrintf(query, "%d,", inCount);
dyStringPrintf(query, "'%s',", init->inputTypes);
dyStringPrintf(query, "'%s',", init->inputFormats);
dyStringPrintf(query, "%d,", outCount);
dyStringPrintf(query, "'%s',", init->outputNamesInTempDir);
dyStringPrintf(query, "'%s',", init->outputTypes);
dyStringPrintf(query, "'%s'", init->outputFormats);
dyStringPrintf(query, ")");
sqlUpdate(conn, query->string);

/* Make software/step associations. */
for (i=0; i<softwareCount; ++i)
    {
    dyStringClear(query);
    dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')",
	    init->name, softwareArray[i]);
    sqlUpdate(conn, query->string);
    }

/* Force step version stuff to be made right away */
eapCurrentStepVersion(conn, init->name);

/* Clean up.  Element 0 may only be touched when there is an element 0. */
dyStringFree(&query);
if (softwareCount > 0)
    freez(&softwareArray[0]);
freez(&softwareArray);
}
void bioImageLoad(char *setRaFile, char *itemTabFile)
/* bioImageLoad - Load data into bioImage database.  Values that apply to
 * the whole submission are read from setRaFile.  itemTabFile is tab
 * separated; its first line starts with '#' and names the fields, and each
 * following row becomes one record in the image table. */
{
struct hash *setHash = raReadSingle(setRaFile);
struct hash *colHash;
struct lineFile *lf = lineFileOpen(itemTabFile, TRUE);
char *header, *fields[256];
struct sqlConnection *conn = sqlConnect(database);
int fieldCount;
int setId;
struct hash *fullDirCache = newHash(0);
struct hash *screenDirCache = newHash(0);
struct hash *thumbDirCache = newHash(0);
struct hash *treatmentCache = newHash(0);
struct hash *bodyPartCache = newHash(0);
struct hash *sliceTypeCache = newHash(0);
struct hash *imageTypeCache = newHash(0);
struct hash *sectionSetCache = newHash(0);
struct dyString *sql = dyStringNew(0);

/* The header line gives us the column names and the row width. */
if (!lineFileNext(lf, &header, NULL))
    errAbort("%s appears to be empty", lf->fileName);
if (header[0] != '#')
    errAbort("First line of %s needs to start with #, and then contain field names",
    	lf->fileName);
colHash = hashRowOffsets(header+1);
fieldCount = colHash->elCount;
if (fieldCount >= ArraySize(fields))
    errAbort("Too many fields in %s", lf->fileName);

/* Every required field has to be supplied by the right file. */
    {
    int ix;

    for (ix=0; ix<ArraySize(requiredSetFields); ++ix)
	{
	char *wanted = requiredSetFields[ix];
	if (hashLookup(setHash, wanted) == NULL)
	    errAbort("Field %s is not in %s", wanted, setRaFile);
	}

    for (ix=0; ix<ArraySize(requiredItemFields); ++ix)
	{
	char *wanted = requiredItemFields[ix];
	if (hashLookup(colHash, wanted) == NULL)
	    errAbort("Field %s is not in %s", wanted, itemTabFile);
	}

    for (ix=0; ix<ArraySize(requiredFields); ++ix)
	{
	char *wanted = requiredFields[ix];
	if (hashLookup(colHash, wanted) == NULL && hashLookup(setHash, wanted) == NULL)
	    errAbort("Field %s is not in %s or %s", wanted, setRaFile, itemTabFile);
	}
    }

/* Create/find submission record. */
setId = saveSubmissionSet(conn, setHash);

/* One pass through the loop per remaining row of the tab file. */
while (lineFileNextRowTab(lf, fields, fieldCount))
    {
    /* Ids of rows in side tables, looked up through per-table caches. */
    int fullDir = cachedId(conn, "location", "name",
	fullDirCache, "fullDir", setHash, colHash, fields);
    int screenDir = cachedId(conn, "location", "name",
	screenDirCache, "screenDir", setHash, colHash, fields);
    int thumbDir = cachedId(conn, "location", "name",
	thumbDirCache, "thumbDir", setHash, colHash, fields);
    int bodyPart = cachedId(conn, "bodyPart", "name",
	bodyPartCache, "bodyPart", setHash, colHash, fields);
    int sliceType = cachedId(conn, "sliceType", "name",
	sliceTypeCache, "sliceType", setHash, colHash, fields);
    int imageType = cachedId(conn, "imageType", "name",
	imageTypeCache, "imageType", setHash, colHash, fields);
    int treatment = cachedId(conn, "treatment", "conditions",
	treatmentCache, "treatment", setHash, colHash, fields);

    /* Plain values; the last argument is the default handed to getVal. */
    char *fileName = getVal("fileName", setHash, colHash, fields, NULL);
    char *submitId = getVal("submitId", setHash, colHash, fields, NULL);
    char *taxon = getVal("taxon", setHash, colHash, fields, NULL);
    char *isEmbryo = getVal("isEmbryo", setHash, colHash, fields, NULL);
    char *age = getVal("age", setHash, colHash, fields, NULL);
    char *sectionSet = getVal("sectionSet", setHash, colHash, fields, "");
    char *sectionIx = getVal("sectionIx", setHash, colHash, fields, "0");
    char *gene = getVal("gene", setHash, colHash, fields, "");
    char *locusLink = getVal("locusLink", setHash, colHash, fields, "");
    char *refSeq = getVal("refSeq", setHash, colHash, fields, "");
    char *genbank = getVal("genbank", setHash, colHash, fields, "");
    char *priority = getVal("priority", setHash, colHash, fields, "200");
    int sectionId = 0;
    int previousId;

    /* A section set other than "" or "0" maps to a sectionSet row, which
     * is created the first time that name is seen in this run. */
    if (sectionSet[0] != 0 && !sameString(sectionSet, "0"))
	{
	struct hashEl *known = hashLookup(sectionSetCache, sectionSet);
	if (known == NULL)
	    {
	    sqlUpdate(conn, "insert into sectionSet values(default)");
	    sectionId = sqlLastAutoId(conn);
	    hashAdd(sectionSetCache, sectionSet, intToPt(sectionId));
	    }
	else
	    sectionId = ptToInt(known->val);
	}

    /* Is this file already loaded from the same full-size location? */
    dyStringClear(sql);
    dyStringAppend(sql, "select id from image ");
    dyStringPrintf(sql, "where fileName = '%s' ", fileName);
    dyStringPrintf(sql, "and fullLocation = %d",  fullDir);
    previousId = sqlQuickNum(conn, sql->string);
    if (previousId != 0 && !replace)
	errAbort("%s is already in database line %d of %s", 
	    fileName, lf->lineIx, lf->fileName);
    if (previousId != 0 && replace)
	{
	dyStringClear(sql);
	dyStringPrintf(sql, "delete from image where id = %d", previousId);
	sqlUpdate(conn, sql->string);
	}

    /* Build and run the insert for this image. */
    dyStringClear(sql);
    dyStringAppend(sql, "insert into image set\n");
    dyStringAppend(sql, " id = default,\n");
    dyStringPrintf(sql, " fileName = '%s',\n", fileName);
    dyStringPrintf(sql, " fullLocation = %d,\n", fullDir);
    dyStringPrintf(sql, " screenLocation = %d,\n", screenDir);
    dyStringPrintf(sql, " thumbLocation = %d,\n", thumbDir);
    dyStringPrintf(sql, " submissionSet = %d,\n", setId);
    dyStringPrintf(sql, " sectionSet = %d,\n", sectionId);
    dyStringPrintf(sql, " sectionIx = %s,\n", sectionIx);
    dyStringPrintf(sql, " submitId = '%s',\n", submitId);
    dyStringPrintf(sql, " gene = '%s',\n", gene);
    dyStringPrintf(sql, " locusLink = '%s',\n", locusLink);
    dyStringPrintf(sql, " refSeq = '%s',\n", refSeq);
    dyStringPrintf(sql, " genbank = '%s',\n", genbank);
    dyStringPrintf(sql, " priority = %s,\n", priority);
    dyStringPrintf(sql, " taxon = %s,\n", taxon);
    dyStringPrintf(sql, " isEmbryo = %s,\n", isEmbryo);
    dyStringPrintf(sql, " age = %s,\n", age);
    dyStringPrintf(sql, " bodyPart = %d,\n", bodyPart);
    dyStringPrintf(sql, " sliceType = %d,\n", sliceType);
    dyStringPrintf(sql, " imageType = %d,\n", imageType);
    dyStringPrintf(sql, " treatment = %d\n", treatment);

    sqlUpdate(conn, sql->string);
    }
}
Beispiel #4
0
boolean xpParseNext(struct xp *xp, char *tag)
/* Skip through file until get given tag.  Then parse out the
 * tag and all of its children (calling atStartTag/atEndTag).
 * You can call this repeatedly to process all of a given tag
 * in file.
 * If tag is NULL the callbacks are made from the very first tag on.
 * Returns TRUE once the stack is back at the level recorded for the
 * matched tag (or, for a NULL tag, the level at entry), and FALSE when
 * xpGetChar returns 0 while reading text between tags. */

{
char c;
int i, attCount = 0;
struct dyString *text = NULL;	/* Text buffer of innermost open tag, NULL until a start tag is seen. */
boolean isClosed;
boolean inside = (tag == NULL);	/* TRUE while callbacks should be made. */
struct xpStack *initialStack = xp->stack;	/* Stack level at which we return TRUE. */

for (;;)
    {
    /* Load up text until next tag. */
    for (;;)
        {
	if ((c = xpGetChar(xp)) == 0)
	    return FALSE;
	if (c == '<')
	    break;
	if (c == '&')
	   /* NOTE(review): text can still be NULL here if no start tag has
	    * been seen yet - confirm xpLookup copes with that. */
	   xpLookup(xp, xp->endTag, text);
	else 
	    {
	    if (c == '\n')
		++xp->lineIx;
	    /* Characters seen before any start tag are dropped. */
	    if (text != NULL)
		dyStringAppendC(text, c);
	    }
	}

    /* Get next character to figure out what type of tag. */
    c = xpGetChar(xp);
    if (c == 0)
       xpError(xp, "End of file inside tag");
    else if (c == '?' || c == '!')
        xpEatComment(xp, c);
    else if (c == '/')  /* Closing tag. */
        {
	struct xpStack *stack = xp->stack;
	if (stack >= xp->stackBufEnd)
	    xpError(xp, "Extra end tag");
	xpParseEndTag(xp, stack->tag->string);
	if (inside)
	    xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
	/* Pop: the stack pointer moves up on pop and down on push. */
	xp->stack += 1;
	if (xp->stack == initialStack)
	    return TRUE;
	}
    else	/* Start tag. */
        {
	/* Push new frame on stack and check for overflow and unallocated strings. */
	struct xpStack *stack = --xp->stack;
	if (stack < xp->stackBuf)
	    xpError(xp, "Stack overflow");
	/* Frame strings are created on first use and cleared on reuse. */
	if (stack->tag == NULL)
	    stack->tag = newDyString(32);
	else
	    dyStringClear(stack->tag);
	if (stack->text == NULL)
	    stack->text = newDyString(256);
	else
	    dyStringClear(stack->text);
	text = stack->text;

	/* Parse the start tag. */
	/* The character read to classify the tag belongs to the tag name, so put it back. */
	xpUngetChar(xp);
	xpParseStartTag(xp, ArraySize(xp->attDyBuf), stack->tag, 
		&attCount, xp->attDyBuf, &isClosed);

	/* First occurrence of the wanted tag: start making callbacks and
	 * remember the level the stack will have once this tag is popped. */
	if (!inside && sameString(stack->tag->string, tag))
	    {
	    inside = TRUE;
	    initialStack = xp->stack + 1;
	    }

	/* Call user start function, and if closed tag, end function too. */
	if (inside)
	    {
	    /* Unpack attributes into simple array of strings. */
	    for (i=0; i<attCount; ++i)
		xp->attBuf[i] = xp->attDyBuf[i]->string;
	    xp->attBuf[attCount] = NULL;
	    xp->atStartTag(xp->userData, stack->tag->string, xp->attBuf);
	    }
	if (isClosed)
	    {
	    /* Tag closed itself, so pop its frame right away. */
	    if (inside)
		xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
	    xp->stack += 1;
	    if (xp->stack == initialStack)
		return TRUE;
	    }
	}
    }
}
struct tagStorm *idfToStormTop(char *fileName)
/* Read an idf.txt format file and return it as a tagStorm that holds a
 * single top-level stanza.  Lines with fewer than two tab-separated fields
 * are skipped. */
{
/* The storm starts out with one stanza that has no tags yet */
struct tagStorm *storm = tagStormNew(fileName);
struct tagStanza *top = tagStanzaNew(storm, NULL);

/* Values of all tags starting with this prefix are gathered into one list */
char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data";
struct dyString *additionalFileDy = dyStringNew(0);

/* The secondary accession tag may repeat, so its values are gathered too */
char *secondaryAccessionTag = "idf.Comment_SecondaryAccession";
struct dyString *secondaryAccessionDy = dyStringNew(0);

/* Go through the idf file one line at a time */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct dyString *joined = dyStringNew(0);
while (lineFileNextReal(lf, &line))
    {
    char *cols[256];
    char tag[256];
    int colCount, ix;

    /* Drop trailing white space, then split on tabs and check the size */
    eraseTrailingSpaces(line);
    colCount = chopTabs(line, cols);
    if (colCount == ArraySize(cols))
        errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName);
    if (colCount < 2)
	continue;

    /* First column becomes the tag name */
    aeFieldToNormalField("idf.", trimSpaces(cols[0]), tag, sizeof(tag));

    /* Author list is passed through as it is */
    if (sameString(tag, "idf.Publication_Author_List"))
        {
	tagStanzaAppend(storm, top, tag, cols[1]);
	continue;
	}

    /* These two kinds are only collected here and added after the loop */
    if (startsWith(additionalFilePrefix, tag))
        {
	csvEscapeAndAppend(additionalFileDy, cols[1]);
	continue;
	}
    if (sameString(secondaryAccessionTag, tag))
        {
	csvEscapeAndAppend(secondaryAccessionDy, cols[1]);
	continue;
	}

    /* Anything else: all remaining columns go into one value */
    dyStringClear(joined);
    for (ix=1; ix<colCount; ++ix)
	csvEscapeAndAppend(joined, cols[ix]);
    tagStanzaAppend(storm, top, tag, joined->string);
    }

/* Add the collected lists, if there was anything to collect */
if (additionalFileDy->stringSize != 0)
     tagStanzaAppend(storm, top, additionalFilePrefix, additionalFileDy->string);
if (secondaryAccessionDy->stringSize != 0)
     tagStanzaAppend(storm, top, secondaryAccessionTag, secondaryAccessionDy->string);

dyStringFree(&secondaryAccessionDy);
dyStringFree(&additionalFileDy);
dyStringFree(&joined);
lineFileClose(&lf);
return storm;
}
static int doBacs(struct sqlConnection *conn, int taxon, char *db)
/* fetch available sequence for bacEndPairs */
{
struct dyString *dy = dyStringNew(0);
struct dnaSeq *chromSeq = NULL;
struct bac *bacs = bacRead(conn, taxon, db);
struct bac *bac = NULL;
char *chrom = cloneString("");
int count = 0;

verbose(1,"bac list read done.\n");

for(bac=bacs;bac;bac=bac->next)
    {
    
    if (differentWord(chrom,bac->chrom))
	{
	verbose(1,"switching to chrom %s\n",bac->chrom);
	dnaSeqFree(&chromSeq); 
	chromSeq = hLoadChrom(bac->chrom,db);
	freez(&chrom);
	chrom = cloneString(bac->chrom);
	}

    char *dna = checkAndFetchBacDna(chromSeq, bac);
    if (sameString(bac->strand,"-"))
	{
	reverseComplement(dna,strlen(dna));
	}


    dyStringClear(dy);
    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe);
    if (sqlQuickNum(conn,dy->string)>0)
	{
	/* record exists and hasn't already been updated */

	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	
	if (vgPrb == 0)
	    {
	    dyStringClear(dy);
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq='");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " tName='%s',\n", bac->chrom);
	    dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart);
	    dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd);
	    dyStringPrintf(dy, " tStrand='%s',\n", bac->strand);
	    dyStringPrintf(dy, " db='%s',\n", db);
	    dyStringPrintf(dy, " state='%s'\n", "seq");
	    dyStringPrintf(dy, " where id=%d\n", bac->probe);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    //verbose(2, "%s\n", dy->string); // the sql string could be quite large
	    sqlUpdate(conn, dy->string);
	    }
	else  /* probe seq already exists */ 
	    { 
	    /* just re-map the probe table recs to it */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringClear(dy);
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe);
	    sqlUpdate(conn, dy->string);
	    }
	    
	++count; 
	
    
	verbose(2,"%d finished bac for probe id %d size %d\n", 
	    count, bac->probe, bac->chromEnd - bac->chromStart);
	}

    freez(&dna);
    }

freez(&chrom);
dnaSeqFree(&chromSeq);

bacFreeList(&bacs);

dyStringFree(&dy);

return count;  
}
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db)
/* Read the fasta file "mrna.fa".  For every record, each vgPrb row of the
 * given taxon and type whose tName is the record name and whose state is
 * still 'new' either gets the sequence stored on it, or - when
 * findVgPrbBySeq returns another probe id for that sequence - has its
 * vgPrbMap rows pointed at that probe and is deleted. */
{
struct dyString *buf = dyStringNew(0);
struct lineFile *lf = lineFileOpen("mrna.fa", TRUE);
int lineSize;
char *line;
char *recName;
char *recDna;
boolean more = lineFileNext(lf, &line, &lineSize);

while (more)
    {
    /* We must be looking at the '>' line of a record here. */
    if (line[0] != '>')
	errAbort("unexpected error out of phase\n");
    recName = cloneString(line+1);
    verbose(2,"name=%s\n",recName);

    /* Glue together the sequence lines up to the next '>' line or
     * the end of the file. */
    dyStringClear(buf);
    for (;;)
	{
	more = lineFileNext(lf, &line, &lineSize);
	if (!more || line[0] == '>')
	    break;
	dyStringAppend(buf,line);
	}
    recDna = cloneString(buf->string);

    /* Take matching probes one at a time until the query finds none.
     * Each pass changes or removes the row it found. */
    for (;;)
	{
	int probeId = 0;
	int sameSeqId;

	dyStringClear(buf);
	dyStringPrintf(buf, "select id from vgPrb "
	   "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,recName);
	probeId = sqlQuickNum(conn,buf->string);
	if (probeId==0)
	    break;       /* no more records match */

	sameSeqId = findVgPrbBySeq(conn,recDna,taxon);
	if (sameSeqId != 0)
	    {
	    /* Another probe id came back for this sequence: re-map to it */
	    dyStringClear(buf);
	    dyStringPrintf(buf, "update vgPrbMap set vgPrb=%d where vgPrb=%d",sameSeqId,probeId);
	    sqlUpdate(conn, buf->string);
	    /* and delete this one from vgPrb */
	    dyStringClear(buf);
	    dyStringPrintf(buf, "delete from vgPrb where id=%d",probeId);
	    sqlUpdate(conn, buf->string);
	    }
	else
	    {
	    /* Store the sequence and move the state on to 'seq' */
	    dyStringClear(buf);
	    dyStringAppend(buf, "update vgPrb set");
	    dyStringAppend(buf, " seq = '");
	    dyStringAppend(buf, recDna);
	    dyStringAppend(buf, "',\n");
	    dyStringPrintf(buf, " db = '%s',\n", db);
	    dyStringAppend(buf, " state = 'seq'\n");
	    dyStringPrintf(buf, " where id=%d\n", probeId);
	    dyStringPrintf(buf, " and state='%s'\n", "new");
	    verbose(2, "%s\n", buf->string);
	    sqlUpdate(conn, buf->string);
	    }
	}

    freez(&recName);
    freez(&recDna);
    }

lineFileClose(&lf);
dyStringFree(&buf);
}
Beispiel #8
0
void paraFlow(char *fileName, int pfArgc, char **pfArgv)
/* Compile the program in fileName phase by phase and, if all phases are
 * enabled, run it.
 *   fileName - source file; split into directory, base name and suffix
 *   pfArgc, pfArgv - arguments appended to the command line of the
 *                    compiled program in the execution phase
 * Processing stops quietly after the phase selected by endPhase.  The
 * parse tree is dumped to out.parse, out.bound, out.typed and out.folded,
 * and scope information to out.scope, as the matching phases finish.
 * With the "asm" option assembler is generated and assembled, otherwise
 * C code is generated and compiled with gcc.
 * NOTE(review): dump files opened at the top that belong to phases that
 * never run are not closed on the early returns.
 * NOTE(review): paths and pfArgv are put on system() command lines without
 * any quoting. */
{
struct pfCompile *pfc;
struct pfParse *program, *module;
char baseDir[256], baseName[128], baseSuffix[64];
char defFile[PATH_LEN];
char *parseFile = "out.parse";
char *typeFile = "out.typed";
char *boundFile = "out.bound";
char *scopeFile = "out.scope";
char *foldedFile = "out.folded";
char *cFile = "out.c";
FILE *parseF = mustOpen(parseFile, "w");
FILE *typeF = mustOpen(typeFile, "w");
FILE *scopeF = mustOpen(scopeFile, "w");
FILE *boundF = mustOpen(boundFile, "w");
FILE *foldedF = mustOpen(foldedFile, "w");

if (endPhase < 0)
    return;
verbose(2, "Phase 0 - initialization\n");
pfc = pfCompileNew();
getPaths(pfc);
splitPath(fileName, baseDir, baseName, baseSuffix);
pfc->baseDir = cloneString(baseDir);
safef(defFile, sizeof(defFile), "%s%s.pfh", baseDir, baseName);

if (endPhase < 1)
   return ;
verbose(2, "Phase 1 - tokenizing\n");
pfTokenizeInto(pfc, baseDir, baseName);

if (endPhase < 2)
    return;
verbose(2, "Phase 2 - parsing\n");
program = pfParseInto(pfc);
dumpParseTree(pfc, program, parseF);
carefulClose(&parseF);

if (endPhase < 3)
    return;
verbose(2, "Phase 3 - binding names\n");
pfBindVars(pfc, program);
dumpParseTree(pfc, program, boundF);
carefulClose(&boundF);

if (endPhase < 4)
    return;
verbose(2, "Phase 4 - type checking\n");
pfTypeCheck(pfc, &program);
dumpParseTree(pfc, program, typeF);
carefulClose(&typeF);

if (endPhase < 5)
    return;
verbose(2, "Phase 5 - polymorphic, para, and flow checks\n");
checkPolymorphic(pfc, pfc->scopeRefList);
checkParaFlow(pfc, program);
printScopeInfo(scopeF, 0, program);
carefulClose(&scopeF);

if (endPhase < 6)
    return;
verbose(2, "Phase 6 - constant folding\n");
pfConstFold(pfc, program);
dumpParseTree(pfc, program, foldedF);
/* Close like the other dumps so the file is complete before any
 * external command is run. */
carefulClose(&foldedF);

if (optionExists("asm"))
    {
    struct dyString *gccFiles;

    if (endPhase < 7)
	return;
    verbose(2, "Phase 7 - nothing\n");

    if (endPhase < 8)
	return;
    verbose(2, "Phase 8 - Code generation\n");

    pfc->backEnd = backEndFind("mac-pentium");
    gccFiles = asmCoder(pfc, program, baseDir, baseName);

    if (endPhase < 9)
        return;
    verbose(2, "Phase 9 - Assembling pentium code\n");
        {
	/* One gcc call assembles and links everything. */
	char *libName = hashMustFindVal(pfc->cfgHash,"runAsmLib");
	struct dyString *dy = dyStringNew(0);
	int err;
	dyStringPrintf(dy, "gcc ");
	dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
	dyStringPrintf(dy, "-o %s%s ", baseDir, baseName);
	dyStringAppend(dy, gccFiles->string);
	dyStringPrintf(dy, "%s ", libName);
	dyStringPrintf(dy, " %s ", pfc->runtimeLib);
	dyStringPrintf(dy, "%s ", pfc->jkwebLib);
	verbose(2, "%s\n", dy->string);
	err = system(dy->string);
	if (err != 0)
	    errAbort("Couldn't assemble: %s", dy->string);
	dyStringFree(&dy);
	}
    dyStringFree(&gccFiles);
    }
else
    {
    verbose(2, "Phase 7 - nothing\n");
    if (endPhase < 8)
	return;
    verbose(2, "Phase 8 - C code generation\n");
    pfCodeC(pfc, program, baseDir, cFile);
    verbose(2, "%d modules, %d tokens, %d parseNodes\n",
	pfc->moduleHash->elCount, pfc->tkz->tokenCount, pfParseCount(program));

    if (endPhase < 9)
	return;
    verbose(2, "Phase 9 - compiling C code\n");
    /* Now run gcc on it. */
	{
	struct dyString *dy = dyStringNew(0);
	int err;
	/* Compile each module that is not a reference and whose name
	 * does not start with '<' into its own .o file. */
	for (module = program->children; module != NULL; module = module->next)
	    {
	    if (module->name[0] != '<' && module->type != pptModuleRef)
		{
		struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name);
		char *cName = replaceSuffix(mod->fileName, ".pf", ".c");
		char *oName = replaceSuffix(mod->fileName, ".pf", ".o");
		dyStringClear(dy);
		dyStringAppend(dy, "gcc ");
		dyStringAppend(dy, "-O ");
		dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
		dyStringAppend(dy, "-c ");
		dyStringAppend(dy, "-o ");
		dyStringPrintf(dy, "%s ", oName);
		dyStringPrintf(dy, "%s ", cName);
		verbose(2, "%s\n", dy->string);
		err = system(dy->string);
		if (err != 0)
		    errAbort("Couldn't compile %s.c", module->name);
		freeMem(oName);
		freeMem(cName);
		}
	    }
	/* Link: the generated C file plus one .o per module, where
	 * referenced modules are named after their .pfh file. */
	dyStringClear(dy);
	dyStringAppend(dy, "gcc ");
	dyStringAppend(dy, "-O ");
	dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
	dyStringPrintf(dy, "-o %s%s ", baseDir, baseName);
	dyStringPrintf(dy, "%s ", cFile);
	for (module = program->children; module != NULL; module = module->next)
	    {
	    if (module->name[0] != '<')
		{
		struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name);
		char *suffix = (module->type == pptModuleRef ? ".pfh" : ".pf");
		char *oName = replaceSuffix(mod->fileName, suffix, ".o");
		dyStringPrintf(dy, "%s ", oName);
		freeMem(oName);
		}
	    }
	dyStringPrintf(dy, " %s ", pfc->runtimeLib);
	dyStringPrintf(dy, "%s ", pfc->jkwebLib);
	dyStringAppend(dy, "-lpthread -lm");
	verbose(2, "%s\n", dy->string);
	err = system(dy->string);
	if (err != 0)
	    /* The format needs a %s, or the command line is not shown. */
	    errnoAbort("problem compiling:\n%s", dy->string);
	dyStringFree(&dy);
	}

    }
if (endPhase < 10)
    return;

verbose(2, "Phase 10 - execution\n");
/* Now go run program itself. */
    {
    struct dyString *dy = dyStringNew(0);
    int err;
    int i;
    /* Without a directory part, put "./" in front of the program name. */
    if (baseDir[0] == 0)
	dyStringPrintf(dy, "./%s", baseName);
    else
	dyStringPrintf(dy, "%s%s", baseDir, baseName);
    for (i=0; i<pfArgc; ++i)
	{
	dyStringAppendC(dy, ' ');
	dyStringAppend(dy, pfArgv[i]);
	}
    err = system(dy->string);
    if (err != 0)
	errAbort("problem running %s", baseName);
    dyStringFree(&dy);
    }
}
Beispiel #9
0
struct submitFileRow *submitFileRowFromFieldedTable(
    struct sqlConnection *conn, struct fieldedTable *table,
    int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx)
/* Turn parsed out table (still all just strings) into list of edwFiles. */
{
struct submitFileRow *sfr, *sfrList = NULL;
struct edwFile *bf;
struct fieldedRow *fr;
struct dyString *tags = dyStringNew(0);
char *ucscDbTag = "ucsc_db";
int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount);


for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    AllocVar(bf);
    bf->submitFileName = cloneString(row[fileIx]);
    safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]);
    bf->size = sqlLongLong(row[sizeIx]);
    bf->updateTime = sqlLongLong(row[modifiedIx]);

    /* Add as tags any fields not included in fixed fields. */
    dyStringClear(tags);
    int i;
    for (i=0; i<table->fieldCount; ++i)
        {
	if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx)
	    {
	    cgiEncodeIntoDy(table->fields[i], row[i], tags);
	    }
	}
    if (ucscDbField < 0)
        {
	/* Try to make this field up from file name */
	char *slash = strchr(bf->submitFileName, '/');
	if (slash == NULL)
	    errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName);
	int len = slash - bf->submitFileName;
	char ucscDbVal[len+1];
	memcpy(ucscDbVal, bf->submitFileName, len);
	ucscDbVal[len] = 0;

	/* Do a little check on it */
	if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal)
	    && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal)
	    && !sameString("hg19", ucscDbVal))
	    errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " 
	             "level directory in the fileName in the manifest is a UCSC database name "
		     "like 'hg19' or 'mm10.'  Alternatively please include a ucsc_db column.",
		     ucscDbVal);

	/* Add it to tags. */
	cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags);
	}
    bf->tags = cloneString(tags->string);

    /* Fake other fields. */
    bf->edwFileName  = cloneString("");

    /* Allocate wrapper structure */
    AllocVar(sfr);
    sfr->file = bf;

    /* fill in fields about replacement maybe */
    if (replacesIx != -1)
        {
	char *replacesAcc = row[replacesIx];
	char *reason = row[replaceReasonIx];
	int fileId = edwFileIdForLicensePlate(conn, replacesAcc);
	if (fileId == 0)
	    errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag);
	sfr->replaces = cloneString(replacesAcc);
	sfr->replaceReason = cloneString(reason);
	sfr->replacesFile = fileId;
	}

    slAddHead(&sfrList, sfr);
    }
slReverse(&sfrList);
dyStringFree(&tags);
return sfrList;
}
static struct slName *getProbeList(struct sqlConnection *conn, int id)
/* Get list of probes with hyperlinks to probe info page.
 *   conn - connection used for all lookups
 *   id - id of the image whose probes are wanted
 * Returns one slName per probe of the image.  Each name is an <A> element
 * that links to the probe page and shows the probe type, followed by the
 * antibody name, " sequenced" / " from primers", or the bac name where
 * the type calls for it.  A probe without a type shows only what
 * naForEmpty gives for it.
 * NOTE(review): dy is never freed in this function.
 * NOTE(review): names are added at the tail and the list is then
 * reversed - confirm that this order is the intended one. */
{
struct slName *returnList = NULL;
char query[256];
char *sidUrl = cartSidUrlString(cart);
struct dyString *dy = dyStringNew(0);
struct slInt *probeList = NULL, *probe;
int submissionSource = 0;

/* Make up a list of all probes in this image. */
safef(query, sizeof(query),
   "select probe from imageProbe where image=%d", id);
probeList = sqlQuickNumList(conn, query);

/* The submission source is the same for all probes of the image. */
safef(query, sizeof(query),
   "select submissionSet.submissionSource from image, submissionSet"
   " where image.submissionSet = submissionSet.id and image.id=%d", id);
submissionSource = sqlQuickNum(conn, query);

for (probe = probeList; probe != NULL; probe = probe->next)
    {
    char *type;

    /* Create hyperlink to probe page around gene name. */
    dyStringClear(dy);
    dyStringPrintf(dy, "<A HREF=\"%s?%s&%s=%d&%s=%d\" target=_parent>",
    	hgVisiGeneCgiName(), sidUrl, hgpDoProbe, probe->val, hgpSs, submissionSource);
    safef(query, sizeof(query), 
    	"select probeType.name from probeType,probe where probe.id = %d "
	"and probe.probeType = probeType.id", 
	probe->val);
    type = sqlQuickString(conn, query);
    dyStringPrintf(dy, "%s", naForEmpty(type));
    if (type == NULL)
        {
	/* No probe type came back.  There is nothing type specific to
	 * add, and NULL must not be handed to sameWord. */
	}
    else if (sameWord(type, "antibody"))
        {
	char *abName;
	safef(query, sizeof(query), 
	   "select antibody.name from probe,antibody "
	   "where probe.id = %d and probe.antibody = antibody.id"
	   , probe->val);
	abName = sqlQuickString(conn, query);
	if (abName != NULL)
	    {
	    dyStringPrintf(dy, " %s", abName);
	    freeMem(abName);
	    }
	}
    else if (sameWord(type, "RNA"))
        {
	/* Say where the RNA probe comes from: a stored sequence is
	 * checked first, then a forward primer. */
	safef(query, sizeof(query),
	    "select length(seq) from probe where id=%d", probe->val);
	if (sqlQuickNum(conn, query) > 0)
	    dyStringPrintf(dy, " sequenced");
	else
	    {
	    safef(query, sizeof(query),
		"select length(fPrimer) from probe where id=%d", probe->val);
	    if (sqlQuickNum(conn, query) > 0)
	        dyStringPrintf(dy, " from primers");
	    }
	}
    else if (sameWord(type, "BAC"))
        {
	char *name;
	safef(query, sizeof(query), 
	   "select bac.name from probe,bac "
	   "where probe.id = %d and probe.bac = bac.id"
	   , probe->val);
	name = sqlQuickString(conn, query);
	if (name != NULL)
	    {
	    dyStringPrintf(dy, " %s", name);
	    freeMem(name);
	    }
	}
    dyStringPrintf(dy, "</A>");
    freez(&type);

    /* Add to return list. */
    slNameAddTail(&returnList, dy->string);
    }

slFreeList(&probeList);
slReverse(&returnList);
return returnList;
}
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId)
/* Given gene ID, try and find known gene on browser in same
 * species.  Returns a freshly allocated hgGene URL that the caller
 * should free, or NULL if no known gene could be found. */
{
char query[256];
int taxon;
char *url = NULL;
char *genomeDb = NULL;

/* Figure out taxon. */
safef(query, sizeof(query), 
    "select taxon from gene where id = %d", geneId);
taxon = sqlQuickNum(conn, query);

genomeDb = hDbForTaxon(conn, taxon);
if (genomeDb != NULL)
    {
    /* Make sure known genes track exists - we may need
     * to tweak this at some point for model organisms. */
    safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb);
    if (!sqlTableExists(conn, query))
        freez(&genomeDb);  /* free the name (and NULL it) instead of just dropping it */
    }

/* If no usable db for that organism fall back to the default db. */
if (genomeDb == NULL)
    genomeDb = hDefaultDb();

safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb);
if (sqlTableExists(conn, query))
    {
    struct dyString *dy = dyStringNew(0);
    char *knownGene = NULL;
    if (sqlCountColumnsInTable(conn, query) == 3)
        {
        /* Three column form of the table can be looked up by gene id directly. */
        dyStringPrintf(dy, 
           "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId);
        }
    else
        {
        /* Otherwise go through the images associated with this gene's probes. */
        struct slName *imageList, *image;
        safef(query, sizeof(query), 
            "select imageProbe.image from probe,imageProbe "
            "where probe.gene=%d and imageProbe.probe=probe.id", geneId);
        imageList = sqlQuickList(conn, query);
        if (imageList != NULL)
            {
            dyStringPrintf(dy, 
               "select name from %s.knownToVisiGene ", genomeDb);
            dyStringAppend(dy,
               "where value in(");
            for (image = imageList; image != NULL; image = image->next)
                {
                dyStringPrintf(dy, "'%s'", image->name);
                if (image->next != NULL)
                    dyStringAppendC(dy, ',');
                }
            dyStringAppend(dy, ")");
            slFreeList(&imageList);
            }
        }
    /* dy is still empty if there were no images to look up. */
    if (dy->stringSize > 0)
        {
        knownGene = sqlQuickString(conn, dy->string);
        if (knownGene != NULL)
            {
            dyStringClear(dy);
            dyStringPrintf(dy, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none",
                genomeDb, knownGene);
            url = dyStringCannibalize(&dy);
            freez(&knownGene);  /* name has been copied into the url */
            }
        }
    dyStringFree(&dy);
    }
/* NOTE(review): this also frees the hDefaultDb() result, as the original
 * code did - confirm hDefaultDb() hands back memory the caller owns. */
freez(&genomeDb);
return url;
}
static struct slName *geneProbeList(struct sqlConnection *conn, int id)
/* Get list of gene names for the probes of image id.  Each name is wrapped
 * in a hyperlink to the known genes page when getKnownGeneUrl() finds one,
 * and is followed by the probe color when the image has more than one probe. */
{
struct slName *returnList = NULL;
char query[256], **row;
struct sqlResult *sr;
struct dyString *dy = dyStringNew(0);
struct probeAndColor *pcList = NULL, *pc;
int probeCount = 0;

/* Make up a list of all probes in this image. */
safef(query, sizeof(query),
   "select probe,probeColor from imageProbe where image=%d", id);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    AllocVar(pc);
    pc->probe = sqlUnsigned(row[0]);
    pc->probeColor = sqlUnsigned(row[1]);
    slAddHead(&pcList, pc);
    ++probeCount;
    }
/* Release the result before conn is reused for the per-probe queries below. */
sqlFreeResult(&sr);
slReverse(&pcList);

for (pc = pcList; pc != NULL; pc = pc->next)
    {
    int geneId;
    char *geneName;
    int probe = pc->probe;
    char *geneUrl = NULL;

    /* Get gene ID and name. */
    safef(query, sizeof(query), 
        "select gene from probe where id = %d", probe);
    geneId = sqlQuickNum(conn, query);
    geneName = vgGeneNameFromId(conn, geneId);
    
    /* Get url for known genes page if any. */
    geneUrl = getKnownGeneUrl(conn, geneId);

    /* Print gene name, surrounded by hyperlink to known genes
     * page if possible. */
    dyStringClear(dy);
    if (geneUrl != NULL)
        dyStringPrintf(dy, "<A HREF=\"%s\" target=_parent>",
            geneUrl);
    dyStringPrintf(dy, "%s", geneName);
    if (geneUrl != NULL)
        dyStringAppend(dy, "</A>");
    freez(&geneName);
    freez(&geneUrl);  /* url is allocated by getKnownGeneUrl; no-op when NULL */

    /* Add color if there's more than one probe for this image. */
    if (probeCount > 1)
        {
        char *color;
        safef(query, sizeof(query), 
            "select probeColor.name from probeColor "
            "where probeColor.id = %d"
            , pc->probeColor);
        color = sqlQuickString(conn, query);
        if (color != NULL)
            dyStringPrintf(dy, " (%s)", color);
        freez(&color);
        }

    /* Add to return list. */
    slNameAddTail(&returnList, dy->string);
    }

slFreeList(&pcList);
dyStringFree(&dy);
/* NOTE(review): entries are added with slNameAddTail and the list is then
 * reversed, so presumably the result comes back in reverse probe order.
 * Kept as is since callers may rely on it - confirm intent. */
slReverse(&returnList);
return returnList;
}
Beispiel #13
0
boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
				boolean *chunked, int *contentLength)
/* Extract HTTP response header from lf into hdr, tell if it's
 * "Transfer-Encoding: chunked" or if it has a contentLength.
 * lf            - input positioned at the start of the response.
 * hdr           - always set to a newly allocated copy of the header lines
 *                 collected so far (possibly empty), even on failure.
 * chunked       - optional; set TRUE when a chunked Transfer-Encoding line is seen.
 * contentLength - optional; set to the Content-Length value, or -1 if absent.
 * Returns TRUE only for a well formed header with status code 200. */
{
  struct dyString *header = newDyString(1024);
  char *line;
  int lineSize;
  boolean ok = FALSE;

  if (chunked != NULL)
    *chunked = FALSE;
  if (contentLength != NULL)
    *contentLength = -1;
  dyStringClear(header);
  if (lineFileNext(lf, &line, &lineSize))
    {
      if (!startsWith("HTTP/", line))
        {
          /* Report the line we actually got (the header is still empty at
           * this point), then put the line back; it is not part of hdr. */
          warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, line);
          lineFileReuse(lf);
        }
      else
        {
          char *version, *code;
          /* Save the status line before nextWord() chops it up in place. */
          dyStringAppendN(header, line, lineSize-1);
          dyStringAppendC(header, '\n');
          version = nextWord(&line);
          code = nextWord(&line);
          if (code == NULL)
            {
              warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
            }
          else if (!sameString(code, "200"))
            {
              /* line may have been left NULL by nextWord() when nothing
               * follows the code - presumably; guard so %s never sees NULL. */
              warn("%s: Errored HTTP response header: %s %s %s\n", lf->fileName, version, code,
                   (line != NULL) ? line : "");
            }
          else
            {
              ok = TRUE;
              while (lineFileNext(lf, &line, &lineSize))
                {
                  /* blank line means end of HTTP header */
                  if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
                    break;
                  if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
                    *chunked = TRUE;
                  dyStringAppendN(header, line, lineSize-1);
                  dyStringAppendC(header, '\n');
                  if (strstr(line, "Content-Length:"))
                    {
                      /* First word is the field name, second is the value.
                       * A missing value leaves contentLength untouched
                       * rather than handing NULL to atoi(). */
                      nextWord(&line);
                      code = nextWord(&line);
                      if (contentLength != NULL && code != NULL)
                        *contentLength = atoi(code);
                    }
                }
            }
        }
    }

  *hdr = cloneString(header->string);
  dyStringFree(&header);
  return ok;
} /* lineFileParseHttpHeader */
Beispiel #14
0
boolean lineFileNextFull(struct lineFile *lf, char **retFull, int *retFullSize,
                        char **retRaw, int *retRawSize)
// Fetch next line from file joining up any that are continued by ending '\'
// If requested, and was joined, the unjoined raw lines are also returned
// NOTE: comment lines can't be continued!  ("# comment \ \n more comment" is 2 lines.)
//   retFull     - set to the joined line, or to NULL when FALSE is returned.
//   retFullSize - optional; set to the length of the joined line.
//   retRaw      - optional; set to the newline-delimited raw lines when the line was
//                 continued, otherwise left NULL.
//   retRawSize  - optional; set to the length of the raw lines when retRaw is set.
// Returns TRUE when a full line was produced, FALSE once lineFileNext() has no more lines.
// The returned strings live in lf->fullLine / lf->rawLines, which are cleared by the
// next non-reuse call, so copy them if they must outlive it.
// NOTE(review): if input ends right after a continued line, the loop falls out and
// FALSE is returned without handing back the partial line - confirm that is intended.
{
// May have requested reusing the last full line.
if (lf->fullLineReuse)
    {
    lf->fullLineReuse = FALSE;
    assert(lf->fullLine != NULL);
    *retFull = dyStringContents(lf->fullLine);
    if (retFullSize)
        *retFullSize = dyStringLen(lf->fullLine);
    if (retRaw != NULL)
        {
        assert(lf->rawLines != NULL);
        *retRaw = dyStringContents(lf->rawLines);
        if (retRawSize)
            *retRawSize = dyStringLen(lf->rawLines);
        }
    return TRUE;
    }

// Empty pointers
*retFull = NULL;
if (retRaw != NULL)
    *retRaw = NULL;

// Prepare lf buffers
if (lf->fullLine == NULL)
    {
    lf->fullLine = dyStringNew(1024);
    lf->rawLines = dyStringNew(1024); // Better to always create it than test every time
    }
else
    {
    dyStringClear(lf->fullLine);
    dyStringClear(lf->rawLines);
    }

char *line;
while (lineFileNext(lf, &line, NULL))
    {
    char *start = skipLeadingSpaces(line);

    // Will the next line continue this one?
    // After this scan, 'end' marks where the usable text of this line stops:
    // at the terminating '\0', or at the continuation '\' if there is one.
    char *end = start;
    if (*start == '#')  // Comment lines can't be continued!
        end = start + strlen(start);
    else
        {
        while (*end != '\0')  // walking forward for efficiency (avoid strlens())
            {
            for (;*end != '\0' && *end != '\\'; end++) ; // Tight loop to find '\'
            if (*end == '\0')
                break;

            // This could be a continuation
            char *slash = end;
            // NOTE(review): on "\\" this continues with 'end' on the second backslash, so
            // that backslash is examined again as a possible continuation mark - confirm
            // this is the intended handling of an escaped backslash at end of line.
            if (*(++end) == '\\')  // escaped
                continue;
            end = skipLeadingSpaces(end);

            if (*end == '\0') // Just whitespace after '\', so true continuation mark
                {
                if (retRaw != NULL) // Only if actually requested.
                    {
                    dyStringAppendN(lf->rawLines,line,(end - line));
                    dyStringAppendC(lf->rawLines,'\n'); // New lines delimit raw lines.
                    }
                end = slash; // Don't need to zero, because of appending by length
                break;
                }
            }
        }

    // Stitch together full lines
    if (dyStringLen(lf->fullLine) == 0)
        dyStringAppendN(lf->fullLine,line,(end - line)); // includes first line's whitespace
    else if (start < end)             // don't include continued line's leading spaces
        dyStringAppendN(lf->fullLine,start,(end - start));

    // 'end' was left on the '\' only for a true continuation: go fetch the next piece.
    if (*end == '\\')
        continue;

    // Got a full line now!
    *retFull = dyStringContents(lf->fullLine);
    if (retFullSize)
        *retFullSize = dyStringLen(lf->fullLine);

    if (retRaw != NULL && dyStringLen(lf->rawLines) > 0) // Only if actually requested & continued
        {
        // This is the final line which doesn't have a continuation char
        dyStringAppendN(lf->rawLines,line,(end - line));
        *retRaw = dyStringContents(lf->rawLines);
        if (retRawSize)
            *retRawSize = dyStringLen(lf->rawLines);
        }
    return TRUE;
    }
return FALSE;
}
static void populateMissingVgPrb(struct sqlConnection *conn)
/* populate vgPrb where missing, usually after new records added to visiGene.
 * For every probe that has no vgPrbMap row yet, reuse the vgPrb record found
 * by findVgPrbBySeq() for its sequence or insert a new vgPrb record, then
 * insert the vgPrbMap row linking the probe to it.
 * conn streams the probe rows; two extra connections to the global
 * 'database' are opened for the lookups and inserts done while that result
 * is still being read. */
{
struct sqlResult *sr;
char **row;
struct dyString *dy = dyStringNew(0);
struct sqlConnection *conn2 = sqlConnect(database);
struct sqlConnection *conn3 = sqlConnect(database);
int probeCount=0, vgPrbCount=0;

dyStringAppend(dy, 
"select p.id,p.gene,antibody,probeType,fPrimer,rPrimer,p.seq,bac,g.taxon"
" from probe p join gene g"
" left join vgPrbMap m on m.probe = p.id"
" where g.id = p.gene"
"   and m.probe is NULL");
sr = sqlGetResult(conn, dy->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Columns 1-3 (gene, antibody, probeType) are selected but not used here. */
    int id = sqlUnsigned(row[0]); 
    char *fPrimer = row[4]; 
    char *rPrimer = row[5]; 
    char *seq = row[6]; 
    int bac = sqlUnsigned(row[7]); 
    int taxon = sqlUnsigned(row[8]); 

    char *peType = "none";
    int peProbe = id;
    /* isNotEmpty(seq) below already allows for a NULL seq, so make the
     * string that gets compared and appended NULL-safe as well. */
    char *peSeq = (seq != NULL) ? seq : "";
    char *tName = "";
    int tStart = 0;
    int tEnd = 0;
    char *tStrand = " ";
    char *state = "new";
    char *db = "";
    int vgPrb = 0;

    /* Decide how the probe sequence will have to be obtained later. */
    if (isNotEmpty(seq))
        {
        peType = "probe";
        state = "seq";
        }
    else if (isNotEmpty(fPrimer) && isNotEmpty(rPrimer))
        {
        peType = "primersMrna";
        }
    else if (isNotEmpty(fPrimer) && isEmpty(rPrimer))
        { /* only have fPrimer, it's probably a comment, not dna seq */
        peType = "refSeq";   /* use accession or gene */
        }
    else if (bac > 0)
        {
        peType = "bac";   /* use bacEndPairs */
        }
    else
        {
        peType = "refSeq";   /* use accession or gene */
        }

    /* Reuse an existing vgPrb record with the same sequence if there is one. */
    if (peSeq[0] != 0)
        {
        vgPrb = findVgPrbBySeq(conn3,peSeq,taxon);
        }

    if (vgPrb == 0)
        {
        dyStringClear(dy);
        dyStringAppend(dy, "insert into vgPrb set");
        dyStringPrintf(dy, " id=default,\n");
        dyStringPrintf(dy, " type='%s',\n", peType);
        dyStringAppend(dy, " seq='");
        dyStringAppend(dy, peSeq);
        dyStringAppend(dy, "',\n");
        dyStringPrintf(dy, " tName='%s',\n", tName);
        dyStringPrintf(dy, " tStart=%d,\n", tStart);
        dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
        dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
        dyStringPrintf(dy, " db='%s',\n", db);
        dyStringPrintf(dy, " taxon='%d',\n", taxon);
        dyStringPrintf(dy, " state='%s'\n", state);
        verbose(2, "%s\n", dy->string);
        sqlUpdate(conn2, dy->string);
        vgPrb = sqlLastAutoId(conn2);
        vgPrbCount++;
        }

    /* Link the probe to its (found or newly inserted) vgPrb record. */
    dyStringClear(dy);
    dyStringAppend(dy, "insert into vgPrbMap set");
    dyStringPrintf(dy, " probe=%d,\n", peProbe);
    dyStringPrintf(dy, " vgPrb=%d \n", vgPrb);
    verbose(2, "%s\n", dy->string);
    sqlUpdate(conn2, dy->string);

    probeCount++;
    }

verbose(1, "# new probe records found = %d, # new vgPrb records added = %d\n", probeCount, vgPrbCount);

dyStringFree(&dy);

sqlFreeResult(&sr);

sqlDisconnect(&conn3);
sqlDisconnect(&conn2);

}
Beispiel #16
0
static void readPartHeaderMB(struct mimeBuf *b, struct mimePart *p, char *altHeader)
/* Reads the header lines of the mimePart,
   saves the header settings in a hash.  */
{
struct dyString *fullLine = dyStringNew(0);
char *key=NULL, *val=NULL;
struct lineFile *lf = NULL;
char *line = NULL;
char *lineAhead = NULL;
int size = 0;
p->hdr = newHash(3);
	//debug
    	//fprintf(stderr,"headers dumpMB: ");
	//dumpMB(b);  //debug
if (altHeader)
    {
    lf = lineFileOnString("MIME Header", TRUE, altHeader);
    }
/* read ahead one line, skipping any leading blanks lines */   
do
    {
    if (altHeader)
	lineFileNext(lf, &lineAhead, &size);
    else
	lineAhead = getLineMB(b);
    } 
    while (sameString(lineAhead,""));

do
    {
    /* accumulate a full header line - some emailers split into mpl lines */
    dyStringClear(fullLine);
    do 
	{
	line = lineAhead;
	if (altHeader)
	    lineFileNext(lf, &lineAhead, &size);
	else
	    lineAhead = getLineMB(b);
	dyStringAppend(fullLine,line);    
	if (!altHeader) 
	    freez(&line);
	} while (isspace(lineAhead[0]));
    line = fullLine->string;
    //fprintf(stderr,"found a line! [%s]\n",line);  //debug
    key = line;
    val = strchr(line,':');
    if (!val)
	errAbort("readPartHeaderMB error - header-line colon not found, line=[%s]",line);
    *val = 0;
    val++;
    key=trimSpaces(key);
    // since the hash is case-sensitive, convert to lower case for ease of matching
    tolowers(key);  
    val=trimSpaces(val);
    hashAdd(p->hdr,key,cloneString(val));
    
    //debug
    //fprintf(stderr,"MIME header: key=[%s], val=[%s]\n",key,val);
    //fflush(stderr); 
    
    } while (!sameString(lineAhead,""));
if (altHeader)
    {
    if (nlType == nlt_undet)
	nlType = lf->nlType;
    lineFileClose(&lf);
    }
else
    {
    freez(&lineAhead);
    }
dyStringFree(&fullLine);
    
}
static void parseIsPcrHeader(char *name, char **retTName, int *retTStart,
	int *retTEnd, char **retTStrand, int *retProbeId)
/* Parse an isPcr fasta header line of the form
 *    >tName:tStart{+|-}tEnd probeId ...
 * *retTName is newly allocated (free with freez); *retTStrand points at a
 * string constant.  errAborts on a malformed header instead of walking
 * through a NULL pointer. */
{
char *word = name+1;    /* skip the leading '>' */
char *end = strchr(word,':');
if (end == NULL)
    errAbort("processIsPcr: expecting ':' after target name in isPcr header [%s]", name);
*retTName = cloneStringZ(word,end-word); 
word = end+1;
end = strchr(word,'+');
*retTStrand = "+";
if (!end)
    {
    end = strchr(word,'-');
    *retTStrand = "-";
    }
if (end == NULL)
    errAbort("processIsPcr: expecting '+' or '-' between coordinates in isPcr header [%s]", name);
*retTStart = atoi(word); 
word = end+1;
end = strchr(word,' ');
if (end == NULL)
    errAbort("processIsPcr: expecting probe id after coordinates in isPcr header [%s]", name);
*retTEnd = atoi(word); 
word = end+1;
*retProbeId = atoi(word); 
}

static void processIsPcr(struct sqlConnection *conn, int taxon, char *db)
/* process isPcr results: read the fasta records in isPcr.fa and, for each
 * one whose vgPrb record is still in state 'new', either store the
 * sequence and target position in that record, or, when findVgPrbBySeq()
 * already knows the sequence, re-map vgPrbMap to the existing record and
 * delete the duplicate. */
{

/* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */

struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
char *tName;
int tStart;
int tEnd;
char *tStrand;
int probeid=0;  /* really a vgPrb id */
boolean more = lineFileNext(lf, &line, &lineSize);
while(more)
    {
    if (line[0] != '>')
        errAbort("unexpected error out of phase\n");
    name = cloneString(line);
    verbose(1,"name=%s\n",name);

    /* Collect sequence lines up to the next header (or end of file). */
    dyStringClear(dy);
    while((more=lineFileNext(lf, &line, &lineSize)))
        {
        if (line[0] == '>')
            {
            break;
            }
        dyStringAppend(dy,line);
        }
    dna = cloneString(dy->string);

    parseIsPcrHeader(name, &tName, &tStart, &tEnd, &tStrand, &probeid);

    dyStringClear(dy);
    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid);
    if (sqlQuickNum(conn,dy->string)>0)
        {
        /* record exists and hasn't already been updated */

        int vgPrb = findVgPrbBySeq(conn,dna,taxon);

        if (vgPrb == 0)
            {
            dyStringClear(dy);
            dyStringAppend(dy, "update vgPrb set");
            dyStringAppend(dy, " seq='");
            dyStringAppend(dy, dna);
            dyStringAppend(dy, "',\n");
            dyStringPrintf(dy, " tName='%s',\n", tName);
            dyStringPrintf(dy, " tStart=%d,\n", tStart);
            dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
            dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
            dyStringPrintf(dy, " db='%s',\n", db);
            dyStringPrintf(dy, " state='%s'\n", "seq");
            dyStringPrintf(dy, " where id=%d\n", probeid);
            dyStringPrintf(dy, " and state='%s'\n", "new");
            verbose(2, "%s\n", dy->string);
            sqlUpdate(conn, dy->string);
            }
        else  /* probe seq already exists */ 
            { 
            /* just re-map the probe table recs to it */
            dyStringClear(dy);
            dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid);
            sqlUpdate(conn, dy->string);
            /* and delete it from vgPrb */
            dyStringClear(dy);
            dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid);
            sqlUpdate(conn, dy->string);
            }
        }

    freez(&tName);
    freez(&name);
    freez(&dna);
    }
lineFileClose(&lf);

dyStringFree(&dy);
}
Beispiel #18
0
void condenseValues()
/* combine values for single snp: read SNPSubSNPLink and write one line per
 * snp_id to the SNPSubSNPLinkCondense tab file, holding the snp id, the
 * comma separated subsnp ids, the comma separated build ids, the first and
 * last build id seen and the number of rows combined.
 * Rows for the same snp are expected to be adjacent, and snp ids are
 * expected to be non-decreasing (errAborts otherwise). */
{
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;
FILE *f;
struct dyString *ssList = newDyString(255);
struct dyString *buildList = newDyString(255);
char *currentSnpString = NULL;
int currentSnpNum = 0;
int count = 0;
char firstBuild[32];
char lastBuild[32];

f = hgCreateTabFile(".", "SNPSubSNPLinkCondense");

sqlSafef(query, sizeof(query), "select snp_id, subsnp_id, build_id from SNPSubSNPLink");

sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (currentSnpString != NULL && sameString(row[0], currentSnpString))
        {
        /* One more row for the snp being accumulated. */
        count++;
        dyStringAppend(ssList, ",");
        dyStringAppend(ssList, row[1]);
        dyStringAppend(buildList, ",");
        dyStringAppend(buildList, row[2]);
        safef(lastBuild, sizeof(lastBuild), "%s", row[2]);
        continue;
        }

    if (currentSnpString != NULL)
        {
        /* A different snp: write out the finished one first. */
        fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", 
                   currentSnpString, ssList->string, buildList->string, firstBuild, lastBuild, count);
        if (currentSnpNum > sqlUnsigned(row[0]))
            errAbort("snps out of order: %d before %s\n", currentSnpNum, row[0]);
        freez(&currentSnpString);
        }

    /* Start accumulating a new snp.  The very first snp goes through here
     * too, so its count starts at 1 like every other snp. */
    currentSnpString = cloneString(row[0]);
    currentSnpNum = sqlUnsigned(row[0]);
    dyStringClear(ssList);
    dyStringPrintf(ssList, "%s", row[1]);
    dyStringClear(buildList);
    dyStringPrintf(buildList, "%s", row[2]);
    /* Database text must never be used as the format string itself. */
    safef(firstBuild, sizeof(firstBuild), "%s", row[2]);
    safef(lastBuild, sizeof(lastBuild), "%s", row[2]);
    count = 1;
    }

/* Write the last snp; there is nothing to write if the table was empty. */
if (currentSnpString != NULL)
    fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", currentSnpString, ssList->string, buildList->string, 
                                           firstBuild, lastBuild, count);
freez(&currentSnpString);
dyStringFree(&ssList);
dyStringFree(&buildList);
sqlFreeResult(&sr);
hFreeConn(&conn);
carefulClose(&f);
}
static void doPrimers(struct sqlConnection *conn, int taxon, char *db)
/* get probe seq from primers */
{
int rc = 0;
struct dyString *dy = dyStringNew(0);
char cmdLine[256];
char path1[256];
char path2[256];

dyStringClear(dy);
dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'");
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */
    {

    dyStringClear(dy);
    dyStringPrintf(dy, "select qName from %s.all_mrna",db);
    rc = 0;
    rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE);
    safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db);
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    
    verbose(1,"rc = %d = count of mrna for %s\n",rc,db);

    system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date");
    system("ls -l");

    processIsPcr(conn,taxon,db);

    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");

    }
unlink("primers.query");    

/* find any remaining type primersMrna that couldn't be resolved and demote 
 * them to type primersGenome
 */
dyStringClear(dy);
dyStringAppend(dy, "update vgPrb set type='primersGenome'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersMrna'");
sqlUpdate(conn, dy->string);



/* get primers for those probes that did not find mrna isPcr matches 
 * and then do them against the genome instead */
dyStringClear(dy);
dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'");
rc = 0;
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */
    {
    safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db);
    safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
    verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

    safef(cmdLine,sizeof(cmdLine),
	    "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'",
	    getCurrentDir(),db);
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
    verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l");

    processIsPcr(conn,taxon,db);
    
    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");

    }
unlink("primers.query");    

/* find any remaining type primersGenome that couldn't be resolved and demote 
 * them to type refSeq
 */
dyStringClear(dy);
dyStringAppend(dy, "update vgPrb set type='refSeq'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersGenome'");
sqlUpdate(conn, dy->string);

dyStringFree(&dy);
}
Beispiel #20
0
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
	int resTryCount, int resScales[], int resSizes[], 
	boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f.  Save info
 * about sections in bounds.
 * A section is written whenever itemsPerSlot items have been collected, the
 * chromosome changes, or input ends.  usageList has to list the chromosomes
 * in the order they appear in lf, and sectionCount has to match the number
 * of sections this produces.  resSizes[] is incremented with the zoom item
 * counts for each of the resTryCount scales in resScales[], and
 * *retMaxSectionSize is set to the largest uncompressed section size. */
{
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Figure out whether need to output section. */
    boolean sameChrom = FALSE;
    if (rowSize > 0)
        {
        if (usage == NULL)
            errAbort("read %s, but no chromosomes are expected, line %d in file %s\n",
                row[0], lf->lineIx, lf->fileName);
        sameChrom = sameString(row[0], usage->name);
        }
    if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
        {
        /* Only write a section if something was collected: with empty input
         * or a mismatch on the very first row there are no items yet, and
         * items[itemIx-1] would be read out of bounds. */
        if (itemIx > 0)
            {
            /* Figure out section position. */
            bits32 chromId = usage->id;
            bits32 sectionStart = items[0].start;
            bits32 sectionEnd = items[itemIx-1].end;

            /* Save section info for indexing. */
            assert(sectionIx < sectionCount);
            struct bbiBoundsArray *section = &bounds[sectionIx++];
            section->offset = ftell(f);
            section->range.chromIx = chromId;
            section->range.start = sectionStart;
            section->range.end = sectionEnd;

            /* Output section header to stream. */
            dyStringClear(stream);
            UBYTE type = bwgTypeBedGraph;
            bits16 itemCount = itemIx;
            dyStringWriteOne(stream, chromId);		// chromId
            dyStringWriteOne(stream, sectionStart);	// start
            dyStringWriteOne(stream, sectionEnd);	// end
            dyStringWriteOne(stream, reserved32);	// itemStep
            dyStringWriteOne(stream, reserved32);	// itemSpan
            dyStringWriteOne(stream, type);		// type
            dyStringWriteOne(stream, reserved8);	// reserved
            dyStringWriteOne(stream, itemCount);	// itemCount

            /* Output each item in section to stream. */
            int i;
            for (i=0; i<itemIx; ++i)
                {
                struct sectionItem *item = &items[i];
                dyStringWriteOne(stream, item->start);
                dyStringWriteOne(stream, item->end);
                dyStringWriteOne(stream, item->val);
                }

            /* Save stream to file, compressing if need be. */
            if (stream->stringSize > maxSectionSize)
                maxSectionSize = stream->stringSize;
            if (doCompress)
                {
                size_t maxCompSize = zCompBufSize(stream->stringSize);
                char compBuf[maxCompSize];
                int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
                mustWrite(f, compBuf, compSize);
                }
            else
                mustWrite(f, stream->string, stream->stringSize);
            }

        /* If at end of input we are done. */
        if (rowSize == 0)
            break;

        /* Set up for next section. */
        itemIx = 0;

        if (!sameChrom)
            {
            /* Move on to the next expected chromosome.  These are real
             * errors in the input, so don't rely on assert() for them. */
            usage = usage->next;
            if (usage == NULL)
                errAbort("read %s, but no more chromosomes are expected, line %d in file %s\n",
                    row[0], lf->lineIx, lf->fileName);
            if (!sameString(row[0], usage->name))
                errAbort("read %s, expecting %s on line %d in file %s\n", 
                    row[0], usage->name, lf->lineIx, lf->fileName);
            lastB = NULL;
            for (resTry = 0; resTry < resTryCount; ++resTry)
                resEnds[resTry] = 0;
            }
        }

    /* Parse out input. */
    lineFileExpectWords(lf, 4, rowSize);
    bits32 start = lineFileNeedNum(lf, row, 1);
    bits32 end = lineFileNeedNum(lf, row, 2);
    float val = lineFileNeedDouble(lf, row, 3);

    /* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
    if (start > end)
        errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
    if (lastB != NULL)
        {
        if (lastB->start > start)
            errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
        if (lastB->end > start)
            errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);
        }


    /* Do zoom counting: for each scale, count one zoom item whenever this
     * item starts at or beyond the end of the last counted one, plus one
     * for every further resScales[] step needed to cover the item. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
        bits32 resEnd = resEnds[resTry];
        if (start >= resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = start + resScales[resTry];
            }
        while (end > resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = resEnd + resScales[resTry];
            }
        }

    /* Save values in output array. */
    struct sectionItem *b = &items[itemIx];
    b->start = start;
    b->end = end;
    b->val = val;
    lastB = b;
    itemIx += 1;
    }
dyStringFree(&stream);
assert(sectionIx == sectionCount);

*retMaxSectionSize = maxSectionSize;
}
Beispiel #21
0
void xpParseStartTag(struct xp *xp, 
	int maxAttCount,		  /* Maximum attribute count. */
	struct dyString *retName, 	  /* Returns tag name */
	int *retAttCount, 		  /* Returns attribute count. */
	struct dyString **retAttributes,  /* Name, value, name, value... */
	boolean *retClosed)	  /* If true then is self-closing (ends in />) */
/* Call this after the first '<' in a tag has been read.  It'll
 * parse out until the '>' tag. */
{
char c, quotC;
int attCount = 0;
struct dyString *dy;
int lineStart;

dyStringClear(retName);

/* Skip white space after '<' and before tag name. */
for (;;)
    {
    if ((c = xpGetChar(xp)) == 0)
	xpUnexpectedEof(xp);
    if (isspace(c))
        {
	if (c == '\n')
	    ++xp->lineIx;
        }
    else
        break;
    }

/* Read in tag name. */
for (;;)
    {
    dyStringAppendC(retName, c);
    if ((c = xpGetChar(xp)) == 0)
	xpUnexpectedEof(xp);
    if (c == '>' || c == '/' || isspace(c))
        break;
    }
if (c == '\n')
    ++xp->lineIx;

/* Parse attributes. */
if (c != '>' && c != '/')
    {
    for (;;)
	{
	/* Skip leading white space. */
	for (;;)
	    {
	    if ((c = xpGetChar(xp)) == 0)
		xpUnexpectedEof(xp);
	    if (isspace(c))
		{
		if (c == '\n')
		    ++xp->lineIx;
		}
	    else
		break;
	    }
	if (c == '>' || c == '/')
	    break;

	/* Allocate space in attribute table. */
	if (attCount >= maxAttCount - 2)
	    xpError(xp, "Attribute stack overflow");
	dy = retAttributes[attCount];
	if (dy == NULL)
	    dy = retAttributes[attCount] = newDyString(64);
	else
	    dyStringClear(dy);
	++attCount;

	/* Read until not a label character. */
	for (;;)
	    {
	    dyStringAppendC(dy, c);
	    if ((c = xpGetChar(xp)) == 0)
		xpUnexpectedEof(xp);
	    if (isspace(c))
		{
		if (c == '\n')
		    ++xp->lineIx;
		break;
		}
	    if (c == '=')
		break;
	    if (c == '/' || c == '>')
		xpError(xp, "Expecting '=' after attribute name");
	    }

	/* Skip white space until '=' */
	if (c != '=')
	    {
	    for (;;)
		{
		if ((c = xpGetChar(xp)) == 0)
		    xpUnexpectedEof(xp);
		if (isspace(c))
		    {
		    if (c == '\n')
			++xp->lineIx;
		    }
		else
		    break;
		}
	    if (c != '=')
		xpError(xp, "Expecting '=' after attribute name");
	    }

	/* Skip space until quote. */
	for (;;)
	    {
	    if ((c = xpGetChar(xp)) == 0)
		xpUnexpectedEof(xp);
	    else if (isspace(c))
		{
		if (c == '\n')
		    ++xp->lineIx;
		}
	    else
		break;
	    }
	if (c != '\'' && c != '"')
	    xpError(xp, "Expecting quoted string after =");

	/* Allocate space in attribute table. */
	if (attCount >= maxAttCount - 2)
	    xpError(xp, "Attribute stack overflow");
	dy = retAttributes[attCount];
	if (dy == NULL)
	    dy = retAttributes[attCount] = newDyString(64);
	else
	    dyStringClear(dy);
	++attCount;

	/* Read until next quote. */
	quotC = c;
	lineStart = xp->lineIx;
	for (;;)
	    {
	    if ((c = xpGetChar(xp)) == 0)
	       xpError(xp, "End of file inside literal string that started at line %d", lineStart);
	    if (c == quotC)
		break;
	    if (c == '&')
	       xpLookup(xp, xp->endTag, dy);
	    else
		{
		if (c == '\n')
		    ++xp->lineIx;
		dyStringAppendC(dy, c);
		}
	    }
	}
    }
if (c == '/')
    {
    *retClosed = TRUE;
    c = xpGetChar(xp);
    if (c != '>')
        xpError(xp, "Expecting '>' after '/'");
    }
else
    *retClosed = FALSE;
*retAttCount = attCount;
}
Beispiel #22
0
int checkTableCoords(char *db)
/* Run the coordinate invariant checks (see the check*() functions) on
 * theTable when it is set, otherwise on every table returned by
 * getTableNames() for db.  Errors are summarized through reportErrors()
 * and the individual check functions.  Returns nonzero if any check
 * reported an error. */
{
struct sqlConnection *conn = hAllocConn(db);
struct slName *allChroms = NULL;
struct slName *tables = NULL, *tableEl = NULL;
boolean gotError = FALSE;

allChroms = hAllChromNames(db);

/* Decide which tables to examine. */
if (theTable != NULL)
    {
    if (sqlTableExists(conn, theTable))
        tables = newSlName(theTable);
    else
        errAbort("Error: specified table \"%s\" does not exist in database %s.",
                 theTable, db);
    }
else
    tables = getTableNames(conn);

for (tableEl = tables;  tableEl != NULL;  tableEl = tableEl->next)
    {
    char *table = tableEl->name;
    char tableChrom[32], trackName[128], tableChromPrefix[33];
    struct slName *chroms = NULL, *chromEl = NULL;
    struct hTableInfo *hti = NULL;

    hParseTableName(db, table, trackName, tableChrom);
    hti = hFindTableInfo(db, tableChrom, trackName);

    /* Nothing to check without table info flagged as positional. */
    if (hti == NULL || !hti->isPos)
        continue;

    /* Watch out for presence of both split and non-split tables;
     * hti for non-split will be replaced with hti of split. */
    if (splitAndNonSplitExist(conn, table, tableChrom))
        continue;

    /* Filled in here but not read again within this function. */
    safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom);

    /* A split table is checked against its own chrom only, anything
     * else against the full chromosome list. */
    chroms = hti->isSplit ? newSlName(tableChrom) : allChroms;

    /* The SQL query below is too huge for scaffold-based db's, so the
     * remaining checks only run when the chrom count is small enough. */
    if (hChromCount(db) > MAX_SEQS_SUPPORTED)
        continue;

    /* Invariant: chrom must be described in chromInfo.  Items with a bad
     * chrom will be invisible to hGetBedRange(), so count them by SQL. */
    if (isNotEmpty(hti->chromField))
        {
        struct dyString *bigQuery = newDyString(1024);
        dyStringClear(bigQuery);
        sqlDyStringPrintf(bigQuery, "select count(*) from %s where ",
                          table);
        for (chromEl = chroms;  chromEl != NULL;  chromEl = chromEl->next)
            {
            sqlDyStringPrintf(bigQuery, "%s != '%s' ",
                              hti->chromField, chromEl->name);
            if (chromEl->next != NULL)
                dyStringAppend(bigQuery, "AND ");
            }
        gotError |= reportErrors(BAD_CHROM, table,
                                 sqlQuickNum(conn, bigQuery->string));
        dyStringFree(&bigQuery);
        }

    /* Per-chromosome checks on the items themselves. */
    for (chromEl = chroms;  chromEl != NULL;  chromEl = chromEl->next)
        {
        char *chrom = chromEl->name;
        struct bed *bedList = hGetBedRange(db, table, chrom, 0, 0, NULL);

        if (hti->isSplit && isNotEmpty(hti->chromField))
            gotError |= checkSplitTableOnlyChrom(bedList, table, hti,
                                                 tableChrom);
        gotError |= checkStartEnd(bedList, table, hti,
                                  testChromSize(chrom));
        if (hti->hasCDS)
            gotError |= checkCDSStartEnd(bedList, table, hti);
        if (hti->hasBlocks && !ignoreBlocks)
            gotError |= checkBlocks(bedList, table, hti);
        bedFreeList(&bedList);
        }
    }
return gotError;
}
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm)
/* Add lines of sdrfFile as children of first top level stanza in storm. */
{
struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 );


/* Convert ArrayExpress field names to our field names */
int fieldIx;
char *lastNonTerm = NULL;
char *lastNonUnit = NULL;
for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1)
    {
    char tagName[256];
    aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName));
    if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName))
	 {
         safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm);
	 table->fields[fieldIx] = lmCloneString(table->lm, tagName);
	 }
    else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName))
	 {
         safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm);
	 table->fields[fieldIx] = lmCloneString(table->lm, tagName);
	 }
    else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName))
         {
	 safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit);
	 lastNonTerm = lmCloneString(table->lm, tagName);
	 table->fields[fieldIx] = lastNonTerm;
	 }
    else
	 {
         lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName);
	 table->fields[fieldIx] = lastNonTerm;
	 }
    }


/* Make up fastq field indexes to handle processing of paired reads in fastq, which
 * take two lines of sdrf file. */
char *fieldsWithFastqs[] = 
/* Fields that contain the fastq file names */
    {
    "sdrf.Comment_FASTQ_URI",
    "sdrf.Comment_SUBMITTED_FILE_NAME",
    "sdrf.Scan_Name",
    };
boolean mightReuseStanza = TRUE;
bool *reuseMultiFields;  // If set this field can vary and line still reused
AllocArray(reuseMultiFields, table->fieldCount);
int i;
for (i=0; i<ArraySize(fieldsWithFastqs); ++i)
    {
    char *field = fieldsWithFastqs[i];
    int ix = stringArrayIx(field, table->fields, table->fieldCount);
    if (ix >=0)
	reuseMultiFields[ix] = TRUE;
    else if (i == 0)
	{
	mightReuseStanza = FALSE;
        break;	    // Make sure has first one if going to do paired read fastq processing
	}
    }


/* Make up a list and hash of fieldMergers to handle conversion of columns that occur
 * multiple times to a comma-separated list of values in a single column. */
struct fieldMerger
/* Something to help merge multiple columns with same name */
    {
    struct fieldMerger *next;	/* Next in list */
    char *name;	
    struct dyString *val;	/* Comma separated value */
    };
struct hash *fieldHash = hashNew(0);
struct fieldMerger *fmList = NULL;
for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
    {
    char *fieldName = table->fields[fieldIx];
    if (hashLookup(fieldHash, fieldName) == NULL)
        {
	struct fieldMerger *fm;
	AllocVar(fm);
	fm->name = fieldName;
	fm->val = dyStringNew(0);
	slAddTail(&fmList, fm);
	hashAdd(fieldHash, fieldName, fm);
	}
    }

/* Grab top level stanza and make sure there is only one. */
struct tagStanza *topStanza = storm->forest;
if (topStanza == NULL || topStanza->next != NULL)
    internalErr();

/* Scan through table, making new stanzas for each row and hooking them into topStanza */
struct fieldedRow *fr, *lastFr = NULL;
struct tagStanza *stanza = NULL;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    /* Empty out any existing vals */
    struct fieldMerger *fm;
    for (fm = fmList; fm != NULL; fm = fm->next)
	dyStringClear(fm->val);

    /* Add all non-empty values from this row to our fieldMergers. */
    char **row = fr->row;
    for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
        {
	char *fieldName = table->fields[fieldIx];
	fm = hashMustFindVal(fieldHash, fieldName);
	char *val = row[fieldIx];
	if (!isEmpty(val))
	    csvEscapeAndAppend(fm->val, val);
	}

    /* If only the reuseMultiFields are varying, append to those values in previous stanza,
     * otherwise make a new stanza */
    if (mightReuseStanza && lastFr != NULL 
        && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields))
	{
	int i;
	for (i=0; i<ArraySize(fieldsWithFastqs); ++i)
	    {
	    char *fieldName = fieldsWithFastqs[i];
	    if ((fm = hashFindVal(fieldHash, fieldName)) != NULL)
	        {
		char *newVal = fm->val->string;
		char *oldVal = tagMustFindVal(stanza, fieldName);
		int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1;
		char bothBuf[bothSize];
		safef(bothBuf, bothSize, "%s,%s", oldVal, newVal);
		tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf);
		}
	    }
	}
    else
        {
	/* Output all nonempty vals to stanza */
	stanza = tagStanzaNew(storm, topStanza);
	for (fm = fmList; fm != NULL; fm = fm->next)
	    if (fm->val->stringSize > 0)
		tagStanzaAppend(storm, stanza, fm->name, fm->val->string);
	}

    lastFr = fr;
    }
slReverse(&topStanza->children);
}
Beispiel #24
0
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table,
	struct hash *geneToModuleHash, struct hash *moduleAndMotifHash,
	struct hash *motifHash, struct hash *positionsHash,
	char *regionTable)
/* Load file which is a big matrix with genes for rows and motifs for
 * columns.  A cell holds a semicolon-separated list of numbers where the
 * gene has the motif and is empty (tab separated) where it does not.  The
 * numbers are relative to the region associated with the gene in
 * positionsHash.  Only cells whose motif occurs in the module associated
 * with the gene are loaded.  Motif hits go into table, and the regions of
 * the genes that had at least one such hit go into regionTable. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
FILE *motifTab = hgCreateTabFile(tmpDir, table);
char *motifNames[32*1024], *row[32*1024];
int motifCount, rowSize, col;
int geneCount = 0, total = 0;
struct dyString *dy = dyStringNew(512);
struct genomePos *motifPosList = NULL;
struct genomePos *regionPosList = NULL;
struct genomePos *pos;

/* First line holds the column labels; column 0 is not a motif. */
if (!lineFileNextReal(lf, &line))
    errAbort("Empty file %s", fileName);
subChar(line, ' ', '_');
motifCount = chopLine(line, motifNames);
if (motifCount >= ArraySize(motifNames))
    errAbort("Too many motifs line 1 of %s", fileName);
lineFileExpectAtLeast(lf, 2, motifCount);
motifNames[0] = NULL;
for (col = 1; col < motifCount; ++col)
    {
    char name[64];
    motifNames[col] = cloneString(fixMotifName(motifNames[col], name, sizeof(name)));
    if (!hashLookup(motifHash, motifNames[col]))
        errAbort("Motif %s is in %s but not modules_motifs.gxm",
		motifNames[col], fileName);
    }

/* Every following line is one gene. */
while ((rowSize = lineFileChopTab(lf, row)) != 0)
    {
    char *gene, *module;
    struct genomePos *regionPos = NULL;

    lineFileExpectWords(lf, motifCount, rowSize);
    gene = row[0];
    module = hashFindVal(geneToModuleHash, gene);
    if (module == NULL)
        {
        warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", 
		gene, lf->lineIx, lf->fileName);
        continue;
        }

    for (col = 1; col < rowSize; ++col)
        {
        struct genomePos *hitsForGene;

        if (row[col][0] == 0)
            continue;		/* empty cell: gene lacks this motif */
        if (!hashLookup2(moduleAndMotifHash, module, motifNames[col]))
            continue;		/* motif is not in this gene's module */

        regionPos = hashFindVal(positionsHash, gene);
        if (regionPos == NULL)
            {
            warn("WARNING: %s in %s but not gene_positions.tab",
		    	gene, fileName);
            break;		/* give up on the rest of this row */
            }

        hitsForGene = convertMotifPos(row[col], regionPos,
                hashMustFindVal(motifHash, motifNames[col]), lf);
        motifPosList = slCat(hitsForGene, motifPosList);
        ++total;
        }

    /* Remember the region of any gene that contributed a hit. */
    if (regionPos != NULL)
        slAddHead(&regionPosList, regionPos);
    ++geneCount;
    }
lineFileClose(&lf);

/* Output sorted table of all motif hits. */
slSort(&motifPosList, genomePosCmp);
for (pos = motifPosList; pos != NULL; pos = pos->next)
    {
    int end = pos->end;
    int start = (pos->start < 0) ? 0 : pos->start;	/* clamp to zero */
    fprintf(motifTab, "%d\t", binFromRange(start, end));
    fprintf(motifTab, "%s\t", pos->chrom);
    fprintf(motifTab, "%d\t%d\t", start, end);
    fprintf(motifTab, "%s\t", pos->motif);
    fprintf(motifTab, "%d\t", pos->score);
    fprintf(motifTab, "%c\t", pos->strand);
    fprintf(motifTab, "%s\n", pos->name);
    }
sqlDyStringPrintf(dy,
"CREATE TABLE  %s (\n"
"    bin smallInt unsigned not null,\n"
"    chrom varChar(255) not null,\n"
"    chromStart int not null,\n"
"    chromEnd int not null,\n"
"    name varchar(255) not null,\n"
"    score int not null,\n"
"    strand char(1) not null,\n"
"    gene varchar(255) not null,\n"
"              #Indices\n"
"    INDEX(gene(12)),\n"
"    INDEX(name(16)),\n"
"    INDEX(chrom(8),bin)\n"
")\n",  table);
sqlRemakeTable(conn, table, dy->string);
verbose(1, "%d genes, %d motifs, %d motifs in genes\n",
	geneCount, motifCount-1, total);
/* The tab file is deliberately left in tmpDir after loading. */
hgLoadTabFile(conn, tmpDir, table, &motifTab);
verbose(1, "Loaded %s table\n", table);
slFreeList(&motifPosList);

/* Now output sorted table of upstream regions. */
FILE *regionTab = hgCreateTabFile(tmpDir, regionTable);
dyStringClear(dy);
sqlDyStringPrintf(dy,
"CREATE TABLE  %s (\n"
"    bin smallInt unsigned not null,\n"
"    chrom varChar(255) not null,\n"
"    chromStart int not null,\n"
"    chromEnd int not null,\n"
"    name varchar(255) not null,\n"
"    score int not null,\n"
"    strand char(1) not null,\n"
"              #Indices\n"
"    INDEX(name(16)),\n"
"    INDEX(chrom(8),bin)\n"
")\n",  regionTable);
sqlRemakeTable(conn, regionTable, dy->string);
slSort(&regionPosList, genomePosCmp);
for (pos = regionPosList; pos != NULL; pos = pos->next)
    {
    int end = pos->end;
    int start = (pos->start < 0) ? 0 : pos->start;	/* clamp to zero */
    fprintf(regionTab, "%d\t", binFromRange(start, end));
    fprintf(regionTab, "%s\t", pos->chrom);
    fprintf(regionTab, "%d\t%d\t", start, end);
    fprintf(regionTab, "%s\t", pos->name);
    fprintf(regionTab, "%d\t", pos->score);
    fprintf(regionTab, "%c\n", pos->strand);
    }
/* As above, the tab file is left in tmpDir. */
hgLoadTabFile(conn, tmpDir, regionTable, &regionTab);
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 * Reads the graph from fileName (through chromGraphListWithTable() when the
 * -idTable option is given), optionally converts values with -minusLog10,
 * and writes a sorted tab file for track in the current directory.  When
 * doLoad is set it also remakes the track table in db, loads the tab file,
 * writes <track>.cgb and records min/max and the .cgb path in the
 * metaChromGraph table.  Aborts if the input yields no elements. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;
boolean minusLog10 = optionExists("minusLog10");

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else 
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Convert values to -log10 if requested.  This has to cover every element,
 * including the head of the list, before min/max are computed; otherwise
 * the first value would be stored and ranged untransformed.  A value of
 * exactly 1 becomes 0, and non-positive values are left as they are. */
if (minusLog10)
    {
    for (el = list; el != NULL; el = el->next)
	{
	if (el->val == 1)
	    el->val = 0;
	else if (el->val > 0)
	    el->val = -1 * log(el->val)/log(10);
	}
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;
    }


/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    /* NOTE(review): track and the path are pasted into SQL unescaped here
     * and below; presumably they come from a trusted command line - confirm. */
    if (!sqlTableExists(conn, "metaChromGraph"))
	sqlUpdate(conn, metaCreateString);
    else
        {
	dyStringClear(dy);
	dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", 
		track);
	sqlUpdate(conn, dy->string);
	}

    /* Make chrom graph file.  The binary file is written as <track>.cgb in
     * the current directory; the path recorded in the meta table is built
     * from -pathPrefix, defaulting to /gbdb/<db>/chromGraph. */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
    	track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    }
}
static void doBlat(struct sqlConnection *conn, int taxon, char *db)
/* Place non-BAC probe sequences of the given taxon whose vgPrbAli status for
 * db is still 'new', using blat run over ssh on kolossus.  Works through
 * files in the current directory: the query sequences are saved to blat.fa,
 * and the sorted, filtered alignments are left in blatNearBest.psl (created
 * empty when there is nothing to align).  Intermediate files are removed.
 * Exit statuses of the shell commands are not checked. */
{
int rc = 0;
char cmdLine[256];
char path1[256];
char path2[256];
struct dyString *dy = dyStringNew(0);

/* (non-BACs needing alignment) */
dyStringPrintf(dy, 
    "select concat(\"vgPrb_\",e.id), e.seq"
    " from vgPrb e, vgPrbAli a"
    " where e.id = a.vgPrb"
    " and a.db = '%s'"
    " and a.status = 'new'"
    " and e.taxon = %d"
    " and e.type <> 'bac'"
    " and e.seq <> ''"
    " order by e.id"
    , db, taxon);
rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE);
/* The query text is not needed again; free it here so that the early
 * return below does not leak it. */
dyStringFree(&dy);
verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon);

if (rc == 0) 
    {
    unlink("blat.fa");
    system("rm -f blatNearBest.psl; touch blatNearBest.psl");  /* make empty file */
    return;
    }

/* make .ooc and blat on kolossus */

safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db);
safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db);
verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

safef(cmdLine,sizeof(cmdLine),
"ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11"
" -repMatch=1024 %s.2bit /dev/null /dev/null'",
    getCurrentDir(),db);
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

safef(cmdLine,sizeof(cmdLine),
	"ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'",
	getCurrentDir(),db);
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

/* using blat even with -fastMap was way too slow - took over a day,
 * so instead I will make a procedure to write a fake psl for the BACs
 * which you will see called below */

/* Remove the local copy of the .2bit and the .ooc file made above. */
safef(path2,sizeof(path2),"%s.2bit",db);
verbose(1,"rm %s\n",path2); unlink(path2); 

safef(path2,sizeof(path2),"11.ooc");
verbose(1,"rm %s\n",path2); unlink(path2); 


/* blat was run with -noHead, so there is no psl header; sort on query name */
safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > blatS.psl");
verbose(1,"cmdLine=[%s]\n",cmdLine);
system(cmdLine); 

/* keep near best within 0.5% of the best (-globalNearBest=0.005) */
safef(cmdLine,sizeof(cmdLine), 
    "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50"
    " blatS.psl blatNearBest.psl");
verbose(1,"cmdLine=[%s]\n",cmdLine);
system(cmdLine); 

unlink("blat.fa");
unlink("blat.psl");
unlink("blatS.psl");
}
Beispiel #27
0
static void parseDbXrefs()
/* Combine the db_xref values of the CDS and gene features into one
 * space-separated, de-duplicated "dbx" entry in the kvt, and pull out the
 * LocusID ("loc"), GeneID ("gni") and MIM ("mim") ids along the way.  MIM
 * ids are accumulated into a space-separated list. */
{
static char *LOCUS_ID = "LocusID:";
static char *GENE_ID = "GeneID:";
static char *MIM_ID = "MIM:";
struct slName *xrefList = NULL, *cur = NULL, *prev = NULL;
struct keyVal *dbXrefKv = NULL;
struct keyVal *locusLinkIdKv = NULL;
struct keyVal *geneIdKv = NULL;
struct keyVal *omimIdKv = NULL;

/* Create the shared buffers on first use and empty them for this entry. */
if (dbXrefBuf == NULL)
    dbXrefBuf = dyStringNew(256);
dyStringClear(dbXrefBuf);
if (omimIdBuf == NULL)
    omimIdBuf = dyStringNew(256);
dyStringClear(omimIdBuf);
/* NOTE(review): geneId is not reset here the way locusLinkId is - confirm
 * that this is intended. */
locusLinkId[0] = '\0';

/* Gather the xrefs of both features in one list; sorting puts duplicates
 * next to each other so they can be skipped below. */
if (gbCdsDbxField->val->stringSize > 0)
    xrefList = slCat(xrefList, parseDbXrefStr(gbCdsDbxField->val->string));
if (gbGeneDbxField->val->stringSize > 0)
    xrefList = slCat(xrefList, parseDbXrefStr(gbGeneDbxField->val->string));
slNameSort(&xrefList);

for (cur = xrefList; cur != NULL; prev = cur, cur = cur->next)
    {
    char *name = cur->name;

    /* Skip if dup of previous. */
    if (prev != NULL && sameString(prev->name, name))
        continue;

    if (dbXrefBuf->stringSize > 0)
        dyStringAppendC(dbXrefBuf, ' ');
    dyStringAppend(dbXrefBuf, name);
    updateKvt(&dbXrefKv, "dbx", dbXrefBuf->string);

    /* Find number in db_xref like LocusID:27 or GeneID:27. */
    if (startsWith(LOCUS_ID, name))
        {
        safef(locusLinkId, sizeof(locusLinkId), "%s",
              name + strlen(LOCUS_ID));
        updateKvt(&locusLinkIdKv, "loc", locusLinkId);
        }
    else if (startsWith(GENE_ID, name))
        {
        safef(geneId, sizeof(geneId), "%s",
              name + strlen(GENE_ID));
        updateKvt(&geneIdKv, "gni", geneId);
        }
    else if (startsWith(MIM_ID, name))
        {
        if (omimIdBuf->stringSize > 0)
            dyStringAppendC(omimIdBuf, ' ');
        dyStringAppend(omimIdBuf, name + strlen(MIM_ID));
        updateKvt(&omimIdKv, "mim", omimIdBuf->string);
        }
    }
slFreeList(&xrefList);
}
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table)
/* Save to vgPrbExt.fa the vgPrb sequences that appear as qName in db.table
 * but have no matching acc in db.seq.  If there are any, copy the file under
 * a fresh random name into /cluster/data/<db>/bed/visiGene/, symlink it from
 * /gbdb/<db>/visiGene/ and run hgLoadSeq on the link.  Exit statuses of the
 * shell commands are not checked. */
{
int rc = 0;
char cmd[256];
char path[256];
char bedPath[256];
char gbdbPath[256];
char *fname=NULL;
struct dyString *dy = dyStringNew(0);
dyStringClear(dy);
dyStringPrintf(dy, 
"select distinct concat('vgPrb_',e.id), e.seq"
" from vgPrb e join %s.%s v"
" left join %s.seq s on s.acc = v.qName"
" where concat('vgPrb_',e.id) = v.qName"
" and s.acc is NULL"
" order by e.id"
    , db, table, db);
rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE);
verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table);
if (rc > 0)  /* can set any desired minimum */
    {
    /* Make sure both target directories exist. */
    safef(bedPath,sizeof(bedPath),"/cluster/data/%s/bed/visiGene/",db);
    if (!fileExists(bedPath))
	{
	safef(cmd,sizeof(cmd),"mkdir %s",bedPath);
	verbose(1,"%s\n",cmd); system(cmd);
	}
    
    safef(gbdbPath,sizeof(gbdbPath),"/gbdb/%s/visiGene/",db);
    if (!fileExists(gbdbPath))
	{
	safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath);
    	verbose(1,"%s\n",cmd); system(cmd);
	}
   
    /* Pick an unused file name by replacing the AAAAAA placeholder with six
     * random capital letters.  The generator is seeded once, before the
     * loop: re-seeding with time() on every pass would reproduce the same
     * candidate name until the clock second changes. */
    srand( (unsigned)time( NULL ) );
    for (;;)
	{
	int i;
	char *c;
	safef(path,sizeof(path),"%svgPrbExt_AAAAAA.fa",bedPath);
	c = rStringIn("AAAAAA",path);
	for (i=0; i<6; ++i)
	    {
	    /* 'A' plus an offset in 0..25 */
	    *c++ += (int) (26 * (rand() / (RAND_MAX + 1.0)));
	    }
	if (!fileExists(path))
	    break;
	}

    
    safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path);
    verbose(1,"%s\n",cmd); system(cmd);
    
    /* File name part of path, i.e. what follows the last '/'. */
    fname = rStringIn("/", path);
    ++fname;
    
    safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);
    
    safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);
    }

dyStringFree(&dy);
}
Beispiel #29
0
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog)
/* gensatImageDownload - Download images from gensat guided by xml file.
 * For every GensatImage element in gensatXml the full image is fetched into
 * outDir as a .jpg file, creating subdirectories as needed.  Files whose
 * name ends in .jpg come straight from the ftp site, anything else goes
 * through the jpeg-converting cgi.  Files already present are skipped.
 * Progress is appended to outLog. */
{
struct xap *xap;
struct gsGensatImage *image;
char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat";
char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id=";
char finalJpg[PATH_LEN];
char finalDir[PATH_LEN];
char wgetSource[PATH_LEN];
struct hash *dirHash = newHash(16);	/* directories already created */
struct dyString *mkdirCmd = dyStringNew(0);
int imageIx = 0;

fLog = mustOpen(outLog, "a");
fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir);
xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler);


while ((image = xapListNext(xap, "GensatImage")) != NULL)
    {
    int id = image->gsGensatImageId->text;
    char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg
    			->gsGensatImageInfo->gsGensatImageInfoFilename->text;

    /* Mangle file name a little */
    subChar(imageFile, '(', '_');
    stripChar(imageFile, ')');

    /* Figure out name of jpeg file in outDir. */
    verbose(1, "image %d, id %d\n", ++imageIx, id);
    safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile);
    stripString(finalJpg, ".full"); /* Image magick can't handle two suffixes */
    chopSuffix(finalJpg);
    /* strcat() does no bounds checking, so make sure the new suffix and the
     * terminator still fit in the buffer before appending. */
    if (strlen(finalJpg) + strlen(".jpg") >= sizeof(finalJpg))
        errAbort("File name too long: %s", finalJpg);
    strcat(finalJpg, ".jpg");

    /* Create directory that it goes in if necessary */
    splitPath(finalJpg, finalDir, NULL, NULL);
    if (!hashLookup(dirHash, finalDir))
        {
	hashAdd(dirHash, finalDir, NULL);
	dyStringClear(mkdirCmd);
	dyStringPrintf(mkdirCmd, "mkdir -p %s", finalDir);
	if (system(mkdirCmd->string) != 0)
	    errAbort("Couldn't %s", mkdirCmd->string);
	}

    /* Download it - either directly via ftp, or indirectly via cgi. */
    if (fileExists(finalJpg))
	{
	verbose(1, "already have %s\n", imageFile);
	fprintf(fLog, "%s already downloaded\n", finalJpg);
	}
    else
        {
	if (endsWith(imageFile, ".jpg"))
	    {
	    safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile);
	    if (safeGetOne(wgetSource, finalJpg))
	        fprintf(fLog, "Got via ftp %s\n", finalJpg);
	    }
	else
	    {
	    safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id);
	    if (safeGetOne(wgetSource, finalJpg))
	        fprintf(fLog, "Got via cgi %s\n", finalJpg);
	    }
	}
    }
carefulClose(&fLog);
}
char *scanSettingsForCT(char *userName, char *sessionName, char *contents,
			int *pLiveCount, int *pExpiredCount)
/* Walk the CGI-encoded session contents as var=val pairs separated by '&'
 * and look for custom track file variables (parsing code taken from
 * cartParseOverHash).  Each custom track file that exists is handed to
 * customFactoryTestExistence(); settings whose file is gone are left out of
 * the rebuilt contents.  *pLiveCount and *pExpiredCount (when not NULL) are
 * incremented once per setting found live or expired.
 * Returns NULL when the rebuilt contents have the same length as the
 * original, otherwise a SQL update command that stores the rebuilt
 * contents for this session.  The update is returned rather than run here
 * because running it would mess up the caller's query. */
{
int contentLength = strlen(contents);
struct dyString *newContents = dyStringNew(contentLength+1);
struct dyString *oneSetting = dyStringNew(contentLength / 4);
char *updateIfAny = NULL;
char *contentsToChop = cloneString(contents);	/* scratch copy, chopped in place */
char *namePt = NULL, *nextNamePt = NULL;

verbose(3, "Scanning %s %s\n", userName, sessionName);
for (namePt = contentsToChop;  isNotEmpty(namePt);  namePt = nextNamePt)
    {
    boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE;
    char *dataPt = strchr(namePt, '=');

    /* Split "name=data&rest" into its zero-terminated pieces. */
    if (dataPt == NULL)
        errAbort("Mangled session content string %s", namePt);
    *dataPt++ = 0;
    nextNamePt = strchr(dataPt, '&');
    if (nextNamePt != NULL)
        *nextNamePt++ = 0;

    /* Rebuild this setting in its original, still encoded, form. */
    dyStringClear(oneSetting);
    dyStringPrintf(oneSetting, "%s=%s%s",
                   namePt, dataPt, (nextNamePt ? "&" : ""));

    /* Anything that is not a custom track file variable is copied as is. */
    if (!startsWith(CT_FILE_VAR_PREFIX, namePt))
        {
        dyStringAppend(newContents, oneSetting->string);
        continue;
        }

    cgiDecode(dataPt, dataPt, strlen(dataPt));
    verbose(3, "Found variable %s = %s\n", namePt, dataPt);

    /* If the file exists, keep the setting and leave it up to
     * customFactoryTestExistence to parse the file for possible customTrash
     * table references, some of which may exist and some not.  If it does
     * not exist, omit the setting from newContents so it doesn't get copied
     * from session to session. */
    if (fileExists(dataPt))
        {
        char *db = namePt + strlen(CT_FILE_VAR_PREFIX);
        dyStringAppend(newContents, oneSetting->string);
        customFactoryTestExistence(db, dataPt,
                                   &thisGotLiveCT, &thisGotExpiredCT);
        }
    else
        {
        verbose(3, "Removing %s from %s %s\n", oneSetting->string,
                userName, sessionName);
        thisGotExpiredCT = TRUE;
        }

    /* Tally the outcome for the caller. */
    if (thisGotLiveCT && pLiveCount != NULL)
        (*pLiveCount)++;
    if (thisGotExpiredCT && pExpiredCount != NULL)
        (*pExpiredCount)++;

    /* Report it at the appropriate verbosity. */
    if (thisGotExpiredCT)
        {
        if (verboseLevel() >= 3)
            verbose(3, "Found expired custom track in %s %s: %s\n",
                    userName, sessionName, dataPt);
        else
            verbose(2, "Found expired custom track: %s\n", dataPt);
        }
    if (thisGotLiveCT)
        verbose(4, "Found live custom track: %s\n", dataPt);
    }

/* A change in length means at least one setting was dropped. */
if (newContents->stringSize != contentLength)
    {
    struct dyString *update = NULL;

    if (newContents->stringSize > contentLength)
        errAbort("Uh, why is newContents (%d) longer than original (%d)??",
                 newContents->stringSize, contentLength);
    update = dyStringNew(contentLength*2);
    dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable);
    dyStringAppendN(update, newContents->string, newContents->stringSize);
    dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 "
                   "where userName=\"%s\" and sessionName=\"%s\";",
                   userName, sessionName);
    verbose(3, "Removing one or more dead CT file settings from %s %s "
            "(original length %d, now %d)\n", 
            userName, sessionName,
            contentLength, newContents->stringSize);
    updateIfAny = dyStringCannibalize(&update);
    }

dyStringFree(&oneSetting);
dyStringFree(&newContents);
freeMem(contentsToChop);
return updateIfAny;
}