Пример #1
struct dyString *lineFileSlurpHttpBody(struct lineFile *lf,
				       boolean chunked, int contentLength)
/* Return a dyString that contains the http response body in lf.  Handle
 * chunk-encoding and content-length.
 *   lf            - line file to read the body from.
 *   chunked       - TRUE when the body uses "Transfer-Encoding: chunked".
 *   contentLength - number of bytes to read when not chunked; a negative
 *                   value means "read until end of file".
 * Each line is appended as lineSize-1 bytes followed by '\n'.  A short or
 * malformed body is not fatal: a warning is issued where applicable and
 * whatever was collected so far is returned. */
{
  struct dyString *body = newDyString(64*1024);
  char *line;
  int lineSize;

  if (chunked)
    {
      /* Handle "Transfer-Encoding: chunked" body */
      /* Procedure from RFC2068 section 19.4.6 */
      char *csword;
      unsigned chunkSize = 0;
      unsigned size;
      do
	{
	  /* Read line that has chunk size (in hex) as first word. */
	  if (!lineFileNext(lf, &line, NULL))
	    break;
	  csword = nextWord(&line);
	  /* Guard against a missing word so sscanf never sees NULL. */
	  if (csword == NULL || sscanf(csword, "%x", &chunkSize) < 1)
	    {
	      warn("%s: chunked transfer-encoding chunk size parse error.\n",
		   lf->fileName);
	      break;
	    }
	  /* If chunk size is 0, read in a blank line & then we're done. */
	  if (chunkSize == 0)
	    {
	      if (!lineFileNext(lf, &line, NULL))
		line = NULL;
	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
		     lf->fileName, (line != NULL ? line : "(null)"));
	      break;
	    }

	  /* Read (and save) lines until we have read in chunk. */
	  for (size = 0;  size < chunkSize;  size += lineSize)
	    {
	      if (! lineFileNext(lf, &line, &lineSize))
		break;
	      dyStringAppendN(body, line, lineSize-1);
	      dyStringAppendC(body, '\n');
	    }
	  /* Read blank line - or extra CRLF inserted in the middle of the
	   * current line, in which case we need to trim it. */
	  if (size > chunkSize)
	    {
	      body->stringSize -= (size - chunkSize);
	      body->string[body->stringSize] = 0;
	    }
	  else if (size == chunkSize)
	    {
	      if (!lineFileNext(lf, &line, NULL))
		line = NULL;
	      if (line == NULL || (line[0] != '\r' && line[0] != 0))
		warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
		     lf->fileName, (line != NULL ? line : "(null)"));
	    }
	} while (chunkSize > 0);
      /* Try to read in next line.  If it's an HTTP header, put it back. */
      /* If there is a next line but it's not an HTTP header, it's a footer. */
      if (lineFileNext(lf, &line, NULL))
	{
	  if (startsWith("HTTP/", line))
	    lineFileReuse(lf);
	  else
	    {
	      /* Got a footer -- keep reading until blank line */
	      warn("%s: chunked transfer-encoding: got footer %s, discarding it.\n",
		   lf->fileName, line);
	      while (lineFileNext(lf, &line, NULL))
		{
		  if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
		    break;
		  warn("discarding footer line: %s\n", line);
		}
	    }
	}
    }
  else if (contentLength >= 0)
    {
      /* Read in known length */
      int size;
      for (size = 0;  size < contentLength;  size += lineSize)
	{
	  if (! lineFileNext(lf, &line, &lineSize))
	    break;
	  dyStringAppendN(body, line, lineSize-1);
	  dyStringAppendC(body, '\n');
	}
    }
  else
    {
      /* Read in to end of file (assume it's not a persistent connection) */
      while (lineFileNext(lf, &line, &lineSize))
	{
	  dyStringAppendN(body, line, lineSize-1);
	  dyStringAppendC(body, '\n');
	}
    }

  return body;
} /* lineFileSlurpHttpBody */
Пример #2
void initStep(struct sqlConnection *conn, struct stepInit *init)
/* Create step based on initializer */
/* Do a little validation on while counting up inputs and outputs */
int inCount = commaSepCount(init->inputTypes);
int matchCount = commaSepCount(init->inputFormats);
if (inCount != matchCount)
    errAbort("inputTypes has %d elements but inputFormats has %d in step %s", 
	    inCount, matchCount, init->name);
int outCount = commaSepCount(init->outputTypes);
matchCount = commaSepCount(init->outputFormats);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputFormats has %d in step %s", 
	    outCount, matchCount, init->name);
matchCount = commaSepCount(init->outputNamesInTempDir);
if (outCount != matchCount)
    errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", 
	    outCount, matchCount, init->name);

struct dyString *query = dyStringNew(0);
dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name);
int existingCount = sqlQuickNum(conn, query->string);
if (existingCount > 0)
    warn("%s already exists in eapStep", init->name);

/* Parse out software part and make sure that all pieces are there. */
char **softwareArray;
int softwareCount;
sqlStringDynamicArray(init->software, &softwareArray, &softwareCount);
unsigned softwareIds[softwareCount];
int i;
for (i=0; i<softwareCount; ++i)
    char *name = softwareArray[i];
    dyStringPrintf(query, "select id from eapSoftware where name='%s'", name);
    unsigned softwareId = sqlQuickNum(conn, query->string);
    if (softwareId == 0)
        errAbort("Software %s doesn't exist by that name in eapSoftware", name);
    softwareIds[i] = softwareId;

/* Make step record. */
	"insert eapStep (name,cpusRequested,"
        " inCount,inputTypes,inputFormats,"
	" outCount,outputNamesInTempDir,outputTypes,outputFormats)"
	" values (");
dyStringPrintf(query, "'%s',", init->name);
dyStringPrintf(query, "%d,", init->cpusRequested);
dyStringPrintf(query, "%d,", inCount);
dyStringPrintf(query, "'%s',", init->inputTypes);
dyStringPrintf(query, "'%s',", init->inputFormats);
dyStringPrintf(query, "%d,", outCount);
dyStringPrintf(query, "'%s',", init->outputNamesInTempDir);
dyStringPrintf(query, "'%s',", init->outputTypes);
dyStringPrintf(query, "'%s'", init->outputFormats);
dyStringPrintf(query, ")");
sqlUpdate(conn, query->string);

/* Make software/step associations. */
for (i=0; i<softwareCount; ++i)
    dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')",
	    init->name, softwareArray[i]);
    sqlUpdate(conn, query->string);

/* Force step version stuff to be made right away */
eapCurrentStepVersion(conn, init->name);

/* Clean up. */
void bioImageLoad(char *setRaFile, char *itemTabFile)
/* bioImageLoad - Load data into bioImage database. */
struct hash *raHash = raReadSingle(setRaFile);
struct hash *rowHash;
struct lineFile *lf = lineFileOpen(itemTabFile, TRUE);
char *line, *words[256];
struct sqlConnection *conn = sqlConnect(database);
int rowSize;
int submissionSetId;
struct hash *fullDirHash = newHash(0);
struct hash *screenDirHash = newHash(0);
struct hash *thumbDirHash = newHash(0);
struct hash *treatmentHash = newHash(0);
struct hash *bodyPartHash = newHash(0);
struct hash *sliceTypeHash = newHash(0);
struct hash *imageTypeHash = newHash(0);
struct hash *sectionSetHash = newHash(0);
struct dyString *dy = dyStringNew(0);

/* Read first line of tab file, and from it get all the field names. */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s appears to be empty", lf->fileName);
if (line[0] != '#')
    errAbort("First line of %s needs to start with #, and then contain field names",
rowHash = hashRowOffsets(line+1);
rowSize = rowHash->elCount;
if (rowSize >= ArraySize(words))
    errAbort("Too many fields in %s", lf->fileName);

/* Check that have all required fields */
    char *fieldName;
    int i;

    for (i=0; i<ArraySize(requiredSetFields); ++i)
	fieldName = requiredSetFields[i];
	if (!hashLookup(raHash, fieldName))
	    errAbort("Field %s is not in %s", fieldName, setRaFile);

    for (i=0; i<ArraySize(requiredItemFields); ++i)
	fieldName = requiredItemFields[i];
	if (!hashLookup(rowHash, fieldName))
	    errAbort("Field %s is not in %s", fieldName, itemTabFile);

    for (i=0; i<ArraySize(requiredFields); ++i)
	fieldName = requiredFields[i];
	if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName))
	    errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile);

/* Create/find submission record. */
submissionSetId = saveSubmissionSet(conn, raHash);

/* Process rest of tab file. */
while (lineFileNextRowTab(lf, words, rowSize))
    int fullDir = cachedId(conn, "location", "name", 
    	fullDirHash, "fullDir", raHash, rowHash, words);
    int screenDir = cachedId(conn, "location", "name", 
    	screenDirHash, "screenDir", raHash, rowHash, words);
    int thumbDir = cachedId(conn, "location", 
    	"name", thumbDirHash, "thumbDir", raHash, rowHash, words);
    int bodyPart = cachedId(conn, "bodyPart", 
    	"name", bodyPartHash, "bodyPart", raHash, rowHash, words);
    int sliceType = cachedId(conn, "sliceType", 
    	"name", sliceTypeHash, "sliceType", raHash, rowHash, words);
    int imageType = cachedId(conn, "imageType", 
    	"name", imageTypeHash, "imageType", raHash, rowHash, words);
    int treatment = cachedId(conn, "treatment", 
    	"conditions", treatmentHash, "treatment", raHash, rowHash, words);
    char *fileName = getVal("fileName", raHash, rowHash, words, NULL);
    char *submitId = getVal("submitId", raHash, rowHash, words, NULL);
    char *taxon = getVal("taxon", raHash, rowHash, words, NULL);
    char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL);
    char *age = getVal("age", raHash, rowHash, words, NULL);
    char *sectionSet = getVal("sectionSet", raHash, rowHash, words, "");
    char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0");
    char *gene = getVal("gene", raHash, rowHash, words, "");
    char *locusLink = getVal("locusLink", raHash, rowHash, words, "");
    char *refSeq = getVal("refSeq", raHash, rowHash, words, "");
    char *genbank = getVal("genbank", raHash, rowHash, words, "");
    char *priority = getVal("priority", raHash, rowHash, words, "200");
    int sectionId = 0;
    int oldId;
    // char *xzy = getVal("xzy", raHash, rowHash, words, xzy);

    if (sectionSet[0] != 0 && !sameString(sectionSet, "0"))
	struct hashEl *hel = hashLookup(sectionSetHash, sectionSet);
	if (hel != NULL)
	    sectionId = ptToInt(hel->val);
	    sqlUpdate(conn, "insert into sectionSet values(default)");
	    sectionId = sqlLastAutoId(conn);
	    hashAdd(sectionSetHash, sectionSet, intToPt(sectionId));

    dyStringAppend(dy, "select id from image ");
    dyStringPrintf(dy, "where fileName = '%s' ", fileName);
    dyStringPrintf(dy, "and fullLocation = %d",  fullDir);
    oldId = sqlQuickNum(conn, dy->string);
    if (oldId != 0)
	if (replace)
	    dyStringPrintf(dy, "delete from image where id = %d", oldId);
	    sqlUpdate(conn, dy->string);
	    errAbort("%s is already in database line %d of %s", 
	    	fileName, lf->lineIx, lf->fileName);

    dyStringAppend(dy, "insert into image set\n");
    dyStringPrintf(dy, " id = default,\n");
    dyStringPrintf(dy, " fileName = '%s',\n", fileName);
    dyStringPrintf(dy, " fullLocation = %d,\n", fullDir);
    dyStringPrintf(dy, " screenLocation = %d,\n", screenDir);
    dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir);
    dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId);
    dyStringPrintf(dy, " sectionSet = %d,\n", sectionId);
    dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx);
    dyStringPrintf(dy, " submitId = '%s',\n", submitId);
    dyStringPrintf(dy, " gene = '%s',\n", gene);
    dyStringPrintf(dy, " locusLink = '%s',\n", locusLink);
    dyStringPrintf(dy, " refSeq = '%s',\n", refSeq);
    dyStringPrintf(dy, " genbank = '%s',\n", genbank);
    dyStringPrintf(dy, " priority = %s,\n", priority);
    dyStringPrintf(dy, " taxon = %s,\n", taxon);
    dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo);
    dyStringPrintf(dy, " age = %s,\n", age);
    dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart);
    dyStringPrintf(dy, " sliceType = %d,\n", sliceType);
    dyStringPrintf(dy, " imageType = %d,\n", imageType);
    dyStringPrintf(dy, " treatment = %d\n", treatment);

    sqlUpdate(conn, dy->string);
Пример #4
boolean xpParseNext(struct xp *xp, char *tag)
/* Skip through file until get given tag.  Then parse out the
 * tag and all of its children (calling atStartTag/atEndTag).
 * You can call this repeatedly to process all of a given tag
 * in file.
 * When tag is NULL, 'inside' starts out TRUE, so the callbacks fire for
 * every tag encountered.
 * Returns FALSE when xpGetChar yields 0 while reading the text between
 * tags, and TRUE when xp->stack comes back to the position held in
 * initialStack.
 * NOTE(review): this listing shows no brace lines and some statements look
 * absent (for instance the tests on '<' and on newline below have no
 * visible body) - compare with the upstream file before relying on it. */

char c;
int i, attCount = 0;
/* Text buffer of the innermost open tag; NULL until a start tag is seen. */
struct dyString *text = NULL;
boolean isClosed;
/* TRUE once we are within the requested tag (or always, if tag is NULL). */
boolean inside = (tag == NULL);
/* Stack position that, once reached again after a pop, ends the call. */
struct xpStack *initialStack = xp->stack;

for (;;)
    /* Load up text until next tag. */
    for (;;)
	if ((c = xpGetChar(xp)) == 0)
	    return FALSE;
	if (c == '<')
	if (c == '&')
	   xpLookup(xp, xp->endTag, text);
	    if (c == '\n')
	    if (text != NULL)
		dyStringAppendC(text, c);

    /* Get next character to figure out what type of tag. */
    c = xpGetChar(xp);
    if (c == 0)
       xpError(xp, "End of file inside tag");
    else if (c == '?' || c == '!')
        xpEatComment(xp, c);
    else if (c == '/')  /* Closing tag. */
	struct xpStack *stack = xp->stack;
	/* Nothing is open if the stack pointer is at or past the buffer end. */
	if (stack >= xp->stackBufEnd)
	    xpError(xp, "Extra end tag");
	xpParseEndTag(xp, stack->tag->string);
	if (inside)
	    xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
	/* Pop: frames are pushed with --xp->stack, so popping moves upward. */
	xp->stack += 1;
	if (xp->stack == initialStack)
	    return TRUE;
    else	/* Start tag. */
	/* Push new frame on stack and check for overflow and unallocated strings. */
	struct xpStack *stack = --xp->stack;
	if (stack < xp->stackBuf)
	    xpError(xp, "Stack overflow");
	/* Frame strings are allocated on first use of a frame. */
	/* NOTE(review): no reset of an already allocated tag/text string is
	 * visible here; presumably reused frames are cleared - confirm. */
	if (stack->tag == NULL)
	    stack->tag = newDyString(32);
	if (stack->text == NULL)
	    stack->text = newDyString(256);
	text = stack->text;

	/* Parse the start tag. */
	xpParseStartTag(xp, ArraySize(xp->attDyBuf), stack->tag, 
		&attCount, xp->attDyBuf, &isClosed);

	/* First occurrence of the requested tag: start reporting, and note
	 * the stack position that popping this frame will restore. */
	if (!inside && sameString(stack->tag->string, tag))
	    inside = TRUE;
	    initialStack = xp->stack + 1;

	/* Call user start function, and if closed tag, end function too. */
	if (inside)
	    /* Unpack attributes into simple array of strings. */
	    for (i=0; i<attCount; ++i)
		xp->attBuf[i] = xp->attDyBuf[i]->string;
	    /* NULL-terminate the array handed to the callback. */
	    xp->attBuf[attCount] = NULL;
	    xp->atStartTag(xp->userData, stack->tag->string, xp->attBuf);
	if (isClosed)
	    if (inside)
		xp->atEndTag(xp->userData, stack->tag->string, stack->text->string);
	    xp->stack += 1;
	    if (xp->stack == initialStack)
		return TRUE;
struct tagStorm *idfToStormTop(char *fileName)
/* Convert an idf.txt format file to a tagStorm with a single top-level stanza */
/* Create a tag storm with one as yet empty stanza */
struct tagStorm *storm = tagStormNew(fileName);
struct tagStanza *stanza = tagStanzaNew(storm, NULL);

/* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */
char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data";
struct dyString *additionalFileDy = dyStringNew(0);

/* There can be multiple secondary accession tags, so handle these too */
char *secondaryAccessionTag = "idf.Comment_SecondaryAccession";
struct dyString *secondaryAccessionDy = dyStringNew(0);

/* Parse lines from idf file into stanza */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct dyString *dyVal = dyStringNew(0);
while (lineFileNextReal(lf, &line))
    /* Erase trailing tab... */

    /* Parse line into tab-separated array and make sure it's a reasonable size */
    char *row[256];
    int rowSize = chopTabs(line, row);
    if (rowSize == ArraySize(row))
        errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName);
    if (rowSize < 2)

    /* Convert first element to tagName */
    char tagName[256];
    aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName));

    /* Special case where we already are a comma separated list */
    if (sameString(tagName, "idf.Publication_Author_List"))
	tagStanzaAppend(storm, stanza, tagName, row[1]);
    else if (startsWith(additionalFilePrefix, tagName))
	csvEscapeAndAppend(additionalFileDy, row[1]);
    else if (sameString(secondaryAccessionTag, tagName))
	csvEscapeAndAppend(secondaryAccessionDy, row[1]);
	/* Convert rest of elements to possibly comma separated values */
	int i;
	for (i=1; i<rowSize; ++i)
	    csvEscapeAndAppend(dyVal, row[i]);
	tagStanzaAppend(storm, stanza, tagName, dyVal->string);
if (additionalFileDy->stringSize != 0)
     tagStanzaAppend(storm, stanza, additionalFilePrefix, additionalFileDy->string);
if (secondaryAccessionDy->stringSize != 0)
     tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string);
return storm;
static int doBacs(struct sqlConnection *conn, int taxon, char *db)
/* fetch available sequence for bacEndPairs.
 * Reads the bac list with bacRead, fetches DNA for a bac from the loaded
 * chromosome, and for probes still in state 'new' either stores the
 * sequence on the vgPrb row or, when findVgPrbBySeq reports an existing
 * probe, re-points vgPrbMap to it and deletes the duplicate row.
 * Returns count.
 * NOTE(review): in this listing 'bac' starts as NULL and is never visibly
 * advanced, 'count' is never visibly incremented, and no brace lines are
 * present - the loop over 'bacs' and some statements appear to be missing;
 * compare with the upstream file. */
struct dyString *dy = dyStringNew(0);
struct dnaSeq *chromSeq = NULL;
struct bac *bacs = bacRead(conn, taxon, db);
struct bac *bac = NULL;
/* Name of the chromosome currently held in chromSeq; "" before any load. */
char *chrom = cloneString("");
int count = 0;

verbose(1,"bac list read done.\n");

    /* Load a new chromosome only when this bac is on a different one. */
    if (differentWord(chrom,bac->chrom))
	verbose(1,"switching to chrom %s\n",bac->chrom);
	chromSeq = hLoadChrom(bac->chrom,db);
	chrom = cloneString(bac->chrom);

    char *dna = checkAndFetchBacDna(chromSeq, bac);
    /* NOTE(review): no body is visible for this minus-strand test;
     * presumably the sequence gets reverse-complemented - confirm. */
    if (sameString(bac->strand,"-"))

    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe);
    if (sqlQuickNum(conn,dy->string)>0)
	/* record exists and hasn't already been updated */

	/* A result of 0 is handled as "no probe with this sequence yet". */
	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	if (vgPrb == 0)
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq='");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " tName='%s',\n", bac->chrom);
	    dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart);
	    dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd);
	    dyStringPrintf(dy, " tStrand='%s',\n", bac->strand);
	    dyStringPrintf(dy, " db='%s',\n", db);
	    dyStringPrintf(dy, " state='%s'\n", "seq");
	    dyStringPrintf(dy, " where id=%d\n", bac->probe);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    //verbose(2, "%s\n", dy->string); // the sql string could be quite large
	    sqlUpdate(conn, dy->string);
	else  /* probe seq already exists */ 
	    /* just re-map the probe table recs to it */
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe);
	    sqlUpdate(conn, dy->string);
	verbose(2,"%d finished bac for probe id %d size %d\n", 
	    count, bac->probe, bac->chromEnd - bac->chromStart);





return count;  
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db)
/* process isPcr results.
 * Reads fasta records from the file "mrna.fa": a line starting with '>'
 * begins a record and the text after '>' becomes the record name.  For a
 * vgPrb row in state 'new' that matches taxon, type and that name, the
 * sequence is stored on the row, unless findVgPrbBySeq reports an existing
 * probe, in which case vgPrbMap is re-pointed and the row deleted.
 * NOTE(review): this listing has no brace lines and the loop headers and
 * some statements appear to be missing (nothing visibly appends sequence
 * lines to dy, and nothing visibly empties dy between statements);
 * compare with the upstream file. */

struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("mrna.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
/* Prime the reader with the first line, expected to be a '>' header. */
boolean more = lineFileNext(lf, &line, &lineSize);
    if (line[0] != '>')
	errAbort("unexpected error out of phase\n");
    name = cloneString(line+1);
    /* Consume lines up to the next '>' header or end of file. */
    while((more=lineFileNext(lf, &line, &lineSize)))
	if (line[0] == '>')
    /* NOTE(review): dy presumably holds the record's sequence here. */
    dna = cloneString(dy->string);

	int oldProbe = 0;
	dyStringPrintf(dy, "select id from vgPrb "
	   "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name);
	oldProbe = sqlQuickNum(conn,dy->string);
	if (oldProbe==0)
	    break;       /* no more records match */
	/* record exists and hasn't already been updated */
	/* A result of 0 is handled as "no probe with this sequence yet". */
	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	if (vgPrb == 0)
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq = '");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " db = '%s',\n", db);
	    dyStringAppend(dy, " state = 'seq'\n");
	    dyStringPrintf(dy, " where id=%d\n", oldProbe);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    verbose(2, "%s\n", dy->string);
	    sqlUpdate(conn, dy->string);
	else  /* probe seq already exists */ 
	    /* just re-map the probe table recs to it */
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe);
	    sqlUpdate(conn, dy->string);


Пример #8
Файл: main.c Проект: bowhan/kent
void paraFlow(char *fileName, int pfArgc, char **pfArgv)
/* parse and dump. */
struct pfCompile *pfc;
struct pfParse *program, *module;
char baseDir[256], baseName[128], baseSuffix[64];
char defFile[PATH_LEN];
char *parseFile = "out.parse";
char *typeFile = "out.typed";
char *boundFile = "out.bound";
char *scopeFile = "out.scope";
char *foldedFile = "out.folded";
char *cFile = "out.c";
FILE *parseF = mustOpen(parseFile, "w");
FILE *typeF = mustOpen(typeFile, "w");
FILE *scopeF = mustOpen(scopeFile, "w");
FILE *boundF = mustOpen(boundFile, "w");
FILE *foldedF = mustOpen(foldedFile, "w");

if (endPhase < 0)
verbose(2, "Phase 0 - initialization\n");
pfc = pfCompileNew();
splitPath(fileName, baseDir, baseName, baseSuffix);
pfc->baseDir = cloneString(baseDir);
safef(defFile, sizeof(defFile), "%s%s.pfh", baseDir, baseName);

if (endPhase < 1)
   return ;
verbose(2, "Phase 1 - tokenizing\n");
pfTokenizeInto(pfc, baseDir, baseName);

if (endPhase < 2)
verbose(2, "Phase 2 - parsing\n");
program = pfParseInto(pfc);
dumpParseTree(pfc, program, parseF);

if (endPhase < 3)
verbose(2, "Phase 3 - binding names\n");
pfBindVars(pfc, program);
dumpParseTree(pfc, program, boundF);

if (endPhase < 4)
verbose(2, "Phase 4 - type checking\n");
pfTypeCheck(pfc, &program);
dumpParseTree(pfc, program, typeF);

if (endPhase < 5)
verbose(2, "Phase 5 - polymorphic, para, and flow checks\n");
checkPolymorphic(pfc, pfc->scopeRefList);
checkParaFlow(pfc, program);
printScopeInfo(scopeF, 0, program);

if (endPhase < 6)
verbose(2, "Phase 6 - constant folding\n");
pfConstFold(pfc, program);
dumpParseTree(pfc, program, foldedF);

if (optionExists("asm"))
    struct dyString *gccFiles;

    if (endPhase < 7)
    verbose(2, "Phase 7 - nothing\n");

    if (endPhase < 8)
    verbose(2, "Phase 8 - Code generation\n");

    pfc->backEnd = backEndFind("mac-pentium");
    gccFiles = asmCoder(pfc, program, baseDir, baseName);

    if (endPhase < 9)
    verbose(2, "Phase 9 - Assembling pentium code\n");
	char *libName = hashMustFindVal(pfc->cfgHash,"runAsmLib");
	struct dyString *dy = dyStringNew(0);
	int err;
	dyStringPrintf(dy, "gcc ");
	dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
	dyStringPrintf(dy, "-o %s%s ", baseDir, baseName);
	dyStringAppend(dy, gccFiles->string);
	dyStringPrintf(dy, "%s ", libName);
	dyStringPrintf(dy, " %s ", pfc->runtimeLib);
	dyStringPrintf(dy, "%s ", pfc->jkwebLib);
	verbose(2, "%s\n", dy->string);
	err = system(dy->string);
	if (err != 0)
	    errAbort("Couldn't assemble: %s", dy->string);
    verbose(2, "Phase 7 - nothing\n");
    if (endPhase < 8)
    verbose(2, "Phase 8 - C code generation\n");
    pfCodeC(pfc, program, baseDir, cFile);
    verbose(2, "%d modules, %d tokens, %d parseNodes\n",
	pfc->moduleHash->elCount, pfc->tkz->tokenCount, pfParseCount(program));

    if (endPhase < 9)
    verbose(2, "Phase 9 - compiling C code\n");
    /* Now run gcc on it. */
	struct dyString *dy = dyStringNew(0);
	int err;
	for (module = program->children; module != NULL; module = module->next)
	    if (module->name[0] != '<' && module->type != pptModuleRef)
		struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name);
		char *cName = replaceSuffix(mod->fileName, ".pf", ".c");
		char *oName = replaceSuffix(mod->fileName, ".pf", ".o");
		dyStringAppend(dy, "gcc ");
		dyStringAppend(dy, "-O ");
		dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
		dyStringAppend(dy, "-c ");
		dyStringAppend(dy, "-o ");
		dyStringPrintf(dy, "%s ", oName);
		dyStringPrintf(dy, "%s ", cName);
		verbose(2, "%s\n", dy->string);
		err = system(dy->string);
		if (err != 0)
		    errAbort("Couldn't compile %s.c", module->name);
	dyStringAppend(dy, "gcc ");
	dyStringAppend(dy, "-O ");
	dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir);
	dyStringPrintf(dy, "-o %s%s ", baseDir, baseName);
	dyStringPrintf(dy, "%s ", cFile);
	for (module = program->children; module != NULL; module = module->next)
	    if (module->name[0] != '<')
		struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name);
		char *suffix = (module->type == pptModuleRef ? ".pfh" : ".pf");
		char *oName = replaceSuffix(mod->fileName, suffix, ".o");
		dyStringPrintf(dy, "%s ", oName);
	dyStringPrintf(dy, " %s ", pfc->runtimeLib);
	dyStringPrintf(dy, "%s ", pfc->jkwebLib);
	dyStringAppend(dy, "-lpthread -lm");
	verbose(2, "%s\n", dy->string);
	err = system(dy->string);
	if (err != 0)
	    errnoAbort("problem compiling:\n", dy->string);

if (endPhase < 10)

verbose(2, "Phase 10 - execution\n");
/* Now go run program itself. */
    struct dyString *dy = dyStringNew(0);
    int err;
    int i;
    if (baseDir[0] == 0)
	dyStringPrintf(dy, "./%s", baseName);
	dyStringPrintf(dy, "%s%s", baseDir, baseName);
    for (i=0; i<pfArgc; ++i)
	dyStringAppendC(dy, ' ');
	dyStringAppend(dy, pfArgv[i]);
    err = system(dy->string);
    if (err != 0)
	errAbort("problem running %s", baseName);
Пример #9
struct submitFileRow *submitFileRowFromFieldedTable(
    struct sqlConnection *conn, struct fieldedTable *table,
    int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx)
/* Turn parsed out table (still all just strings) into list of edwFiles. */
struct submitFileRow *sfr, *sfrList = NULL;
struct edwFile *bf;
struct fieldedRow *fr;
struct dyString *tags = dyStringNew(0);
char *ucscDbTag = "ucsc_db";
int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount);

for (fr = table->rowList; fr != NULL; fr = fr->next)
    char **row = fr->row;
    bf->submitFileName = cloneString(row[fileIx]);
    safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]);
    bf->size = sqlLongLong(row[sizeIx]);
    bf->updateTime = sqlLongLong(row[modifiedIx]);

    /* Add as tags any fields not included in fixed fields. */
    int i;
    for (i=0; i<table->fieldCount; ++i)
	if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx)
	    cgiEncodeIntoDy(table->fields[i], row[i], tags);
    if (ucscDbField < 0)
	/* Try to make this field up from file name */
	char *slash = strchr(bf->submitFileName, '/');
	if (slash == NULL)
	    errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName);
	int len = slash - bf->submitFileName;
	char ucscDbVal[len+1];
	memcpy(ucscDbVal, bf->submitFileName, len);
	ucscDbVal[len] = 0;

	/* Do a little check on it */
	if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal)
	    && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal)
	    && !sameString("hg19", ucscDbVal))
	    errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " 
	             "level directory in the fileName in the manifest is a UCSC database name "
		     "like 'hg19' or 'mm10.'  Alternatively please include a ucsc_db column.",

	/* Add it to tags. */
	cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags);
    bf->tags = cloneString(tags->string);

    /* Fake other fields. */
    bf->edwFileName  = cloneString("");

    /* Allocate wrapper structure */
    sfr->file = bf;

    /* fill in fields about replacement maybe */
    if (replacesIx != -1)
	char *replacesAcc = row[replacesIx];
	char *reason = row[replaceReasonIx];
	int fileId = edwFileIdForLicensePlate(conn, replacesAcc);
	if (fileId == 0)
	    errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag);
	sfr->replaces = cloneString(replacesAcc);
	sfr->replaceReason = cloneString(reason);
	sfr->replacesFile = fileId;

    slAddHead(&sfrList, sfr);
return sfrList;
static struct slName *getProbeList(struct sqlConnection *conn, int id)
/* Get list of probes with hyperlinks to probe info page. */
struct slName *returnList = NULL;
char query[256];
char *sidUrl = cartSidUrlString(cart);
struct dyString *dy = dyStringNew(0);
struct slInt *probeList = NULL, *probe;
int submissionSource = 0;

/* Make up a list of all probes in this image. */
safef(query, sizeof(query),
   "select probe from imageProbe where image=%d", id);
probeList = sqlQuickNumList(conn, query);

safef(query, sizeof(query),
   "select submissionSet.submissionSource from image, submissionSet"
   " where image.submissionSet = submissionSet.id and image.id=%d", id);
submissionSource = sqlQuickNum(conn, query);

for (probe = probeList; probe != NULL; probe = probe->next)
    char *type;

    /* Create hyperlink to probe page around gene name. */
    dyStringPrintf(dy, "<A HREF=\"%s?%s&%s=%d&%s=%d\" target=_parent>",
    	hgVisiGeneCgiName(), sidUrl, hgpDoProbe, probe->val, hgpSs, submissionSource);
    safef(query, sizeof(query), 
    	"select probeType.name from probeType,probe where probe.id = %d "
	"and probe.probeType = probeType.id", 
    type = sqlQuickString(conn, query);
    dyStringPrintf(dy, "%s", naForEmpty(type));
    if (sameWord(type, "antibody"))
	char *abName;
	safef(query, sizeof(query), 
	   "select antibody.name from probe,antibody "
	   "where probe.id = %d and probe.antibody = antibody.id"
	   , probe->val);
	abName = sqlQuickString(conn, query);
	if (abName != NULL)
	    dyStringPrintf(dy, " %s", abName);
    else if (sameWord(type, "RNA"))
	safef(query, sizeof(query),
	    "select length(seq) from probe where id=%d", probe->val);
	if (sqlQuickNum(conn, query) > 0)
	    dyStringPrintf(dy, " sequenced");
	    safef(query, sizeof(query),
		"select length(fPrimer) from probe where id=%d", probe->val);
	    if (sqlQuickNum(conn, query) > 0)
	        dyStringPrintf(dy, " from primers");
    else if (sameWord(type, "BAC"))
	char *name;
	safef(query, sizeof(query), 
	   "select bac.name from probe,bac "
	   "where probe.id = %d and probe.bac = bac.id"
	   , probe->val);
	name = sqlQuickString(conn, query);
	if (name != NULL)
	    dyStringPrintf(dy, " %s", name);
    dyStringPrintf(dy, "</A>");

    /* Add to return list. */
    slNameAddTail(&returnList, dy->string);

return returnList;
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId)
/* Given gene ID, try and find known gene on browser in same
 * species. */
char query[256];
int taxon;
char *url = NULL;
char *genomeDb = NULL;

/* Figure out taxon. */
safef(query, sizeof(query), 
    "select taxon from gene where id = %d", geneId);
taxon = sqlQuickNum(conn, query);

genomeDb = hDbForTaxon(conn, taxon);
if (genomeDb != NULL)
    /* Make sure known genes track exists - we may need
     * to tweak this at some point for model organisms. */
    safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb);
    if (!sqlTableExists(conn, query))
	genomeDb = NULL;

/* If no db for that organism revert to human. */
if (genomeDb == NULL)
    genomeDb = hDefaultDb();

safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb);
if (sqlTableExists(conn, query))
    struct dyString *dy = dyStringNew(0);
    char *knownGene = NULL;
    if (sqlCountColumnsInTable(conn, query) == 3)
	   "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId);
	struct slName *imageList, *image;
	safef(query, sizeof(query), 
	    "select imageProbe.image from probe,imageProbe "
	    "where probe.gene=%d and imageProbe.probe=probe.id", geneId);
	imageList = sqlQuickList(conn, query);
	if (imageList != NULL)
	       "select name from %s.knownToVisiGene ", genomeDb);
	       "where value in(");
	    for (image = imageList; image != NULL; image = image->next)
		dyStringPrintf(dy, "'%s'", image->name);
		if (image->next != NULL)
		    dyStringAppendC(dy, ',');
	    dyStringAppend(dy, ")");
    if (dy->stringSize > 0)
	knownGene = sqlQuickString(conn, dy->string);
	if (knownGene != NULL)
	    dyStringPrintf(dy, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none",
		genomeDb, knownGene);
	    url = dyStringCannibalize(&dy);
return url;
static struct slName *geneProbeList(struct sqlConnection *conn, int id)
/* Get list of gene names with hyperlinks to probe info page.
 * Queries imageProbe for the probes of image 'id', then for each probe
 * looks up the gene, asks getKnownGeneUrl() for a known genes page URL
 * and adds the text accumulated in dy to the returned slName list.
 * NOTE(review): this copy of the function has no braces and appears to
 * be missing lines: pc is filled in without a visible allocation,
 * probeCount is never incremented, dy is never cleared between probes,
 * and the dyStringPrintf call that opens the <A HREF> tag has lost its
 * argument line.  Confirm against the upstream source. */
struct slName *returnList = NULL;
char query[256], **row;
struct sqlResult *sr;
struct dyString *dy = dyStringNew(0);
struct probeAndColor *pcList = NULL, *pc;
int probeCount = 0;

/* Make up a list of all probes in this image. */
safef(query, sizeof(query),
   "select probe,probeColor from imageProbe where image=%d", id);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    /* Column 0 is the probe id, column 1 the probeColor id. */
    pc->probe = sqlUnsigned(row[0]);
    pc->probeColor = sqlUnsigned(row[1]);
    slAddHead(&pcList, pc);

/* Turn each probe into one display string. */
for (pc = pcList; pc != NULL; pc = pc->next)
    int geneId;
    char *geneName;
    int probe = pc->probe;
    char *geneUrl = NULL;

    /* Get gene ID and name. */
    safef(query, sizeof(query), 
    	"select gene from probe where id = %d", probe);
    geneId = sqlQuickNum(conn, query);
    geneName = vgGeneNameFromId(conn, geneId);
    /* Get url for known genes page if any. */
    geneUrl = getKnownGeneUrl(conn, geneId);

    /* Print gene name, surrounded by hyperlink to known genes
     * page if possible. */
    if (geneUrl != NULL)
	dyStringPrintf(dy, "<A HREF=\"%s\" target=_parent>",
    dyStringPrintf(dy, "%s", geneName);
    if (geneUrl != NULL)
	dyStringAppend(dy, "</A>");

    /* Add color if there's more than one probe for this image. */
    if (probeCount > 1)
	char *color;
	safef(query, sizeof(query), 
	    "select probeColor.name from probeColor "
	    "where probeColor.id = %d"
	    , pc->probeColor);
	color = sqlQuickString(conn, query);
	/* The color name is appended in parentheses after the gene name. */
	if (color != NULL)
	    dyStringPrintf(dy, " (%s)", color);

    /* Add to return list. */
    slNameAddTail(&returnList, dy->string);

return returnList;
Пример #13
boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
				boolean *chunked, int *contentLength)
/* Extract HTTP response header from lf into hdr, tell if it's
 * "Transfer-Encoding: chunked" or if it has a contentLength.
 * *hdr always receives a cloneString() copy of the header text gathered
 * so far, on both the TRUE and the FALSE return paths.  chunked and
 * contentLength may be NULL; when given they default to FALSE and -1.
 * Returns FALSE when the status line is malformed or its code is not
 * "200".
 * NOTE(review): braces and several short lines (else, break, the call
 * that puts a line back) appear to be missing from this copy. */
  struct dyString *header = newDyString(1024);
  char *line;
  int lineSize;

  /* Defaults reported when the matching header line is never seen. */
  if (chunked != NULL)
    *chunked = FALSE;
  if (contentLength != NULL)
    *contentLength = -1;
  /* The first line has to be the status line: HTTP/<version> <code> ... */
  if (lineFileNext(lf, &line, &lineSize))
      if (startsWith("HTTP/", line))
	char *version, *code;
	/* Save the status line first; nextWord() advances line past each word. */
	dyStringAppendN(header, line, lineSize-1);
	dyStringAppendC(header, '\n');
	version = nextWord(&line);
	code = nextWord(&line);
	if (code == NULL)
	    warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
	    *hdr = cloneString(header->string);
	    return FALSE;
	/* Any status other than 200 is reported as a failure. */
	if (!sameString(code, "200"))
	    warn("%s: Errored HTTP response header: %s %s %s\n", lf->fileName, version, code, line);
	    *hdr = cloneString(header->string);
	    return FALSE;
	while (lineFileNext(lf, &line, &lineSize))
	    /* blank line means end of HTTP header */
	    if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
	    if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
	        *chunked = TRUE;
	    dyStringAppendN(header, line, lineSize-1);
	    dyStringAppendC(header, '\n');
	    /* The second word of a Content-Length line is taken as the length.
	     * NOTE(review): code is not checked for NULL before atoi() in the
	     * visible code. */
	    if (strstr(line, "Content-Length:"))
		code = nextWord(&line);
		code = nextWord(&line);
		if (contentLength != NULL)
		    *contentLength = atoi(code);
	  /* put the line back, don't put it in header/hdr */
	  warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n", lf->fileName, header->string);
	  *hdr = cloneString(header->string);
	  return FALSE;
      *hdr = cloneString(header->string);
      return FALSE;

  *hdr = cloneString(header->string);
  return TRUE;
} /* lineFileParseHttpHeader */
Пример #14
boolean lineFileNextFull(struct lineFile *lf, char **retFull, int *retFullSize,
                        char **retRaw, int *retRawSize)
// Fetch next line from file joining up any that are continued by ending '\'
// If requested, and was joined, the unjoined raw lines are also returned
// NOTE: comment lines can't be continued!  ("# comment \ \n more comment" is 2 lines.)
// retFullSize, retRaw and retRawSize are optional and may be NULL.
// The returned strings are the contents of lf->fullLine and lf->rawLines.
// Returns FALSE when lineFileNext() has no more lines.
// NOTE(review): braces and several short lines (else, break, continue, the
// buffer clears) appear to be missing from this copy of the function.
// May have requested reusing the last full line.
if (lf->fullLineReuse)
    lf->fullLineReuse = FALSE;
    assert(lf->fullLine != NULL);
    *retFull = dyStringContents(lf->fullLine);
    if (retFullSize)
        *retFullSize = dyStringLen(lf->fullLine);
    if (retRaw != NULL)
        assert(lf->rawLines != NULL);
        *retRaw = dyStringContents(lf->rawLines);
        if (retRawSize)
            *retRawSize = dyStringLen(lf->rawLines);
    return TRUE;

// Empty pointers
*retFull = NULL;
if (retRaw != NULL)
    *retRaw = NULL;

// Prepare lf buffers
if (lf->fullLine == NULL)
    lf->fullLine = dyStringNew(1024);
    lf->rawLines = dyStringNew(1024); // Better to always create it than test every time

char *line;
while (lineFileNext(lf, &line, NULL))
    char *start = skipLeadingSpaces(line);

    // Will the next line continue this one?
    // 'end' finishes either at the end of the text or on the continuation '\'.
    char *end = start;
    if (*start == '#')  // Comment lines can't be continued!
        end = start + strlen(start);
        while (*end != '\0')  // walking forward for efficiency (avoid strlens())
            for (;*end != '\0' && *end != '\\'; end++) ; // Tight loop to find '\'
            if (*end == '\0')

            // This could be a continuation
            char *slash = end;
            if (*(++end) == '\\')  // escaped
            end = skipLeadingSpaces(end);

            if (*end == '\0') // Just whitespace after '\', so true continuation mark
                if (retRaw != NULL) // Only if actually requested.
                    dyStringAppendN(lf->rawLines,line,(end - line));
                    dyStringAppendC(lf->rawLines,'\n'); // New lines delimit raw lines.
                end = slash; // Don't need to zero, because of appending by length

    // Stitch together full lines
    if (dyStringLen(lf->fullLine) == 0)
        dyStringAppendN(lf->fullLine,line,(end - line)); // includes first line's whitespace
    else if (start < end)             // don't include continued line's leading spaces
        dyStringAppendN(lf->fullLine,start,(end - start));

    // 'end' still on a '\' means this line is continued by the next one.
    if (*end == '\\')

    // Got a full line now!
    *retFull = dyStringContents(lf->fullLine);
    if (retFullSize)
        *retFullSize = dyStringLen(lf->fullLine);

    if (retRaw != NULL && dyStringLen(lf->rawLines) > 0) // Only if actually requested & continued
        // This is the final line which doesn't have a continuation char
        dyStringAppendN(lf->rawLines,line,(end - line));
        *retRaw = dyStringContents(lf->rawLines);
        if (retRawSize)
            *retRawSize = dyStringLen(lf->rawLines);
    return TRUE;
return FALSE;
static void populateMissingVgPrb(struct sqlConnection *conn)
/* populate vgPrb where missing, usually after new records added to visiGene.
 * Selects probe rows that have no vgPrbMap entry, picks a vgPrb type and
 * state for each one from the fields that are filled in, reuses an existing
 * vgPrb found by findVgPrbBySeq() or inserts a new one, then inserts the
 * vgPrbMap row linking the probe to it.
 * conn runs the select; conn2 runs the inserts and conn3 the sequence
 * lookup while the select result is still being read.
 * NOTE(review): braces and several lines (the call that starts the select
 * string, the final else, the dy clears, the counter increments and the
 * cleanup) appear to be missing from this copy.  The probe sequence is
 * appended to the SQL text without escaping in the visible code. */
struct sqlResult *sr;
char **row;
struct dyString *dy = dyStringNew(0);
struct sqlConnection *conn2 = sqlConnect(database);
struct sqlConnection *conn3 = sqlConnect(database);
int probeCount=0, vgPrbCount=0;

/* Probes joined to their gene, keeping only those with no vgPrbMap row. */
"select p.id,p.gene,antibody,probeType,fPrimer,rPrimer,p.seq,bac,g.taxon"
" from probe p join gene g"
" left join vgPrbMap m on m.probe = p.id"
" where g.id = p.gene"
"   and m.probe is NULL");
sr = sqlGetResult(conn, dy->string);
while ((row = sqlNextRow(sr)) != NULL)
    int id = sqlUnsigned(row[0]); 
    /* int gene = sqlUnsigned(row[1]); */
    /* int antibody = sqlUnsigned(row[2]); */
    /* int probeType = sqlUnsigned(row[3]); */
    char *fPrimer = row[4]; 
    char *rPrimer = row[5]; 
    char *seq = row[6]; 
    int bac = sqlUnsigned(row[7]); 
    int taxon = sqlUnsigned(row[8]); 

    /* Values for the new vgPrb row; some are only declared and not used
     * in the visible code. */
    char *peType = "none";
    int peProbe = id;
    char *peSeq = seq;
    char *tName = "";
    int tStart = 0;
    int tEnd = 0;
    char *tStrand = " ";
    char *peGene = "";
    int bacInfo = 0;
    int seqid = 0;
    int pslid = 0;
    char *state = "new";
    char *db = "";
    int vgPrb = 0;

    /* Choose the type from whichever source of sequence is available. */
    if (isNotEmpty(seq))
	peType = "probe";
	state = "seq";
    else if (isNotEmpty(fPrimer) && isNotEmpty(rPrimer))
	peType = "primersMrna";
    else if (isNotEmpty(fPrimer) && isEmpty(rPrimer))
    	{ /* only have fPrimer, it's probably a comment, not dna seq */
	peType = "refSeq";   /* use accession or gene */
    else if (bac > 0)
	peType = "bac";   /* use bacEndPairs */
	peType = "refSeq";   /* use accession or gene */

    /* Look for an existing vgPrb with the same sequence and taxon. */
    if (!sameString(peSeq,""))
	vgPrb = findVgPrbBySeq(conn3,peSeq,taxon);

    /* vgPrb is still 0 here when no existing record was found. */
    if (vgPrb == 0)
	dyStringAppend(dy, "insert into vgPrb set");
	dyStringPrintf(dy, " id=default,\n");
	dyStringPrintf(dy, " type='%s',\n", peType);
	dyStringAppend(dy, " seq='");
	dyStringAppend(dy, peSeq);
	dyStringAppend(dy, "',\n");
	dyStringPrintf(dy, " tName='%s',\n", tName);
	dyStringPrintf(dy, " tStart=%d,\n", tStart);
	dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
	dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
	dyStringPrintf(dy, " db='%s',\n", db);
	dyStringPrintf(dy, " taxon='%d',\n", taxon);
	dyStringPrintf(dy, " state='%s'\n", state);
	verbose(2, "%s\n", dy->string);
	sqlUpdate(conn2, dy->string);
	/* The id of the row just inserted becomes the map target. */
	vgPrb = sqlLastAutoId(conn2);
    /* Link the probe to its vgPrb record. */
    dyStringAppend(dy, "insert into vgPrbMap set");
    dyStringPrintf(dy, " probe=%d,\n", peProbe);
    dyStringPrintf(dy, " vgPrb=%d \n", vgPrb);
    verbose(2, "%s\n", dy->string);
    sqlUpdate(conn2, dy->string);


verbose(1, "# new probe records found = %d, # new vgPrb records added = %d\n", probeCount, vgPrbCount);



Пример #16
static void readPartHeaderMB(struct mimeBuf *b, struct mimePart *p, char *altHeader)
/* Reads the header lines of the mimePart,
   saves the header settings in a hash.
   When altHeader is given the lines are read from that string through a
   lineFile, otherwise they come from b via getLineMB().  p->hdr is
   replaced with a new hash.
   NOTE(review): braces, the do/else keywords and the statements that
   build fullLine and add key/val to the hash appear to be missing from
   this copy of the function.  */
struct dyString *fullLine = dyStringNew(0);
char *key=NULL, *val=NULL;
struct lineFile *lf = NULL;
char *line = NULL;
char *lineAhead = NULL;
int size = 0;
p->hdr = newHash(3);
    	//fprintf(stderr,"headers dumpMB: ");
	//dumpMB(b);  //debug
if (altHeader)
    lf = lineFileOnString("MIME Header", TRUE, altHeader);
/* read ahead one line, skipping any leading blanks lines */   
    if (altHeader)
	lineFileNext(lf, &lineAhead, &size);
	lineAhead = getLineMB(b);
    while (sameString(lineAhead,""));

    /* accumulate a full header line - some emailers split into mpl lines */
	line = lineAhead;
	if (altHeader)
	    lineFileNext(lf, &lineAhead, &size);
	    lineAhead = getLineMB(b);
	if (!altHeader) 
	/* A following line that starts with white space continues this one. */
	} while (isspace(lineAhead[0]));
    line = fullLine->string;
    //fprintf(stderr,"found a line! [%s]\n",line);  //debug
    /* Split "key: value" at the first colon; the colon is overwritten
     * with 0 so that key is terminated there. */
    key = line;
    val = strchr(line,':');
    if (!val)
	errAbort("readPartHeaderMB error - header-line colon not found, line=[%s]",line);
    *val = 0;
    // since the hash is case-sensitive, convert to lower case for ease of matching
    //fprintf(stderr,"MIME header: key=[%s], val=[%s]\n",key,val);
    /* An empty line ends the header. */
    } while (!sameString(lineAhead,""));
/* Take over the newline type found by the lineFile if none is set yet. */
if (altHeader)
    if (nlType == nlt_undet)
	nlType = lf->nlType;
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db)
/* process isPcr results.
 * Reads the fasta file "isPcr.fa".  The header line of each record is
 * parsed for tName, tStart, tStrand, tEnd and a vgPrb id.  If that vgPrb
 * row is still in state 'new' it is either updated with the sequence and
 * position, or, when findVgPrbBySeq() already knows the sequence, its
 * vgPrbMap rows are re-pointed at the existing record and it is deleted.
 * NOTE(review): braces, the outer loop, the lines that collect the
 * sequence into dy and the dy clears appear to be missing from this copy.
 * None of the strchr() results are checked for NULL in the visible code. */


struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
char *word, *end;
char *tName;
int tStart;
int tEnd;
char *tStrand;
int probeid=0;  /* really a vgPrb id */
boolean more = lineFileNext(lf, &line, &lineSize);
    /* Each record has to begin with a '>' header line. */
    if (line[0] != '>')
	errAbort("unexpected error out of phase\n");
    name = cloneString(line);
    /* Read on until the header line of the next record. */
    while((more=lineFileNext(lf, &line, &lineSize)))
	if (line[0] == '>')
    dna = cloneString(dy->string);
    /* Parse the header: the text after '>' up to ':' is tName, the number
     * before the '+' or '-' is tStart, then tEnd and the probe id follow,
     * each ended by a space. */
    word = name+1;
    end = strchr(word,':');
    tName = cloneStringZ(word,end-word); 
    word = end+1;
    end = strchr(word,'+');
    tStrand = "+";
    if (!end)
	end = strchr(word,'-');
	tStrand = "-";
    tStart = atoi(word); 
    word = end+1;
    end = strchr(word,' ');
    tEnd = atoi(word); 
    word = end+1;
    end = strchr(word,' ');
    probeid = atoi(word); 

    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid);
    if (sqlQuickNum(conn,dy->string)>0)
	/* record exists and hasn't already been updated */

	int vgPrb = findVgPrbBySeq(conn,dna,taxon);
	/* 0 means no vgPrb record has this sequence yet. */
	if (vgPrb == 0)
	    dyStringAppend(dy, "update vgPrb set");
	    dyStringAppend(dy, " seq='");
	    dyStringAppend(dy, dna);
	    dyStringAppend(dy, "',\n");
	    dyStringPrintf(dy, " tName='%s',\n", tName);
	    dyStringPrintf(dy, " tStart=%d,\n", tStart);
	    dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
	    dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
	    dyStringPrintf(dy, " db='%s',\n", db);
	    dyStringPrintf(dy, " state='%s'\n", "seq");
	    dyStringPrintf(dy, " where id=%d\n", probeid);
	    dyStringPrintf(dy, " and state='%s'\n", "new");
	    verbose(2, "%s\n", dy->string);
	    sqlUpdate(conn, dy->string);
	else  /* probe seq already exists */ 
	    /* just re-map the probe table recs to it */
	    dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid);
	    sqlUpdate(conn, dy->string);
	    /* and delete it from vgPrb */
	    dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid);
	    sqlUpdate(conn, dy->string);

Пример #18
void condenseValues()
/* combine values for single snp.
 * Reads (snp_id, subsnp_id, build_id) rows from SNPSubSNPLink and writes
 * one tab-separated line per snp_id to the SNPSubSNPLinkCondense tab file:
 * snp id, comma-separated subsnp ids, comma-separated build ids, first
 * build, last build and a count.  Aborts if a snp_id is smaller than the
 * one before it.
 * The build id is copied with an explicit "%s" format; it comes from the
 * database and must never be used as the format string itself.
 * NOTE(review): braces, the final else, the list clears, the count
 * increment and the cleanup calls appear to be missing from this copy
 * of the function - confirm against the upstream source. */
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;
FILE *f;
struct dyString *ssList = newDyString(255);
struct dyString *buildList = newDyString(255);
char *currentSnpString = NULL;
int currentSnpNum = 0;
int count = 0;
char firstBuild[32];
char lastBuild[32];

f = hgCreateTabFile(".", "SNPSubSNPLinkCondense");

sqlSafef(query, sizeof(query), "select snp_id, subsnp_id, build_id from SNPSubSNPLink");

sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    /* Very first row: start the first group. */
    if (currentSnpString == NULL) 
        currentSnpString = cloneString(row[0]);
	currentSnpNum = sqlUnsigned(row[0]);
	dyStringPrintf(ssList, "%s", row[1]);
	dyStringPrintf(buildList, "%s", row[2]);
	safef(firstBuild, sizeof(firstBuild), "%s", row[2]);
	safef(lastBuild, sizeof(lastBuild), "%s", row[2]);
    /* The snp id changed: write out the finished group and start a new one. */
    else if (!sameString(row[0], currentSnpString))
	fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", 
	           currentSnpString, ssList->string, buildList->string, firstBuild, lastBuild, count);
	if (currentSnpNum > sqlUnsigned(row[0]))
	    errAbort("snps out of order: %d before %s\n", currentSnpNum, row[0]);
	currentSnpString = cloneString(row[0]);
	currentSnpNum = sqlUnsigned(row[0]);
	dyStringPrintf(ssList, "%s", row[1]);
	dyStringPrintf(buildList, "%s", row[2]);
	safef(firstBuild, sizeof(firstBuild), "%s", row[2]);
	safef(lastBuild, sizeof(lastBuild), "%s", row[2]);
	count = 1;
	/* Same snp id as before: extend the comma-separated lists. */
	dyStringAppend(ssList, ",");
	dyStringAppend(ssList, row[1]);
	dyStringAppend(buildList, ",");
	dyStringAppend(buildList, row[2]);
	safef(lastBuild, sizeof(lastBuild), "%s", row[2]);
/* Write out the last group. */
fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", currentSnpString, ssList->string, buildList->string, 
                                       firstBuild, lastBuild, count);
static void doPrimers(struct sqlConnection *conn, int taxon, char *db)
/* get probe seq from primers */
int rc = 0;
struct dyString *dy = dyStringNew(0);
char cmdLine[256];
char path1[256];
char path2[256];

dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'");
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */

    dyStringPrintf(dy, "select qName from %s.all_mrna",db);
    rc = 0;
    rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE);
    safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db);
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    verbose(1,"rc = %d = count of mrna for %s\n",rc,db);

    system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date");
    system("ls -l");


    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");


/* find any remaining type primersMrna that couldn't be resolved and demote 
 * them to type primersGenome
dyStringAppend(dy, "update vgPrb set type='primersGenome'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersMrna'");
sqlUpdate(conn, dy->string);

/* get primers for those probes that did not find mrna isPcr matches 
 * and then do them against the genome instead */
dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g");
dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon);
dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'");
rc = 0;
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE);
verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon);

if (rc > 0) /* something to do */
    verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

	    "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'",
    system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");
    verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l");

    unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa");


/* find any remaining type primersGenome that couldn't be resolved and demote 
 * them to type refSeq
dyStringAppend(dy, "update vgPrb set type='refSeq'"); 
dyStringPrintf(dy, " where taxon = %d",taxon);
dyStringAppend(dy, " and state = 'new' and type='primersGenome'");
sqlUpdate(conn, dy->string);

Пример #20
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
	int resTryCount, int resScales[], int resSizes[], 
	boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f.  Save info
 * about sections in bounds. */
/* Each input row is chrom, start, end, value.  A section is written when
 * itemsPerSlot items have been collected, when the input ends, or when the
 * chromosome changes.  resSizes[] is incremented once per zoom window of
 * width resScales[] that the items touch, and *retMaxSectionSize receives
 * the largest uncompressed section size.
 * NOTE(review): braces and several short lines (else, break, the stream
 * clear, the call that skips browser/track lines) appear to be missing from
 * this copy of the function. */
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
/* Items of the section being collected; sized at run time. */
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
/* End of the current zoom window for each resolution being tried. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);

/* remove initial browser and track lines */

for (;;)
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Figure out whether need to output section. */
    boolean sameChrom = FALSE;
    if (rowSize > 0)
	sameChrom = sameString(row[0], usage->name);
    if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
	/* Figure out section position. */
	/* NOTE(review): items[itemIx-1] reads before the array when itemIx
	 * is 0, e.g. if the input has no data rows - confirm callers rule
	 * that out. */
	bits32 chromId = usage->id;
	bits32 sectionStart = items[0].start;
	bits32 sectionEnd = items[itemIx-1].end;

	/* Save section info for indexing. */
	assert(sectionIx < sectionCount);
	struct bbiBoundsArray *section = &bounds[sectionIx++];
	section->offset = ftell(f);
	section->range.chromIx = chromId;
	section->range.start = sectionStart;
	section->range.end = sectionEnd;

	/* Output section header to stream. */
	/* NOTE(review): itemCount is 16 bits wide while itemIx is an int. */
	UBYTE type = bwgTypeBedGraph;
	bits16 itemCount = itemIx;
	dyStringWriteOne(stream, chromId);			// chromId
	dyStringWriteOne(stream, sectionStart);		// start
	dyStringWriteOne(stream, sectionEnd);	// end
	dyStringWriteOne(stream, reserved32);		// itemStep
	dyStringWriteOne(stream, reserved32);		// itemSpan
	dyStringWriteOne(stream, type);			// type
	dyStringWriteOne(stream, reserved8);			// reserved
	dyStringWriteOne(stream, itemCount);			// itemCount

	/* Output each item in section to stream. */
	int i;
	for (i=0; i<itemIx; ++i)
	    struct sectionItem *item = &items[i];
	    dyStringWriteOne(stream, item->start);
	    dyStringWriteOne(stream, item->end);
	    dyStringWriteOne(stream, item->val);

	/* Save stream to file, compressing if need be. */
	/* The maximum is tracked on the uncompressed size. */
	if (stream->stringSize > maxSectionSize)
	    maxSectionSize = stream->stringSize;
	if (doCompress)
	    size_t maxCompSize = zCompBufSize(stream->stringSize);
	    char compBuf[maxCompSize];
	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    mustWrite(f, stream->string, stream->stringSize);

	/* If at end of input we are done. */
	if (rowSize == 0)

	/* Set up for next section. */
	itemIx = 0;

	/* On a chromosome change move to the next usage record, which has
	 * to match the row just read, and restart the sort and zoom state. */
	if (!sameChrom)
	    usage = usage->next;
	    assert(usage != NULL);
            if (!sameString(row[0], usage->name))
                errAbort("read %s, expecting %s on line %d in file %s\n", 
                    row[0], usage->name, lf->lineIx, lf->fileName);
	    assert(sameString(row[0], usage->name));
	    lastB = NULL;
	    for (resTry = 0; resTry < resTryCount; ++resTry)
		resEnds[resTry] = 0;

    /* Parse out input. */
    lineFileExpectWords(lf, 4, rowSize);
    bits32 start = lineFileNeedNum(lf, row, 1);
    bits32 end = lineFileNeedNum(lf, row, 2);
    float val = lineFileNeedDouble(lf, row, 3);

    /* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
    if (start > end)
        errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
    if (lastB != NULL)
	if (lastB->start > start)
	    errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
	if (lastB->end > start)
	    errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);

    /* Do zoom counting. */
    /* A new window opens when the item starts at or past the current
     * window end; further windows are counted while the item extends
     * beyond it. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	while (end > resEnd)
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];

    /* Save values in output array. */
    struct sectionItem *b = &items[itemIx];
    b->start = start;
    b->end = end;
    b->val = val;
    lastB = b;
    itemIx += 1;
/* Every section slot the caller sized bounds for must have been used. */
assert(sectionIx == sectionCount);

*retMaxSectionSize = maxSectionSize;
Пример #21
void xpParseStartTag(struct xp *xp, 
	int maxAttCount,		  /* Maximum attribute count. */
	struct dyString *retName, 	  /* Returns tag name */
	int *retAttCount, 		  /* Returns attribute count. */
	struct dyString **retAttributes,  /* Name, value, name, value... */
	boolean *retClosed)	  /* If true then is self-closing (ends in />) */
/* Call this after the first '<' in a tag has been read.  It'll
 * parse out until the '>' tag.
 * The tag name is appended to retName.  Attribute names and values go to
 * the dyStrings in retAttributes, which are created on demand when a slot
 * is NULL.  Values are delimited by single or double quotes and '&' inside
 * a value is handed to xpLookup().  Errors are reported through xpError().
 * NOTE(review): braces and many short lines (break, else, the line
 * counter updates, the buffer clears and the attCount increments) appear
 * to be missing from this copy of the function. */
char c, quotC;
int attCount = 0;
struct dyString *dy;
int lineStart;


/* Skip white space after '<' and before tag name. */
for (;;)
    if ((c = xpGetChar(xp)) == 0)
    if (isspace(c))
	if (c == '\n')

/* Read in tag name. */
/* The name ends at '>', '/' or white space. */
for (;;)
    dyStringAppendC(retName, c);
    if ((c = xpGetChar(xp)) == 0)
    if (c == '>' || c == '/' || isspace(c))
if (c == '\n')

/* Parse attributes. */
if (c != '>' && c != '/')
    for (;;)
	/* Skip leading white space. */
	for (;;)
	    if ((c = xpGetChar(xp)) == 0)
	    if (isspace(c))
		if (c == '\n')
	if (c == '>' || c == '/')

	/* Allocate space in attribute table. */
	if (attCount >= maxAttCount - 2)
	    xpError(xp, "Attribute stack overflow");
	dy = retAttributes[attCount];
	if (dy == NULL)
	    dy = retAttributes[attCount] = newDyString(64);

	/* Read until not a label character. */
	for (;;)
	    dyStringAppendC(dy, c);
	    if ((c = xpGetChar(xp)) == 0)
	    if (isspace(c))
		if (c == '\n')
	    if (c == '=')
	    if (c == '/' || c == '>')
		xpError(xp, "Expecting '=' after attribute name");

	/* Skip white space until '=' */
	if (c != '=')
	    for (;;)
		if ((c = xpGetChar(xp)) == 0)
		if (isspace(c))
		    if (c == '\n')
	    if (c != '=')
		xpError(xp, "Expecting '=' after attribute name");

	/* Skip space until quote. */
	for (;;)
	    if ((c = xpGetChar(xp)) == 0)
	    else if (isspace(c))
		if (c == '\n')
	if (c != '\'' && c != '"')
	    xpError(xp, "Expecting quoted string after =");

	/* Allocate space in attribute table. */
	if (attCount >= maxAttCount - 2)
	    xpError(xp, "Attribute stack overflow");
	dy = retAttributes[attCount];
	if (dy == NULL)
	    dy = retAttributes[attCount] = newDyString(64);

	/* Read until next quote. */
	/* The value ends at the same quote character that opened it; the
	 * starting line is kept for the end-of-file error message. */
	quotC = c;
	lineStart = xp->lineIx;
	for (;;)
	    if ((c = xpGetChar(xp)) == 0)
	       xpError(xp, "End of file inside literal string that started at line %d", lineStart);
	    if (c == quotC)
	    if (c == '&')
	       xpLookup(xp, xp->endTag, dy);
		if (c == '\n')
		dyStringAppendC(dy, c);
/* A '/' has to be followed directly by '>' and marks a self-closing tag. */
if (c == '/')
    *retClosed = TRUE;
    c = xpGetChar(xp);
    if (c != '>')
        xpError(xp, "Expecting '>' after '/'");
    *retClosed = FALSE;
*retAttCount = attCount;
Пример #22
int checkTableCoords(char *db)
/* Check several invariants (see comments in check*() above), 
 * summarize errors, return nonzero if there are errors.
 * Checks the single table named by theTable when it is set, otherwise
 * every table returned by getTableNames().  Only tables whose
 * hTableInfo says isPos are examined.
 * NOTE(review): braces and several lines (else, continue, argument
 * continuation lines of some calls, cleanup) appear to be missing from
 * this copy of the function. */
struct sqlConnection *conn = hAllocConn(db);
struct slName *tableList = NULL, *curTable = NULL;
struct slName *allChroms = NULL;
boolean gotError = FALSE;

allChroms = hAllChromNames(db);
/* Decide which tables to check. */
if (theTable == NULL)
    tableList = getTableNames(conn);
else if (sqlTableExists(conn, theTable))
    tableList = newSlName(theTable);
    errAbort("Error: specified table \"%s\" does not exist in database %s.",
	     theTable, db);

for (curTable = tableList;  curTable != NULL;  curTable = curTable->next)
    struct hTableInfo *hti = NULL;
    struct slName *chromList = NULL, *chromPtr = NULL;
    char *table = curTable->name;
    char tableChrom[32], trackName[128], tableChromPrefix[33];
    /* Separate the table name into its chromosome part and track name. */
    hParseTableName(db, table, trackName, tableChrom);
    hti = hFindTableInfo(db, tableChrom, trackName);
    if (hti != NULL && hti->isPos)
	/* watch out for presence of both split and non-split tables; 
	 * hti for non-split will be replaced with hti of split. */
	if (splitAndNonSplitExist(conn, table, tableChrom))
	safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom);
	/* A split table is checked against its own chromosome only,
	 * any other table against all of them. */
	if (hti->isSplit)
	    chromList = newSlName(tableChrom);
	    chromList = allChroms;
	/* invariant: chrom must be described in chromInfo. */
        /* items with bad chrom will be invisible to hGetBedRange(), so 
	 * catch them here by SQL query. */
	/* The SQL query is too huge for scaffold-based db's, check count: */
	if (hChromCount(db) <= MAX_SEQS_SUPPORTED)
	    if (isNotEmpty(hti->chromField))
		/* Count the rows whose chrom field matches none of the
		 * names in chromList. */
		struct dyString *bigQuery = newDyString(1024);
		sqlDyStringPrintf(bigQuery, "select count(*) from %s where ",
		for (chromPtr=chromList; chromPtr != NULL;
		    sqlDyStringPrintf(bigQuery, "%s != '%s' ",
				   hti->chromField, chromPtr->name);
		    if (chromPtr->next != NULL)
			dyStringAppend(bigQuery, "AND ");
		gotError |= reportErrors(BAD_CHROM, table,
					 sqlQuickNum(conn, bigQuery->string));
	    /* Load the items of each chromosome and run the per-item checks. */
	    for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next)
		char *chrom = chromPtr->name;
		struct bed *bedList = hGetBedRange(db, table, chrom, 0, 0, NULL);
		if (hti->isSplit && isNotEmpty(hti->chromField))
		    gotError |= checkSplitTableOnlyChrom(bedList, table, hti,
		gotError |= checkStartEnd(bedList, table, hti,
		if (hti->hasCDS)
		    gotError |= checkCDSStartEnd(bedList, table, hti);
		if (hti->hasBlocks && !ignoreBlocks)
		    gotError |= checkBlocks(bedList, table, hti);
return gotError;
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm)
/* Add lines of sdrfFile as children of first top level stanza in storm.
 * The file is read as a tab-separated table.  Column names are converted
 * by aeFieldToNormalField(), columns that share a name are merged into one
 * comma-separated value, and each row becomes a new stanza under the top
 * stanza unless it differs from the previous row only in the fastq fields,
 * in which case those values are appended to the previous stanza.
 * NOTE(review): braces and several lines (else, the initializer of
 * fieldsWithFastqs, the allocation of each fieldMerger, the clearing of
 * merged values, the action taken when there is not exactly one top
 * stanza) appear to be missing from this copy of the function. */
struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 );

/* Convert ArrayExpress field names to our field names */
/* lastNonTerm and lastNonUnit hold earlier field names; the Term_Source_REF,
 * Term_Accession_Number and Unit_ columns are renamed with them as prefix. */
int fieldIx;
char *lastNonTerm = NULL;
char *lastNonUnit = NULL;
for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1)
    char tagName[256];
    aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName));
    if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName))
         safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm);
	 table->fields[fieldIx] = lmCloneString(table->lm, tagName);
    else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName))
         safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm);
	 table->fields[fieldIx] = lmCloneString(table->lm, tagName);
    else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName))
	 safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit);
	 lastNonTerm = lmCloneString(table->lm, tagName);
	 table->fields[fieldIx] = lastNonTerm;
         lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName);
	 table->fields[fieldIx] = lastNonTerm;

/* Make up fastq field indexes to handle processing of paired reads in fastq, which
 * take two lines of sdrf file. */
char *fieldsWithFastqs[] = 
/* Fields that contain the fastq file names */
boolean mightReuseStanza = TRUE;
bool *reuseMultiFields;  // If set this field can vary and line still reused
AllocArray(reuseMultiFields, table->fieldCount);
int i;
for (i=0; i<ArraySize(fieldsWithFastqs); ++i)
    char *field = fieldsWithFastqs[i];
    int ix = stringArrayIx(field, table->fields, table->fieldCount);
    if (ix >=0)
	reuseMultiFields[ix] = TRUE;
    else if (i == 0)
	mightReuseStanza = FALSE;
        break;	    // Make sure has first one if going to do paired read fastq processing

/* Make up a list and hash of fieldMergers to handle conversion of columns that occur
 * multiple times to a comma-separated list of values in a single column. */
struct fieldMerger
/* Something to help merge multiple columns with same name */
    struct fieldMerger *next;	/* Next in list */
    char *name;	
    struct dyString *val;	/* Comma separated value */
struct hash *fieldHash = hashNew(0);
struct fieldMerger *fmList = NULL;
/* One fieldMerger per distinct field name, kept in column order. */
for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
    char *fieldName = table->fields[fieldIx];
    if (hashLookup(fieldHash, fieldName) == NULL)
	struct fieldMerger *fm;
	fm->name = fieldName;
	fm->val = dyStringNew(0);
	slAddTail(&fmList, fm);
	hashAdd(fieldHash, fieldName, fm);

/* Grab top level stanza and make sure there is only one. */
struct tagStanza *topStanza = storm->forest;
if (topStanza == NULL || topStanza->next != NULL)

/* Scan through table, making new stanzas for each row and hooking them into topStanza */
struct fieldedRow *fr, *lastFr = NULL;
struct tagStanza *stanza = NULL;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    /* Empty out any existing vals */
    struct fieldMerger *fm;
    for (fm = fmList; fm != NULL; fm = fm->next)

    /* Add all non-empty values from this row to our fieldMergers. */
    char **row = fr->row;
    for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
	char *fieldName = table->fields[fieldIx];
	fm = hashMustFindVal(fieldHash, fieldName);
	char *val = row[fieldIx];
	if (!isEmpty(val))
	    csvEscapeAndAppend(fm->val, val);

    /* If only the reuseMultiFields are varying, append to those values in previous stanza,
     * otherwise make a new stanza */
    if (mightReuseStanza && lastFr != NULL 
        && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields))
	int i;
	for (i=0; i<ArraySize(fieldsWithFastqs); ++i)
	    char *fieldName = fieldsWithFastqs[i];
	    if ((fm = hashFindVal(fieldHash, fieldName)) != NULL)
		/* Replace the tag with "old,new"; the buffer has room for
		 * both strings, the comma and the terminating zero. */
		char *newVal = fm->val->string;
		char *oldVal = tagMustFindVal(stanza, fieldName);
		int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1;
		char bothBuf[bothSize];
		safef(bothBuf, bothSize, "%s,%s", oldVal, newVal);
		tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf);
	/* Output all nonempty vals to stanza */
	stanza = tagStanzaNew(storm, topStanza);
	for (fm = fmList; fm != NULL; fm = fm->next)
	    if (fm->val->stringSize > 0)
		tagStanzaAppend(storm, stanza, fm->name, fm->val->string);

    /* Remember this row for the comparison on the next pass. */
    lastFr = fr;
Пример #24
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table,
	struct hash *geneToModuleHash, struct hash *moduleAndMotifHash,
	struct hash *motifHash, struct hash *positionsHash,
	char *regionTable)
/* Load file which is a big matrix with genes for rows and motifs for
 * columns.  There is a semicolon-separated list of numbers in the matrix 
 * where a gene has the motif, and an empty (tab separated) field
 * where there is no motif.  The numbers are relative to the
 * region associated with the gene in the positionsHash. 
 * Only load bits of this where motif actually occurs in module associated 
 * with gene.
 *
 * Parameters, as used in the body:
 *   conn               - connection passed to sqlRemakeTable/hgLoadTabFile.
 *   fileName           - the matrix file to read.
 *   table              - name of the motif-hit table that is (re)made and loaded.
 *   geneToModuleHash   - looked up by gene name to obtain the gene's module.
 *   moduleAndMotifHash - queried with (module, motif name) via hashLookup2.
 *   motifHash          - keyed by motif name; its value is handed to
 *                        convertMotifPos.
 *   positionsHash      - looked up by gene name to obtain a struct genomePos.
 *   regionTable        - name of the second table, holding one row per region.
 *
 * NOTE(review): in this copy of the function the lines holding only braces,
 * and the call that consumes each CREATE TABLE format string, are absent.
 * The comments added here describe the visible statements only - confirm
 * the block structure against the upstream source. */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
FILE *f = hgCreateTabFile(tmpDir, table);
/* Fixed-capacity arrays: at most 32*1024 columns per line. */
char *motifNames[32*1024], *row[32*1024];
int motifCount, rowSize, i;
char *gene, *module;
/* NOTE(review): geneCount and total are reported by verbose() further down
 * but are never modified in the visible lines. */
int geneCount = 0, total = 0;
struct dyString *dy = dyStringNew(512);
struct genomePos *motifPosList = NULL, *motifPosForGene;
struct genomePos *regionPosList = NULL, *regionPos;

/* Read first line, which is labels. */
if (!lineFileNextReal(lf, &line))
    errAbort("Empty file %s", fileName);
/* Spaces in the label line become underscores before it is chopped into
 * words. */
subChar(line, ' ', '_');
motifCount = chopLine(line, motifNames);
if (motifCount >= ArraySize(motifNames))
    errAbort("Too many motifs line 1 of %s", fileName);
lineFileExpectAtLeast(lf, 2, motifCount);
/* Column 0 is the gene column (see row[0] in the row loop), so it has no
 * motif name. */
motifNames[0] = NULL;
for (i=1; i<motifCount; ++i)
    /* name[] is scratch space for fixMotifName; the result is cloned so the
     * stored pointer does not refer to this local buffer. */
    char name[64];
    motifNames[i] = cloneString(fixMotifName(motifNames[i],name,sizeof(name)));
    if (!hashLookup(motifHash, motifNames[i]))
        errAbort("Motif %s is in %s but not modules_motifs.gxm",
		motifNames[i], fileName);

/* Read subsequent lines. */
while ((rowSize = lineFileChopTab(lf, row)) != 0)
    /* Every data row must have the same number of columns as the label row. */
    lineFileExpectWords(lf, motifCount, rowSize);
    gene = row[0];
    module = hashFindVal(geneToModuleHash, gene);
    /* NOTE(review): a missing module only produces a warning in the visible
     * lines; module (possibly NULL) is still passed to hashLookup2 in the
     * column loop - confirm that this is intended. */
    if (module == NULL)
        warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", 
		gene, lf->lineIx, lf->fileName);
    regionPos = NULL;
    for (i=1; i<rowSize; ++i)
	/* A non-empty cell means the gene has this motif. */
	if (row[i][0] != 0)
	    if (hashLookup2(moduleAndMotifHash, module, motifNames[i]))
		regionPos = hashFindVal(positionsHash, gene);
		if (regionPos == NULL)
		    warn("WARNING: %s in %s but not gene_positions.tab",
		    	gene, fileName);
		    /* Setting i to rowSize makes the loop condition fail after
		     * the continue, abandoning the rest of this row. */
		    i = rowSize; continue;
		motifPosForGene = convertMotifPos(row[i], regionPos, 
			hashMustFindVal(motifHash, motifNames[i]), lf);
		/* New positions are placed in front of the accumulated list;
		 * the list is sorted before output. */
		motifPosList = slCat(motifPosForGene, motifPosList);
    /* Remember the region of the gene when a lookup succeeded for this row. */
    if (regionPos != NULL)
	slAddHead(&regionPosList, regionPos);

/* Output sorted table of all motif hits. */
    struct genomePos *pos;
    slSort(&motifPosList, genomePosCmp);
    for (pos = motifPosList; pos != NULL; pos = pos->next)
	int start = pos->start;
	int end = pos->end;
	/* Clamp negative starts to zero before computing the bin. */
	if (start < 0) start = 0;
	/* Column order matches the CREATE TABLE that follows: bin, chrom,
	 * chromStart, chromEnd, name, score, strand, gene. */
	fprintf(f, "%d\t", binFromRange(start, end));
	fprintf(f, "%s\t", pos->chrom);
	fprintf(f, "%d\t%d\t", start, end);
	fprintf(f, "%s\t", pos->motif);
	fprintf(f, "%d\t", pos->score);
	fprintf(f, "%c\t", pos->strand);
	fprintf(f, "%s\n", pos->name);
    /* NOTE(review): the head of the call taking this format string is not
     * present here; dy->string is what gets passed to sqlRemakeTable next,
     * so it presumably formats into dy - confirm. */
    "CREATE TABLE  %s (\n"
    "    bin smallInt unsigned not null,\n"
    "    chrom varChar(255) not null,\n"
    "    chromStart int not null,\n"
    "    chromEnd int not null,\n"
    "    name varchar(255) not null,\n"
    "    score int not null,\n"
    "    strand char(1) not null,\n"
    "    gene varchar(255) not null,\n"
    "              #Indices\n"
    "    INDEX(gene(12)),\n"
    "    INDEX(name(16)),\n"
    "    INDEX(chrom(8),bin)\n"
    ")\n",  table);
    sqlRemakeTable(conn, table, dy->string);
    verbose(1, "%d genes, %d motifs, %d motifs in genes\n",
	    geneCount, motifCount-1, total);
    hgLoadTabFile(conn, tmpDir, table, &f);
    // hgRemoveTabFile(tmpDir, table);
    verbose(1, "Loaded %s table\n", table);

/* Now output sorted table of upstream regions. */
    /* NOTE(review): this f and pos redeclare names used earlier in the
     * function, so this section presumably sat in its own brace block -
     * confirm. */
    FILE *f = hgCreateTabFile(tmpDir, regionTable);
    struct genomePos *pos;
    /* NOTE(review): as with the first table, the head of the call that
     * consumes this format string is missing; dy is used again for the
     * statement and no clearing of dy is visible between the two uses. */
    "CREATE TABLE  %s (\n"
    "    bin smallInt unsigned not null,\n"
    "    chrom varChar(255) not null,\n"
    "    chromStart int not null,\n"
    "    chromEnd int not null,\n"
    "    name varchar(255) not null,\n"
    "    score int not null,\n"
    "    strand char(1) not null,\n"
    "              #Indices\n"
    "    INDEX(name(16)),\n"
    "    INDEX(chrom(8),bin)\n"
    ")\n",  regionTable);
    sqlRemakeTable(conn, regionTable, dy->string);
    slSort(&regionPosList, genomePosCmp);
    for (pos = regionPosList; pos != NULL; pos = pos->next)
	int start = pos->start;
	int end = pos->end;
	/* Clamp negative starts to zero before computing the bin. */
	if (start < 0) start = 0;
	/* Column order: bin, chrom, chromStart, chromEnd, name, score, strand. */
	fprintf(f, "%d\t", binFromRange(start, end));
	fprintf(f, "%s\t", pos->chrom);
	fprintf(f, "%d\t%d\t", start, end);
	fprintf(f, "%s\t", pos->name);
	fprintf(f, "%d\t", pos->score);
	fprintf(f, "%c\n", pos->strand);
    hgLoadTabFile(conn, tmpDir, regionTable, &f);
    // hgRemoveTabFile(tmpDir, regionTable);
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *
 *   doLoad   - when true, the database steps (table creation, load,
 *              metaChromGraph bookkeeping, .cgb file) are performed.
 *   db       - database name; also used in the default /gbdb path.
 *   track    - name of the table and stem of the <track>.cgb file.
 *   fileName - input file holding the graph values.
 *
 * Reads the "idTable", "minusLog10" and "pathPrefix" command line options.
 *
 * NOTE(review): brace-only lines and at least one "else" and one argument
 * line are absent from this copy - confirm structure against upstream. */
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
/* The tab file is written to the current directory. */
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;

/* NOTE(review): as written both assignments run one after the other; an
 * "else" between them appears to have been lost, which would make the
 * second one the path taken when an idTable was given - confirm. */
if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Figure out min/max values */
/* NOTE(review): the loop starts at list->next, so the first element is
 * never passed through the minusLog10 conversion and minVal/maxVal are
 * seeded with its unconverted value - confirm that this is intended. */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    if (optionExists("minusLog10"))
	/* A value of exactly 1 is stored as 0; other positive values are
	 * replaced by -log10(value); values <= 0 are left unchanged. */
	if (el->val == 1)
	    el->val = 0;
	else if (el->val > 0)
	    el->val = -1 * log(el->val)/log(10);
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

/* NOTE(review): f is only handed to hgLoadTabFile inside the doLoad branch;
 * no close of f is visible for the doLoad == FALSE case. */
if (doLoad)
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    /* createString is a format string defined outside this function; it is
     * given the table name and hGetMinIndexLength(db). */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    /* NOTE(review): the "else", the remaining argument of the delete
     * statement's dyStringPrintf, and any clearing of dy before it is used
     * again are not present in this copy - confirm. */
    if (!sqlTableExists(conn, "metaChromGraph"))
	sqlUpdate(conn, metaCreateString);
	dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", 
	sqlUpdate(conn, dy->string);

    /* Make chrom graph file */
    /* path is first "<track>.cgb" (the file handed to chromGraphToBin) and
     * is then overwritten with the default gbdb directory, which the
     * "pathPrefix" option can replace.  gbdbPath is what is recorded in
     * the meta table. */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    /* Row layout as written: name, min value, max value, path of .cgb file. */
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
    	track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
static void doBlat(struct sqlConnection *conn, int taxon, char *db)
/* place probe seq from non-BAC with blat that have no alignments yet
 *
 *   conn  - connection used to run the probe query.
 *   taxon - taxon id used to filter vgPrb rows.
 *   db    - assembly name used to filter vgPrbAli rows.
 *
 * NOTE(review): this copy is heavily truncated - brace-only lines and the
 * heads of several calls (the query builder, the safef calls that fill
 * path1/path2 and cmdLine) are absent.  The comments added here describe
 * only what is visible; confirm everything against the upstream source. */
int rc = 0;
/* NOTE(review): blatSpec is not referenced in the visible lines. */
char *blatSpec=NULL;
char cmdLine[256];
char path1[256];
char path2[256];
struct dyString *dy = dyStringNew(0);

/* (non-BACs needing alignment) */
/* Query text: id (prefixed "vgPrb_") and sequence of probes whose vgPrbAli
 * row for this db has status 'new', restricted to the given taxon, to
 * non-'bac' types and to non-empty sequences, ordered by probe id.
 * NOTE(review): the call head that receives this format string is missing;
 * dy->string is what is passed to sqlSaveQuery next. */
    "select concat(\"vgPrb_\",e.id), e.seq"
    " from vgPrb e, vgPrbAli a"
    " where e.id = a.vgPrb"
    " and a.db = '%s'"
    " and a.status = 'new'"
    " and e.taxon = %d"
    " and e.type <> 'bac'"
    " and e.seq <> ''"
    " order by e.id"
    , db, taxon);
/* The query result is saved to blat.fa; rc is reported as the number of
 * sequences written. */
rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE);
verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon);

/* NOTE(review): with nothing to align an empty blatNearBest.psl is created;
 * whether the function then returns early cannot be told from this copy. */
if (rc == 0) 
    system("rm -f blatNearBest.psl; touch blatNearBest.psl");  /* make empty file */

/* make .ooc and blat on kolossus */

/* NOTE(review): path1 and path2 are not assigned anywhere in the visible
 * lines before being used here. */
verbose(1,"copy: [%s] to [%s]\n",path1,path2);  copyFile(path1,path2);

/* NOTE(review): the call head and trailing arguments for this command
 * string are missing; cmdLine is what gets executed on the next line. */
"ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11"
" -repMatch=1024 %s.2bit /dev/null /dev/null'",
/* Each remote command is bracketed by "date" calls and echoed via verbose. */
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

/* NOTE(review): call head and trailing arguments missing here as well. */
	"ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'",
system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date");

/* using blat even with -fastMap was way too slow - took over a day,
 * so instead I will make a procedure to write a fake psl for the BACs
 * which you will see called below */

/* NOTE(review): the same path2 is unlinked twice in the visible lines;
 * presumably path2 was re-filled with a different file name in between by
 * a line that is missing here - confirm. */
verbose(1,"rm %s\n",path2); unlink(path2); 

verbose(1,"rm %s\n",path2); unlink(path2); 

/* skip psl header and sort on query name */
/* Sort key 10 is used as the query-name column.
 * NOTE(review): no system(cmdLine) is visible after this safef. */
safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > blatS.psl");

/* keep near best within 5% of the best */
/* NOTE(review): the option value written is 0.005, which reads as 0.5%
 * rather than 5% - confirm against the pslCDnaFilter documentation.  The
 * call head for this command string is missing from this copy. */
    "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50"
    " blatS.psl blatNearBest.psl");


Пример #27
static void parseDbXrefs()
/* Parse the db_xref entries for various features to build a single dbx entry
 * in the kvt and to obtain the locus and mim ids for the kvt
 *
 * Works on variables declared outside this function: gbCdsDbxField,
 * gbGeneDbxField, dbXrefBuf, omimIdBuf, locusLinkId and geneId.
 *
 * NOTE(review): brace-only lines and the final argument line of two safef
 * calls are absent from this copy - confirm against upstream. */
/* Prefixes that identify the db_xref kinds handled specially. */
static char* LOCUS_ID = "LocusID:";
static char* GENE_ID = "GeneID:";
static char* MIM_ID = "MIM:";
struct slName* head = NULL, *xref, *prevXref;
struct keyVal* dbXrefKv = NULL;
struct keyVal* locusLinkIdKv = NULL;
struct keyVal* geneIdKv = NULL;
struct keyVal* omimIdKv = NULL;
/* The two string buffers are allocated on first use.
 * NOTE(review): no clearing of the buffers between calls is visible, and
 * locusLinkId is reset here while geneId is not - confirm. */
if (dbXrefBuf == NULL)
    dbXrefBuf = dyStringNew(256);
if (omimIdBuf == NULL)
    omimIdBuf = dyStringNew(256);
locusLinkId[0] = '\0';

/* split into a list and sort so we can remove dups */
/* NOTE(review): the duplicate check in the loop compares each entry only
 * with the one before it, which relies on the list being sorted, but no
 * sort call is visible in this copy. */
if (gbCdsDbxField->val->stringSize > 0)
    head = slCat(head, parseDbXrefStr(gbCdsDbxField->val->string));
if (gbGeneDbxField->val->stringSize > 0)
    head = slCat(head, parseDbXrefStr(gbGeneDbxField->val->string));

xref = head;
prevXref = NULL;
while (xref != NULL)
    /* skip if dup of previous */
    if ((prevXref == NULL) || !sameString(prevXref->name, xref->name))
        /* Accumulate all distinct xrefs, space separated, and refresh the
         * "dbx" entry with the string built so far. */
        if (dbXrefBuf->stringSize > 0)
            dyStringAppendC(dbXrefBuf, ' ');
        dyStringAppend(dbXrefBuf, xref->name);
        updateKvt(&dbXrefKv, "dbx", dbXrefBuf->string);

        /* find number in db_xref like LocusID:27 or GeneID:27 */
        /* NOTE(review): the value argument of each safef is missing in this
         * copy; presumably it is the text after the prefix, as in the MIM
         * branch - confirm. */
        if (startsWith(LOCUS_ID, xref->name))
            safef(locusLinkId, sizeof(locusLinkId), "%s",
            updateKvt(&locusLinkIdKv, "loc", locusLinkId);
        else if (startsWith(GENE_ID, xref->name))
            safef(geneId, sizeof(geneId), "%s",
            updateKvt(&geneIdKv, "gni", geneId);
        else if (startsWith(MIM_ID, xref->name))
            /* MIM ids are collected as a space-separated list, with the
             * "MIM:" prefix removed from each. */
            if (omimIdBuf->stringSize > 0)
                dyStringAppendC(omimIdBuf, ' ');
            dyStringAppend(omimIdBuf, xref->name+strlen(MIM_ID));
            updateKvt(&omimIdKv, "mim", omimIdBuf->string);
    prevXref = xref;
    xref = xref->next;
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table)
/* Save to vgPrbExt.fa the probe sequences that appear as qName in db.table
 * but have no matching acc in db.seq; when any were saved, copy the file to
 * path, symlink it under gbdbPath and run hgLoadSeq on the link.
 *
 *   conn  - connection used to run the query.
 *   db    - database that holds both table and the seq table.
 *   table - table whose qName column is joined against the probes.
 *
 * NOTE(review): brace-only lines, the head of the query-building call, the
 * statements that fill path/bedPath/gbdbPath and the loop header around
 * the random-name code are absent from this copy - confirm against
 * upstream. */
int rc = 0;
char cmd[256];
char path[256];
char bedPath[256];
char gbdbPath[256];
char *fname=NULL;
struct dyString *dy = dyStringNew(0);
/* Query text: distinct (prefixed id, sequence) of probes that appear as
 * qName in db.table and for which the left join to db.seq found no row
 * (s.acc is NULL), ordered by probe id. */
"select distinct concat('vgPrb_',e.id), e.seq"
" from vgPrb e join %s.%s v"
" left join %s.seq s on s.acc = v.qName"
" where concat('vgPrb_',e.id) = v.qName"
" and s.acc is NULL"
" order by e.id"
    , db, table, db);
rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE);
verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table);
if (rc > 0)  /* can set any desired minimum */
    /* Create the two directories with a shell mkdir when they do not exist.
     * NOTE(review): bedPath and gbdbPath are not assigned in the visible
     * lines. */
    if (!fileExists(bedPath))
	safef(cmd,sizeof(cmd),"mkdir %s",bedPath);
	verbose(1,"%s\n",cmd); system(cmd);
    if (!fileExists(gbdbPath))
	safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath);
    	verbose(1,"%s\n",cmd); system(cmd);
	/* Replace the "AAAAAA" placeholder found in path with random letters:
	 * each character has a value in [0,26) added to it, so 'A' becomes
	 * one of 'A'..'Z'.  The random generator is seeded from the clock.
	 * NOTE(review): the loop headers (presumably one over the placeholder
	 * characters using i, and an outer retry on fileExists) are missing
	 * here - confirm. */
	int i=0;
        char *c = rStringIn("AAAAAA",path);
        srand( (unsigned)time( NULL ) );
            *c++ += (int) 26 * (rand() / (RAND_MAX + 1.0));
	if (!fileExists(path))

    /* Copy the FASTA file to its final name. */
    safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path);
    verbose(1,"%s\n",cmd); system(cmd);
    /* fname points at a "/" inside path (presumably the last one, so it is
     * "/<file name>" - confirm rStringIn semantics); it is appended directly
     * to gbdbPath to form the link name. */
    fname = rStringIn("/", path);
    safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);
    /* Register the linked file with hgLoadSeq. */
    safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname);
    verbose(1,"%s\n",cmd); system(cmd);

Пример #29
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog)
/* gensatImageDownload - Download images from gensat guided by xml file..
 *
 *   gensatXml - XML file read as a list of GensatImage records.
 *   outDir    - directory under which the jpeg files are placed.
 *   outLog    - log file, opened in append mode into fLog (declared
 *               outside this function).
 *
 * NOTE(review): brace-only lines, the "else" lines of the download logic
 * and the tail of the imageFile initializer are absent from this copy -
 * confirm against upstream. */
struct xap *xap;
struct gsGensatImage *image;
/* Base locations for the two ways of fetching an image. */
char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat";
char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id=";
char finalJpg[PATH_LEN];
char finalDir[PATH_LEN];
char wgetSource[PATH_LEN];
/* dirHash remembers directories already handled so mkdir runs once each. */
struct hash *dirHash = newHash(16);
struct dyString *mkdir = dyStringNew(0);
int imageIx = 0;

fLog = mustOpen(outLog, "a");
fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir);
xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler);

/* Process one GensatImage record at a time. */
while ((image = xapListNext(xap, "GensatImage")) != NULL)
    /* id is used in the progress message and to build the CGI URL. */
    int id = image->gsGensatImageId->text;
    /* NOTE(review): the rest of this initializer expression is missing. */
    char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg

    /* Mangle file name a little */
    /* '(' becomes '_' and ')' is removed, in place. */
    subChar(imageFile, '(', '_');
    stripChar(imageFile, ')');

    /* Figure out name of jpeg file in outDir. */
    verbose(1, "image %d, id %d\n", ++imageIx, id);
    safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile);
    stripString(finalJpg, ".full"); /* Image magick can't handle two suffixes */
    /* NOTE(review): strcat is not bounds-checked against PATH_LEN. */
    strcat(finalJpg, ".jpg");

    /* Create directory that it goes in if necessary */
    splitPath(finalJpg, finalDir, NULL, NULL);
    if (!hashLookup(dirHash, finalDir))
	hashAdd(dirHash, finalDir, NULL);
	/* NOTE(review): mkdir is formatted into on every new directory and
	 * no clearing of it is visible in this copy - confirm. */
	dyStringPrintf(mkdir, "mkdir -p %s", finalDir);
	if (system(mkdir->string) != 0)
	    errAbort("Couldn't %s", mkdir->string);

    /* Download it - either directly via ftp, or indirectly via cgi. */
    /* Files already present are logged and not fetched again.  Otherwise
     * (the "else" lines are missing here) names ending in ".jpg" are
     * fetched from ftpUri, and presumably all others through jpgCgiUri
     * with the image id appended - confirm. */
    if (fileExists(finalJpg))
	verbose(1, "already have %s\n", imageFile);
	fprintf(fLog, "%s already downloaded\n", finalJpg);
	if (endsWith(imageFile, ".jpg"))
	    safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile);
	    if (safeGetOne(wgetSource, finalJpg))
	        fprintf(fLog, "Got via ftp %s\n", finalJpg);
	    safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id);
	    if (safeGetOne(wgetSource, finalJpg))
	        fprintf(fLog, "Got via cgi %s\n", finalJpg);
char *scanSettingsForCT(char *userName, char *sessionName, char *contents,
			int *pLiveCount, int *pExpiredCount)
/* Parse the CGI-encoded session contents into {var,val} pairs and search
 * for custom tracks.  If found, refresh the custom track.  Parsing code 
 * taken from cartParseOverHash. 
 * If any nonexistent custom track files are found, return a SQL update
 * command that will remove those from this session.  We can't just do 
 * the update here because that messes up the caller's query.
 *
 *   userName, sessionName - identify the session; used in messages and in
 *                           the WHERE clause of the returned statement.
 *   contents              - the encoded settings string; it is cloned
 *                           before being chopped up.
 *   pLiveCount,
 *   pExpiredCount         - optional counters; each is checked for NULL
 *                           before use.
 *
 * Returns NULL when the rebuilt contents have the same length as the
 * original, otherwise the string taken from a dyString with
 * dyStringCannibalize.
 *
 * NOTE(review): brace-only lines, "else" lines, the counter increments and
 * any per-iteration clearing of oneSetting are absent from this copy -
 * confirm against upstream. */
int contentLength = strlen(contents);
/* newContents collects the settings that are kept; oneSetting holds the
 * re-assembled "name=value&" text of the current pair. */
struct dyString *newContents = dyStringNew(contentLength+1);
struct dyString *oneSetting = dyStringNew(contentLength / 4);
char *updateIfAny = NULL;
/* The parse writes NUL bytes at '=' and '&', so it works on a copy. */
char *contentsToChop = cloneString(contents);
char *namePt = contentsToChop;
verbose(3, "Scanning %s %s\n", userName, sessionName);
while (isNotEmpty(namePt))
    char *dataPt = strchr(namePt, '=');
    char *nextNamePt;
    if (dataPt == NULL)
	errAbort("Mangled session content string %s", namePt);
    /* Terminate the name and step onto the value. */
    *dataPt++ = 0;
    /* Terminate the value at the next '&', if any; a NULL nextNamePt ends
     * the loop after this pair. */
    nextNamePt = strchr(dataPt, '&');
    if (nextNamePt != NULL)
	*nextNamePt++ = 0;
    /* Rebuild the pair (still encoded), with a trailing '&' only when
     * another pair follows. */
    dyStringPrintf(oneSetting, "%s=%s%s",
		   namePt, dataPt, (nextNamePt ? "&" : ""));
    if (startsWith(CT_FILE_VAR_PREFIX, namePt))
	boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE;
	/* Decode the value in place; it is then used as a file name. */
	cgiDecode(dataPt, dataPt, strlen(dataPt));
	verbose(3, "Found variable %s = %s\n", namePt, dataPt);
	/* If the file does not exist, omit this setting from newContents so 
	 * it doesn't get copied from session to session.  If it does exist,
	 * leave it up to customFactoryTestExistence to parse the file for 
	 * possible customTrash table references, some of which may exist 
	 * and some not. */
	/* NOTE(review): an "else" appears to be missing between the
	 * thisGotExpiredCT assignment and the db declaration; db is the part
	 * of the variable name that follows the prefix. */
	if (! fileExists(dataPt))
	    verbose(3, "Removing %s from %s %s\n", oneSetting->string,
		    userName, sessionName);
	    thisGotExpiredCT = TRUE;
	    char *db = namePt + strlen(CT_FILE_VAR_PREFIX);
	    dyStringAppend(newContents, oneSetting->string);
	    customFactoryTestExistence(db, dataPt,
				       &thisGotLiveCT, &thisGotExpiredCT);
	/* NOTE(review): the statements controlled by these two tests
	 * (presumably the increments of *pLiveCount and *pExpiredCount) are
	 * missing from this copy. */
	if (thisGotLiveCT && pLiveCount != NULL)
	if (thisGotExpiredCT && pExpiredCount != NULL)
	/* Expired tracks are reported with session details at verbosity 3 or
	 * more, otherwise (the "else" is missing here) with the shorter
	 * level-2 message. */
	if (thisGotExpiredCT)
	    if (verboseLevel() >= 3)
		verbose(3, "Found expired custom track in %s %s: %s\n",
			userName, sessionName, dataPt);
		verbose(2, "Found expired custom track: %s\n", dataPt);
	if (thisGotLiveCT)
	    verbose(4, "Found live custom track: %s\n", dataPt);
	/* NOTE(review): presumably the "else" branch of the prefix test, so
	 * settings that are not custom-track files are copied through
	 * unchanged - confirm. */
	dyStringAppend(newContents, oneSetting->string);
    namePt = nextNamePt;
/* A length difference means at least one setting was left out. */
if (newContents->stringSize != contentLength)
    struct dyString *update = dyStringNew(contentLength*2);
    /* Settings are only ever dropped, so the result must not grow. */
    if (newContents->stringSize > contentLength)
	errAbort("Uh, why is newContents (%d) longer than original (%d)??",
		 newContents->stringSize, contentLength);
    /* NOTE(review): newContents, userName and sessionName are placed in the
     * SQL text as they are; no escaping is visible in this function. */
    dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable);
    dyStringAppendN(update, newContents->string, newContents->stringSize);
    dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 "
		   "where userName=\"%s\" and sessionName=\"%s\";",
		   userName, sessionName);
    verbose(3, "Removing one or more dead CT file settings from %s %s "
	    "(original length %d, now %d)\n", 
	    userName, sessionName,
	    contentLength, newContents->stringSize);
    /* Hand the buffer to the caller as a plain string. */
    updateIfAny = dyStringCannibalize(&update);
return updateIfAny;