static struct htmlCookie *parseCookie(char *s) /* Parse out cookie line to the right of Set-Cookie. */ { char *e, *name, *val; struct htmlCookie *cookie; /* Grab up to semicolon, which is the cookie name/value pair. */ e = strchr(s, ';'); if (e == NULL) { warn("Missing ';' in cookie"); return NULL; } *e++ = 0; /* Allocate cookie and fill out name/value pair. */ AllocVar(cookie); cookieParseNameValuePair(s, &name, &val); cookie->name = cloneString(name); cookie->value = cloneString(val); /* Loop through to grab the other info - domain and so forth. */ s = e; for (;;) { /* Find next semicolon and zero-terminate it. */ s = skipLeadingSpaces(s); e = strchr(s, ';'); if (e == NULL) break; *e++ = 0; /* Parse out name/value pairs and save it away if it's one we know about. */ cookieParseNameValuePair(s, &name, &val); if (sameString(name, "domain")) cookie->domain = cloneString(val); else if (sameString(name, "path")) cookie->path = cloneString(val); else if (sameString(name, "expires")) cookie->expires = cloneString(val); else if (sameString(name, "secure")) cookie->secure = TRUE; s = e; } return cookie; }
void doMiddle(struct cart *theCart) /* Write header and body of html page. */ { char *userSeq; char *db, *organism; boolean clearUserSeq = cgiBoolean("Clear"); cart = theCart; dnaUtilOpen(); orgChange = sameOk(cgiOptionalString("changeInfo"),"orgChange"); if (orgChange) { cgiVarSet("db", hDefaultDbForGenome(cgiOptionalString("org"))); } getDbAndGenome(cart, &db, &organism, oldVars); char *oldDb = cloneString(db); findClosestServer(&db, &organism); /* Get sequence - from userSeq variable, or if * that is empty from a file. */ if (clearUserSeq) { cartSetString(cart, "userSeq", ""); cartSetString(cart, "seqFile", ""); } userSeq = cartUsualString(cart, "userSeq", ""); if (isEmpty(userSeq)) { userSeq = cartOptionalString(cart, "seqFile"); } if (isEmpty(userSeq) || orgChange) { cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism)); if (differentString(oldDb, db)) printf("<HR><P><EM><B>Note:</B> BLAT search is not available for %s %s; " "defaulting to %s %s</EM></P><HR>\n", hGenome(oldDb), hFreezeDate(oldDb), organism, hFreezeDate(db)); askForSeq(organism,db); cartWebEnd(); } else { blatSeq(skipLeadingSpaces(userSeq), organism); } }
struct slInt *tabRowGuessFixedOffsets(struct slName *lineList, char *fileName) /* Return our best guess list of starting positions for space-padded fixed * width fields. */ { struct slInt *offList = NULL, *off; if (lineList) { char *spaceRec = cloneString(lineList->name), *s; int lineSize = strlen(spaceRec); struct slName *line; int lineIx=1; /* First 'or' together all lines into spaceRec, which will * have a space wherever all columns of all lines are space and * non-space elsewhere. */ for (line = lineList->next; line != NULL; line = line->next, ++lineIx) { int i; s = line->name; if (strlen(s) != lineSize) errAbort("Line %d of %s has %lu chars, but first line has just %d", lineIx, fileName, (unsigned long)strlen(s), lineSize); for (i=0; i<lineSize; ++i) { if (s[i] != ' ') spaceRec[i] = 'X'; } } /* Now make up slInt list that describes where words begin */ s = spaceRec; for (;;) { s = skipLeadingSpaces(s); if (s == NULL || s[0] == 0) break; AllocVar(off); off->val = s - spaceRec; slAddHead(&offList, off); s = skipToSpaces(s); } slReverse(&offList); } return offList; }
static Fastq* fastq_processNextSequence (int freeMemory, int truncateName) { char *line; static Fastq* currFQ = NULL; int count; Seq* currSeq = NULL; if (ls_isEof (lsFastq)) { if (freeMemory) { fastq_freeFastq (currFQ); } return NULL; } count = 0; while ( (line=ls_nextLine (lsFastq)) && (count<4) ) { if (line[0] == '\0') { continue; } if (line[0] == '@') { if (freeMemory) { fastq_freeFastq (currFQ); } count++; AllocVar (currFQ); AllocVar (currFQ->seq); currSeq = currFQ->seq; currSeq->name = hlr_strdup (line + 1); if (truncateName) { currSeq->name = firstWordInLine (skipLeadingSpaces (currSeq->name)); } line = ls_nextLine (lsFastq); // reading sequence currSeq->sequence = hlr_strdup ( line ); currSeq->size = strlen (currSeq->sequence); count++; line = ls_nextLine (lsFastq); // reading quality ID if( line[0] != '+' ) die("Expected quality ID: '+' or '+%s'", currSeq->name ); count++; line = ls_nextLine (lsFastq); // reading quality currFQ->quality = hlr_strdup( line ); count++; } } ls_back (lsFastq,1); return currFQ; }
void bitsForOut(char *fileName, int seqSize, Bits *bits)
/* Get bitmap that corresponds to outFile.
 * Reads the RepeatMasker .out file fileName and sets the range
 * [start-1, end) in bits for each record.  Aborts if the header is not
 * recognized, a line is short, or coordinates do not agree with seqSize. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *words[8];
int wordCount;
boolean firstTime = TRUE;
int start,end;

/* Check and skip over three line header */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s is empty", fileName);
line = skipLeadingSpaces(line);
if (!startsWith("SW", line))
    errAbort("%s is not a RepeatMasker .out file", fileName);
/* Only look at line if a second header line was actually read; a file that
 * ends after the first line is reported like any other bad header. */
if (!lineFileNext(lf, &line, NULL) || !startsWith("score", line))
    errAbort("%s is not a RepeatMasker .out file", fileName);
/* Third header line is skipped without inspection; if the file ends here
 * the loop below simply finds no records. */
lineFileNext(lf, &line, NULL);

for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    wordCount = chopLine(line, words);
    if (wordCount < 8)
        errAbort("Short line %d of %s\n", lf->lineIx, lf->fileName);
    /* Columns 5 and 6 are begin and end; begin is shifted down by one. */
    start = lineFileNeedNum(lf, words, 5) - 1;
    end = lineFileNeedNum(lf, words, 6);
    if (start > end)
        errAbort("Start after end line %d of %s", lf->lineIx, lf->fileName);
    if (firstTime)
        {
        /* On the first record check that end plus the parenthesized number
         * in column 7 adds up to the expected sequence size. */
        char *s = words[7];
        if (s[0] != '(' || !isdigit(s[1]))
            errAbort("Expected parenthesized number line %d of %s", lf->lineIx, lf->fileName);
        if (seqSize != end + atoi(s+1))
            errAbort("Size mismatch line %d of %s", lf->lineIx, lf->fileName);
        firstTime = FALSE;
        }
    if (end > seqSize)
        errAbort("End past bounds line %d of %s", lf->lineIx, lf->fileName);
    bitSetRange(bits, start, end-start);
    }
lineFileClose(&lf);
}
void rmskOut2OpenVerify(char *fileName, struct lineFile **retFile, boolean *retEmpty)
/* Open repeat masker .out file and verify that it is good.
 * Set retEmpty if it has header characteristic of an empty file.
 * On return *retFile is the open file.  For a non-empty file the first line
 * must start (after leading spaces) with "SW" or "bit", and two further
 * header lines are skipped. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;

lineFileNeedNext(lf, &line, &lineSize);
if (startsWith("There were no", line))
    {
    /* Empty-result file: there is no column header to verify or skip. */
    *retEmpty = TRUE;
    }
else
    {
    line = skipLeadingSpaces(line);
    if (! ( startsWith("SW", line) || startsWith("bit", line) ) )
        errAbort("%s doesn't seem to be a RepeatMasker .out file", fileName);
    lineFileSkip(lf, 2);
    *retEmpty = FALSE;
    }
*retFile = lf;
}
char *customPpNextReal(struct customPp *cpp) /* Return next line that's nonempty, non-space and not a comment. * Save skipped comment lines to cpp->skippedLines. */ { slFreeList(&cpp->skippedLines); for (;;) { char *line = customPpNext(cpp); if (line == NULL) return line; char *s = skipLeadingSpaces(line); char c = *s; if (c != 0 && c != '#') return line; else if (c == '#') slNameAddHead(&cpp->skippedLines, line); } }
struct jobInfo *jobInfoParse(char *line)
/* Create a job info from parsing line.  Eats line.  Returns
 * NULL if there's a problem.
 * The line holds three words - id, machine, user, in that order - followed
 * by the command, which is the rest of the line. */
{
struct jobInfo *job;
char *idWord, *machineWord, *userWord, *rest;

/* All three leading words must be present. */
if ((idWord = nextWord(&line)) == NULL
    || (machineWord = nextWord(&line)) == NULL
    || (userWord = nextWord(&line)) == NULL)
    return NULL;

/* Whatever follows is the command, and it may not be empty. */
rest = skipLeadingSpaces(line);
if (rest == NULL || rest[0] == 0)
    return NULL;

AllocVar(job);
job->id = cloneString(idWord);
job->user = cloneString(userWord);
job->machine = cloneString(machineWord);
job->command = cloneString(rest);
return job;
}
struct hash *readInfoFile(char *fileName) /* Read in 'info' file into hash keyed by gene field and * containing a partial sanger22extra. */ { struct hash *infoHash = newHash(0); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *type, *val; struct dyString *dy = newDyString(512); struct sanger22extra *sx = NULL; while (lineFileNext(lf, &line, NULL)) { line = skipLeadingSpaces(line); if (line[0] == 0) { finishRecord(sx, dy); sx = NULL; } else { if (sx == NULL) AllocVar(sx); type = nextWord(&line); val = strchr(line, '"'); if (val == NULL) errAbort("No quoted value line %d of %s\n", lf->lineIx, lf->fileName); parseQuotedString(val, val, &line); if (sameString("Gene", type)) { sx->name = cloneString(val); hashAdd(infoHash, val, sx); } else if (sameString("Remark", type)) { dyStringAppend(dy, val); dyStringAppend(dy, ". "); } } } finishRecord(sx, dy); lineFileClose(&lf); return infoHash; }
struct hash *hashThisEqThatLine(char *line, int lineIx, boolean firstStartsWithLetter)
/* Return a symbol table from a line of form:
 *   1-this1=val1 2-this='quoted val2' var3="another val"
 * If firstStartsWithLetter is true, then the left side of the equals must start with
 * a letter.
 * The input line is not modified; parsing happens on a private copy.  Values
 * stored in the hash are cloned strings.  lineIx is only used in error
 * messages. */
{
char *lineCopy = cloneString(line);
char *pos = lineCopy;
struct hash *symbols = newHash(8);
char *var;

while ((var = skipLeadingSpaces(pos)) != NULL && *var != 0)
    {
    char first = *var;
    char quote;
    char *val;

    if (firstStartsWithLetter && !isalpha(first))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);

    /* Split variable name from value at the equals sign. */
    val = strchr(var, '=');
    if (val == NULL)
        {
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
    *val++ = 0;

    /* Value is either quoted, or runs up to the next white space. */
    quote = *val;
    if (quote == '\'' || quote == '"')
        {
        if (!parseQuotedString(val, val, &pos))
            errAbort("line %d of input: missing closing %c", lineIx, quote);
        }
    else
        {
        pos = skipToSpaces(val);
        if (pos != NULL)
            *pos++ = 0;
        }
    hashAdd(symbols, var, cloneString(val));
    }
freez(&lineCopy);
return symbols;
}
void endHandler(struct xap *xap, char *name) /* Called at end of a tag */ { struct type *type = xap->stack->object; char *text = skipLeadingSpaces(xap->stack->text->string); struct element *el; for (el = type->elements; el != NULL; el = el->next) { if (!el->seenThisRound) el->isOptional = TRUE; } if (text[0] == 0) { if (type->textAttribute != NULL) type->textAttribute->isOptional = TRUE; } else { int textLen = strlen(text); struct attribute *att = type->textAttribute; if (att == NULL) { type->textAttribute = AllocVar(att); att->name = "<text>"; att->values = hashNew(16); if (type->count != 0) att->isOptional = TRUE; } if (att->maxLen < textLen) att->maxLen = textLen; hashStore(att->values, text); att->count += 1; if (!att->nonInt) if (!isAllInt(text) || hasLeftPaddedZero(text)) att->nonInt = TRUE; if (!att->nonFloat) if (!isAllFloat(text)) att->nonFloat = TRUE; } type->count += 1; topType = type; }
void setStage(struct imageInfo *image, char *flakyClone, char *flakyStage)
/* Set image->stage from stage name if it looks good.
 * A name of the form StNN or StNN.N (N a digit) is stored as is, after
 * leading spaces and a -all, -2 or -12 suffix have been dealt with.  If
 * image->stage is still NULL afterwards, a few known odd spellings are
 * translated.  flakyClone is not used. */
{
/* Odd spellings paired with the stage to record.  Checked in order. */
static char *stageFixes[][2] =
    {
    {"St105", "St10.5"},
    {"St8", "St8"},
    {"St9", "St9"},
    {"St10-1", "St10"},
    {"St12a", "St12"},
    {"S20", "St20"},
    {"S24", "St24"},
    {"S25", "St25"},
    };
int fixCount = sizeof(stageFixes) / sizeof(stageFixes[0]);
int i;

flakyStage = skipLeadingSpaces(flakyStage);

/* Strip out -all suffix if any. */
stripSuffix(flakyStage, "-all");
stripSuffix(flakyStage, "-2");
stripSuffix(flakyStage, "-12");

if (startsWith("St", flakyStage))
    {
    char *digits = flakyStage + 2;
    int digitsLen = strlen(digits);
    if (isdigit(digits[0]) && isdigit(digits[1])
        && (digitsLen == 2
            || (digitsLen == 4 && digits[2] == '.' && isdigit(digits[3]))))
        image->stage = flakyStage;
    }

if (image->stage != NULL)
    return;

for (i = 0; i < fixCount; ++i)
    {
    if (sameString(flakyStage, stageFixes[i][0]))
        {
        image->stage = stageFixes[i][1];
        break;
        }
    }
}
void linesToRa(char *inFile, char *outFile)
/* Input lines with pipe-separated fields and output ra stanzas.
 * Blank input lines give a blank output line, lines whose first non-space
 * character is '#' are copied through, and every other line is split at '|'
 * with each field written on a line of its own followed by a blank line. */
{
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *of = mustOpen(outFile, "w");
char *line;
char *words[256];

while (lineFileNext(lf, &line, NULL))
    {
    char *content = skipLeadingSpaces(line);

    if (*content == 0)
        {
        fputs("\n", of);
        continue;
        }
    if (startsWith("#", content))
        {
        /* Comment: written out as read, leading spaces included. */
        fputs(line, of);
        fputc('\n', of);
        continue;
        }

    int size = ArraySize(words);
    int wordCt = chopByChar(line, '|', words, size);
    int i;
    if (wordCt >= size)
        errAbort("#words in line exceeds buffer size(%d): %s\n", size, line);
    for (i = 0; i < wordCt; i++)
        {
        fputs(words[i], of);
        fputc('\n', of);
        }
    fputc('\n', of);
    }
fflush(of);
carefulClose(&of);
}
struct hash *hashVarLine(char *line, int lineIx)
/* Return a symbol table from a line of form:
 *   var1=val1 var2='quoted val2' var3="another val"
 * Each variable has to start with a letter.  The input line is left
 * untouched (a copy is parsed), values in the hash are cloned strings, and
 * lineIx only appears in error messages. */
{
struct hash *hash = newHash(8);
char *dupe = cloneString(line);
char *next = dupe;

for (;;)
    {
    char *var = skipLeadingSpaces(next);
    char *val;
    char valStart;

    /* Out of text: done. */
    if (var == NULL)
        break;
    if (*var == 0)
        break;

    if (!isalpha(*var))
        errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
    val = strchr(var, '=');
    if (val == NULL)
        {
        errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
        }
    *val++ = 0;     /* Terminate the name, step on to the value. */

    /* Remember the first value character before the value is parsed in place. */
    valStart = *val;
    switch (valStart)
        {
        case '\'':
        case '"':
            if (!parseQuotedString(val, val, &next))
                errAbort("line %d of input: missing closing %c", lineIx, valStart);
            break;
        default:
            next = skipToSpaces(val);
            if (next != NULL)
                *next++ = 0;
            break;
        }
    hashAdd(hash, var, cloneString(val));
    }
freez(&dupe);
return hash;
}
static void parseQueryLines(struct blastFile *bf, char *line, struct blastQuery *bq)
/* Parse the Query= lines.
 * Stores the second word of line as bq->query, then reads on until a line
 * whose first non-space character is '(' and takes the number after it
 * (commas removed) as bq->queryBaseCount. */
{
char *words[16];
char *countStart, *countEnd;
int wordCount;

if (bq->query != NULL)
    bfError(bf, "already parse Query=");

/* Process something like:
 *    Query= MM39H11    00630
 */
wordCount = chopLine(line, words);
if (wordCount < 2)
    bfError(bf, "No sequence name in query line");
bq->query = cloneString(words[1]);

/* Look for the line holding the parenthesized letter count. */
do
    {
    line = bfNeedNextLine(bf);
    countStart = skipLeadingSpaces(line);
    }
while (countStart[0] != '(');

if (!isdigit(countStart[1]))
    {
    bfError(bf, "expecting something like:\n"
                " (45,693 letters)");
    }
countStart += 1;
countEnd = strchr(countStart, ' ');
if (countEnd == NULL)
    {
    bfError(bf, "expecting something like:\n"
                " (45,693 letters)");
    }
*countEnd = 0;
decomma(countStart);
bq->queryBaseCount = atoi(countStart);
}
struct htmlCookie *htmlCookieFileRead(char *fileName) /* Read cookies from a line oriented file. First word in line * is the cookie name, the rest of the line the cookie value. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct htmlCookie *list = NULL, *cookie; char *line, *word; while (lineFileNextReal(lf, &line)) { word = nextWord(&line); line = skipLeadingSpaces(line); if (line == NULL) errAbort("Missing cookie value line %d of %s", lf->lineIx, lf->fileName); AllocVar(cookie); cookie->name = cloneString(word); cookie->value = cloneString(line); slAddHead(&list, cookie); } lineFileClose(&lf); slReverse(&list); return list; }
char *nextQuotedWord(char **pLine)
/* Generalization of nextWord.  Returns next quoted
 * string or if no quotes next word.  Updates *pLine
 * to point past word that is returned.  Does not return
 * quotes.  Returns NULL when nothing is left, or when parseQuotedString
 * reports failure on a quoted string. */
{
char *start = skipLeadingSpaces(*pLine);

if (start == NULL || start[0] == 0)
    return NULL;

switch (start[0])
    {
    case '"':
    case '\'':
        /* Quoted: the string is unquoted in place. */
        return parseQuotedString(start, start, pLine) ? start : NULL;
    default:
        return nextWord(pLine);
    }
}
static struct joinerIgnore *parseTablesIgnored(struct lineFile *lf,
        char *line, struct hash *symHash, struct dyString *dyBuf)
/* Parse out one tables ignored record - keep going until blank line or
 * end of file.
 * line holds the database list; each following line, trimmed, is one table
 * name.  Tables end up in the order they appear in the file. */
{
struct joinerIgnore *ig;

AllocVar(ig);
ig->dbList = parseDatabaseList(lf, trimSpaces(line));

for (;;)
    {
    struct slName *table;

    line = nextSubbedLine(lf, symHash, dyBuf);
    if (line == NULL)
        break;                  /* End of file. */
    line = skipLeadingSpaces(line);
    if (line[0] == 0)
        break;                  /* Blank line ends the record. */
    table = slNameNew(trimSpaces(line));
    slAddHead(&ig->tableList, table);
    }
slReverse(&ig->tableList);
return ig;
}
void convertVariableStepSection(struct lineFile *lf, struct hash *vars, struct bgOut *out) /* Read through section and output. */ { char *chrom = requiredVar(vars, "chrom", lf); int span = sqlUnsigned(optionalVar(vars, "span", "1")); char *line; while (lineFileNextReal(lf, &line)) { line = skipLeadingSpaces(line); if (isalpha(line[0])) { lineFileReuse(lf); break; } char *words[3]; int wordCount = chopLine(line, words); if (wordCount != 2) errAbort("Expecting exactly two numbers line %d of %s", lf->lineIx, lf->fileName); int start = lineFileNeedNum(lf, words, 0) - 1; double val = lineFileNeedDouble(lf, words, 1); bgOutWrite(out, chrom, start, start+span, val); } }
void convertFixedStepSection(struct lineFile *lf, struct hash *vars,
        struct bgOut *out)
/* Read through section and output.
 * Each data line is a single value.  The first one is written at the
 * section's start minus one, and the position advances by step after every
 * value; step defaults to span, span defaults to 1.  A line starting with a
 * letter ends the section and is pushed back for the caller. */
{
char *chrom = requiredVar(vars, "chrom", lf);
int pos = sqlUnsigned(requiredVar(vars, "start", lf)) - 1;
char *spanString = optionalVar(vars, "span", "1");
int span = sqlUnsigned(spanString);
int step = sqlUnsigned(optionalVar(vars, "step", spanString));
char *line;

for ( ; lineFileNextReal(lf, &line); pos += step)
    {
    double value;

    line = skipLeadingSpaces(line);
    if (isalpha(line[0]))
        {
        lineFileReuse(lf);
        break;
        }
    eraseTrailingSpaces(line);
    value = sqlDouble(line);
    bgOutWrite(out, chrom, pos, pos+span, value);
    }
}
static char *nextSubbedLine(struct lineFile *lf, struct hash *hash,
        struct dyString *dy)
/* Return next line after string substitutions.  This removes comments too.
 * Lines whose first non-space character is '#' are skipped entirely; on any
 * other line, text from the first '#' onwards is cut off.  Returns NULL at
 * end of file. */
{
char *line, *sharp;

/* Keep reading while the line is nothing but a comment. */
do
    {
    if (!lineFileNext(lf, &line, NULL))
        return NULL;
    sharp = strchr(line, '#');
    }
while (sharp != NULL && skipLeadingSpaces(line) == sharp);

if (sharp != NULL)
    *sharp = 0;     /* Terminate line at sharp. */
return subThroughHash(lf, hash, dy, line);
}
static char *limitText(char *text) /* read text string and limit to 1000 actual data lines */ { struct dyString *limitedText = dyStringNew(0); /* yes, opening with FALSE so as not to destroy the original string */ struct lineFile *lf = lineFileOnString("limitText", FALSE, text); char *lineStart = NULL; int lineLength = 0; int legitimateLineCount = 0; while (legitimateLineCount < 1000 && lineFileNext(lf, &lineStart, &lineLength)) { char *s, c; s = skipLeadingSpaces(lineStart); c = s[0]; if (c != 0 && c != '#') ++legitimateLineCount; dyStringAppendN(limitedText, lineStart, lineLength); } if ((legitimateLineCount == 1000) && lineFileNext(lf, &lineStart, &lineLength)) warn("WARNING: defined regions limit of 1000 definitions reached at line %d<BR>\n", lf->lineIx-1); lineFileClose(&lf); return (dyStringCannibalize(&limitedText)); }
struct segment *parseSegment(char *fileName, int start, int end, char *retSeqName) /* Read in a genscan file into segment. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct segment *seg; char *line; struct genScanFeature *gsfList = NULL, *gsf; struct genScanGene *gsg; char *words[2]; if (!lineFileNext(lf, &line, NULL)) errAbort("%s is empty", fileName); if (!startsWith("GENSCAN ", line)) errAbort("%s is not a GENSCAN output file", fileName); if (retSeqName != NULL) { line = mustSkipTo(lf, "Sequence"); if (chopLine(line, words) < 2) errAbort("Expecting sequence name line %d of %s", lf->lineIx, lf->fileName); strcpy(retSeqName, words[1]); } mustSkipTo(lf, "Predicted genes/exons"); mustSkipTo(lf, "Gn.Ex"); mustSkipTo(lf, "-----"); AllocVar(seg); seg->start = start; seg->end = end; for (;;) { if (!lineFileNext(lf, &line, NULL)) break; line = skipLeadingSpaces(line); if (line == NULL || line[0] == 0) continue; if (!isdigit(line[0])) { lineFileReuse(lf); break; } gsf = parseGenscanLine(lf, line); slAddHead(&gsfList, gsf); } slReverse(&gsfList); printf("Got %d exons\n", slCount(gsfList)); seg->geneList = bundleGenes(gsfList); seg->geneList = filterEmptyGenes(seg->geneList); gsfList = NULL; printf("Got %d genes\n", slCount(seg->geneList)); if (!lineFileNext(lf, &line, NULL)) errAbort("Unexpected end of file in %s", lf->fileName); if (startsWith("Suboptimal exons", line)) { mustSkipTo(lf, "-----"); for (;;) { if (!lineFileNext(lf, &line, NULL)) break; line = skipLeadingSpaces(line); if (line == NULL || line[0] == 0) continue; if (!startsWith("S.", line)) break; gsf = parseGenscanLine(lf, line); slAddHead(&gsfList, gsf); } slReverse(&gsfList); seg->suboptList = gsfList; printf("Got %d suboptimal exons\n", slCount(seg->suboptList)); } lineFileReuse(lf); mustSkipTo(lf, "Predicted peptide sequence"); if ((line = skipTo(lf, ">")) != NULL) { lineFileReuse(lf); for (gsg = seg->geneList; gsg != NULL; gsg = gsg->next) { aaSeq seq; if (!faPepSpeedReadNext(lf, &seq.dna, 
&seq.size, &seq.name)) errAbort("Not enough predicted peptides in %s\n", lf->fileName); gsg->translation = cloneString(seq.dna); } } lineFileClose(&lf); return seg; }
static bioSeq *nextSeqFromMem(char **pText, boolean isDna, boolean doFilter)
/* Convert fa in memory to bioSeq.  Update *pText to point to next
 * record.  Returns NULL when no more sequences left.
 * The sequence is compacted in place: letters are written from the start of
 * the text, so the returned seq->dna points into the caller's buffer.  The
 * name is a clone, or an empty string when the text has no '>' line.  With
 * doFilter each letter is mapped through ntChars or aaChars, and letters
 * that map to 0 become 'n' (DNA) or 'X' (protein). */
{
char *name = "";
char *in;               /* Read position in sequence text. */
char *out;              /* Where compacted sequence is written. */
int size = 0;
struct dnaSeq *seq;
char *filter = (isDna ? ntChars : aaChars);
char *text = *pText;
char *p = skipLeadingSpaces(text);

if (p == NULL)
    return NULL;
dnaUtilOpen();

if (*p != '>')
    {
    /* No header line: the sequence starts right here. */
    in = p;
    if (in == NULL || in[0] == 0)
        return NULL;
    }
else
    {
    char *nameEnd;
    /* Sequence begins on the line after the header, if there is one. */
    in = strchr(p, '\n');
    if (in != NULL)
        ++in;
    name = skipLeadingSpaces(p+1);
    nameEnd = skipToSpaces(name);
    if (nameEnd >= in || name >= in)
        errAbort("No name in line starting with '>'");
    if (nameEnd != NULL)
        *nameEnd = 0;
    }

/* Clone the name before the text it lives in gets overwritten below. */
name = cloneString(name);
out = text;
if (in != NULL)
    {
    char c;
    /* Copy letters up to the end of text or the next '>' record. */
    while ((c = *in) != 0 && c != '>')
        {
        ++in;
        if (!isalpha(c))
            continue;
        if (doFilter)
            {
            c = filter[(int)c];
            if (c == 0)
                c = (isDna ? 'n' : 'X');
            }
        out[size++] = c;
        }
    }
out[size] = 0;

/* Put sequence into our little sequence structure. */
AllocVar(seq);
seq->name = name;
seq->dna = text;
seq->size = size;
*pText = in;
return seq;
}
void liftAgp(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift up coordinates in .agp file.
 * sources[0] is an inserts file read with chromInsertsRead; sources[1] and
 * up are the .agp files to lift.  Output goes to destFile, and the base name
 * of destFile (as split out by splitPath) is used as the object name on
 * every output line.  Calls usage() if fewer than two sources are given. */
{
FILE *dest = mustOpen(destFile, "w");
char *source;
int i;
struct lineFile *lf;
int lineSize, wordCount;
char *line, *words[32];
char *s;
struct liftSpec *spec;
int start = 0;
int end = 0;    /* End of the last line written; gap lines start at end+1. */
int ix = 0;     /* Running part number, incremented for each output line. */
char newDir[256], newName[128], newExt[64];
struct bigInsert *bi;
struct chromInserts *chromInserts;
struct hash *insertHash = newHash(8);
struct hash *contigsHash = newHash(10);     /* Contigs seen so far, to catch repeats. */
boolean firstContig = TRUE;
char lastContig[256];
char *contig;
int lastEnd = 0;    /* Largest lifted end seen; the terminal gap starts after it. */

if (sourceCount < 2)
    usage();

if (how == carryMissing)
    warn("'carry' doesn't work for .agp files, ignoring");

splitPath(destFile, newDir, newName, newExt);

/* Read in inserts file and process it. */
chromInsertsRead(sources[0], insertHash);
/* May be NULL - see the check before the terminal gap is written below. */
chromInserts = hashFindVal(insertHash, newName);

strcpy(lastContig, "");
for (i=1; i<sourceCount; ++i)
    {
    source = sources[i];
    verbose(1, "Lifting %s\n", source);
    /* Sources that cannot be opened are skipped without a message here. */
    lf = lineFileMayOpen(source, TRUE);
    if (lf != NULL)
        {
        while (lineFileNext(lf, &line, &lineSize))
            {
            /* Check for comments and just pass them through. */
            s = skipLeadingSpaces(line);
            if (s[0] == '#')
                {
                fprintf(dest, "%s\n", line);
                continue;
                }
            /* Parse line, adjust offsets, write */
            wordCount = chopLine(line, words);
            if (wordCount != 8 && wordCount != 9)
                malformedAgp(lf);
            contig = words[0];
            if (!sameString(contig, lastContig))
                {
                /* First line of a new contig: write the gap that goes in
                 * front of it, if any.  A contig may appear only once. */
                char *gapType = "contig";
                char *ctg = rmChromPrefix(contig);
                int gapSize = chromInsertsGapSize(chromInserts, ctg, firstContig);
                if (hashLookup(contigsHash, contig))
                    errAbort("Contig repeated line %d of %s", lf->lineIx, lf->fileName);
                hashAdd(contigsHash, contig, NULL);
                if (gapSize != 0)
                    {
                    if ((bi = bigInsertBeforeContig(chromInserts, ctg)) != NULL)
                        {
                        gapType = bi->type;
                        }
                    fprintf(dest, "%s\t%d\t%d\t%d\tN\t%d\t%s\tno\n", newName, end+1, end+gapSize, ++ix, gapSize, gapType);
                    }
                firstContig = FALSE;
                /* NOTE(review): unbounded copy into a 256 byte buffer; relies
                 * on contig names being shorter than that. */
                strcpy(lastContig, contig);
                }
            spec = findLift(liftHash, contig, lf);
            cantHandleSpecRevStrand(spec);
            /* Columns 1 and 2 are start and end; both move by the lift offset. */
            start = numField(words, 1, 0, lf) + spec->offset;
            end = numField(words, 2, 0, lf) + spec->offset;
            if (end > lastEnd) lastEnd = end;
            if (!sameString(newName, spec->newName))
                errAbort("Mismatch in new name between %s and %s", newName, spec->newName);
            /* Columns 4 to 7 are copied through; column 3 is replaced by the
             * running part number. */
            fprintf(dest, "%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s", newName, start, end, ++ix, words[4], words[5], words[6], words[7]);
            if (wordCount == 9)
                fprintf(dest, "\t%s", words[8]);
            fputc('\n', dest);
            }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
        }
    }
/* Write the gap that goes after the last contig, if the inserts ask for one. */
if (chromInserts != NULL)
    {
    if ((bi = chromInserts->terminal) != NULL)
        {
        fprintf(dest, "%s\t%d\t%d\t%d\tN\t%d\t%s\tno\n", newName, lastEnd+1, lastEnd+bi->size, ++ix, bi->size, bi->type);
        }
    }
if (ferror(dest))
    errAbort("error writing %s", destFile);
fclose(dest);
}
void gensatFtpImages(char *checkMd5, char *outDir)
/* gensatFtpImages - Download images guided by output of gensatFtpList.
 * checkMd5 is a file with two columns per line: an md5 sum and a path
 * relative to the global uri.  Each image is fetched into outDir under the
 * same relative path, unpacked if it ends in .bz2, and converted to .jpg
 * unless it already has that suffix.  Images whose .jpg already exists are
 * skipped.  Failed downloads are counted in the global errCount, and the
 * run aborts once that exceeds maxErrs.
 * NOTE(review): lf and command are not released in this function. */
{
int err;
char source[PATH_LEN], nativeImage[PATH_LEN], jpgImage[PATH_LEN];
char dir[PATH_LEN], file[PATH_LEN], ext[PATH_LEN];
struct lineFile *lf = lineFileOpen(checkMd5, TRUE);
char *line, *md5, *relativePath;
struct dyString *command = dyStringNew(0);

while(lineFileNext(lf, &line, NULL))
    {
    /* Parse out two columns of checkMd5 file. */
    md5 = nextWord(&line);
    relativePath = skipLeadingSpaces(line);

    /* Figure out output path, and if file already exists skip it.
     * nativeImage is the file as downloaded, jpgImage the final name: any
     * .bz2 suffix removed and the image suffix replaced by .jpg. */
    safef(nativeImage, sizeof(nativeImage), "%s/%s", outDir, relativePath);
    strcpy(jpgImage, nativeImage);
    if (endsWith(jpgImage, ".bz2"))
        chopSuffix(jpgImage);
    if (endsWith(jpgImage, ".png") || endsWith(jpgImage, ".tif") || endsWith(jpgImage, ".tiff") || endsWith(jpgImage, ".jpeg") || endsWith(jpgImage, ".jpg") || endsWith(jpgImage, ".JPG") )
        {
        chopSuffix(jpgImage);
        strcat(jpgImage, ".jpg");
        }
    else if (endsWith(jpgImage, ".txt") || endsWith(jpgImage, ".zip") || endsWith(jpgImage, ".doc"))
        continue;       /* Known non-image files are silently skipped. */
    else
        errAbort("Unrecognized image type in file %s", jpgImage);
    if (!fileExists(jpgImage))
        {
        /* Create any directories needed.
         * NOTE(review): the exit status of this command is not checked,
         * unlike the bunzip2 and convert commands below. */
        splitPath(relativePath, dir, file, ext);
        dyStringClear(command);
        dyStringPrintf(command, "mkdir -p '%s/%s'", outDir, dir);
        system(command->string);

        /* wget the file. */
        safef(source, sizeof(source), "%s/%s", uri, relativePath);
        if (safeGetOne(source, md5, nativeImage))
            {
            if (endsWith(nativeImage, ".bz2"))
                {
                /* Unpack, then drop the .bz2 suffix from the name to follow
                 * what bunzip2 does to the file. */
                dyStringClear(command);
                dyStringPrintf(command, "bunzip2 '%s'", nativeImage);
                verbose(1, "%s\n", command->string);
                err = system(command->string);
                if (err != 0)
                    errAbort("err %d on %s", err, command->string);
                chopSuffix(nativeImage);
                }
            if (!endsWith(nativeImage, ".jpg") )
                {
                /* Convert to jpg and remove the file that was downloaded. */
                dyStringClear(command);
                dyStringPrintf(command, "convert '%s' '%s'", nativeImage, jpgImage);
                verbose(1, "%s\n", command->string);
                err = system(command->string);
                if (err != 0)
                    errAbort("err %d on %s", err, command->string);
                remove(nativeImage);
                }
            }
        else
            {
            /* Download failed: tolerate up to maxErrs failures. */
            if (++errCount > maxErrs)
                errAbort("Aborting after %d errors", errCount);
            }
        }
    else
        {
        verbose(1, "Already have %s\n", jpgImage);
        }
    }
}
void hgLoadRnaFold(char *database, char *table, char *foldDir) /* hgLoadRnaFold - Load a directory full of RNA fold files into database. */ { char path[PATH_LEN]; struct slName *dirList, *dirEl; struct lineFile *lf; char *line, *word, *s, c; FILE *f = hgCreateTabFile(tabDir, table); int count = 0; dirList = listDir(foldDir, "*"); for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next) { char *name = dirEl->name; if (sameString(name, "CVS")) continue; safef(path, sizeof(path), "%s/%s", foldDir, name); lf = lineFileOpen(path, TRUE); if (!lineFileNext(lf, &line, NULL)) { if (warnEmpty) { warn("%s is empty, skipping\n", name); lineFileClose(&lf); continue; } else errAbort("%s is empty\n", name); } if (!isupper(line[0])) notFold(path, 1); fprintf(f, "%s\t", name); /* Save name */ fprintf(f, "%s\t", line); /* Save sequence */ lineFileNeedNext(lf, &line, NULL); c = line[0]; if (c != '.' && c != '(') notFold(path, 2); word = nextWord(&line); fprintf(f, "%s\t", word); /* Save nested parenthesis */ /* Parse out (energy) term at end of line. */ s = strchr(line, '('); if (s == NULL) notFold(path, 3); word = skipLeadingSpaces(s+1); if (word == NULL || (!word[0] == '-' && !isdigit(word[0]))) notFold(path, 4); if ((s = strchr(word, ')')) == NULL) notFold(path, 5); *s = 0; fprintf(f, "%s\n", word); lineFileClose(&lf); ++count; } printf("Parsed %d files\n", count); if (doLoad) { struct sqlConnection *conn = sqlConnect(database); rnaFoldCreateTable(conn, table); hgLoadTabFile(conn, tabDir, table, &f); hgRemoveTabFile(tabDir, table); sqlDisconnect(&conn); } }
struct hgFindSpec *hgFindSpecFromRa(char *db, char *raFile, char *releaseTag)
/* Load track info from ra file into list.
 * Each stanza starts at a line beginning with "searchName" or "searchTable"
 * and runs to the next blank line or end of file; every line in it is a
 * setting name followed by its value.  Include lines (as recognized by
 * trackDbInclude) are followed by calling this function on the included
 * file.  If releaseTag is not NULL it is added to every stanza read from
 * this file.  hgFindSpecPolish is applied to the list only by the outermost
 * call. */
{
/* TRUE while inside a recursive call made for an include. */
static boolean reEntered = FALSE;
struct lineFile *lf = lineFileOpen(raFile, TRUE);
char *line, *word;
struct hgFindSpec *hfsList = NULL, *hfs;
boolean done = FALSE;
char *incFile;

for (;;)
    {
    /* Seek to next line that starts with 'searchName' or 'searchTable' */
    for (;;)
        {
        char *subRelease;

        if (!lineFileNext(lf, &line, NULL))
            {
            done = TRUE;
            break;
            }
        if (startsWith("searchName", line) || startsWith("searchTable", line))
            {
            /* Push the line back so the stanza loop below reads it too. */
            lineFileReuse(lf);
            break;
            }
        else if ((incFile = trackDbInclude(raFile, line, &subRelease)) != NULL)
            {
            if (subRelease)
                trackDbCheckValidRelease(subRelease);
            /* An include inside a release-tagged include may not name a
             * different release. */
            if (releaseTag && subRelease && !sameString(subRelease, releaseTag))
                errAbort("Include with release %s inside include with release %s line %d of %s", subRelease, releaseTag, lf->lineIx, lf->fileName);
            /* Set reEntered=TRUE whenever we recurse, so we don't polish
             * multiple times and get too many backslash-escapes. */
            boolean reBak = reEntered;
            reEntered = TRUE;
            struct hgFindSpec *incHfs = hgFindSpecFromRa(db, incFile, subRelease);
            reEntered = reBak;
            /* NOTE(review): the included list is appended at the tail of a
             * list that is otherwise built head-first and reversed at the
             * end - confirm the resulting order is the intended one. */
            hfsList = slCat(hfsList, incHfs);
            }
        }
    if (done)
        break;

    /* Allocate track structure and fill it in until next blank line. */
    AllocVar(hfs);
    slAddHead(&hfsList, hfs);
    for (;;)
        {
        /* Break at blank line or EOF. */
        if (!lineFileNext(lf, &line, NULL))
            break;
        line = skipLeadingSpaces(line);
        if (line == NULL || line[0] == 0)
            break;

        /* Skip comments. */
        if (line[0] == '#')
            continue;

        /* Parse out first word and decide what to do. */
        word = nextWord(&line);
        if (line == NULL)
            errAbort("No value for %s line %d of %s", word, lf->lineIx, lf->fileName);
        line = trimSpaces(line);
        hgFindSpecAddInfo(hfs, word, line);
        }
    if (releaseTag)
        hgFindSpecAddRelease(hfs, releaseTag);
    }
lineFileClose(&lf);

/* Polish only in the outermost call. */
if (! reEntered)
    {
    for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
        {
        hgFindSpecPolish(db, hfs);
        }
    }
slReverse(&hfsList);
return hfsList;
}
boolean faMixedSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
/* Read in DNA or Peptide FA record in mixed case.  Allow any upper or lower case
 * letter, or the dash character in.
 * Returns FALSE at end of file, with *retDna set to NULL and *retSize to 0.
 * Otherwise returns TRUE; *retDna points at the shared faFastBuf and
 * *retName at a static buffer inside this function, so both are overwritten
 * by the next call.  Warns when a record has no sequence at all. */
{
static char name[512];
int used = 0;           /* Number of characters stored in faFastBuf so far. */
int lineSize;
char *line;

dnaUtilOpen();

/* Read first line, make sure it starts with '>', and read first word
 * as name of sequence. */
name[0] = 0;
if (!lineFileNext(lf, &line, &lineSize))
    {
    *retDna = NULL;
    *retSize = 0;
    return FALSE;
    }
if (line[0] != '>')
    {
    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
    }
else
    {
    line = firstWordInLine(skipLeadingSpaces(line+1));
    if (line == NULL)
        errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
    strncpy(name, line, sizeof(name));
    name[sizeof(name)-1] = '\0'; /* Just to make sure name is NULL terminated. */
    }

/* Read until next '>' */
while (lineFileNext(lf, &line, &lineSize))
    {
    char *pt, *lineEnd;

    if (line[0] == '>')
        {
        lineFileReuse(lf);
        break;
        }
    if (used + lineSize >= faFastBufSize)
        expandFaFastBuf(used, lineSize);
    lineEnd = line + lineSize;
    for (pt = line; pt < lineEnd; ++pt)
        {
        char c = *pt;
        if (isalpha(c) || c == '-')
            faFastBuf[used++] = c;
        }
    }

/* Make sure there is room for the terminating zero. */
if (used >= faFastBufSize)
    expandFaFastBuf(used, 0);
faFastBuf[used] = 0;
*retDna = faFastBuf;
*retSize = used;
*retName = name;
if (used == 0)
    {
    warn("Invalid fasta format: sequence size == 0 for element %s",name);
    }
return TRUE;
}
void liftGap(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift up coordinates in .gap file (just the gaps from .agp). Negative strad allowed */ { FILE *dest = mustOpen(destFile, "w"); char *source; int i; struct lineFile *lf; int lineSize, wordCount; char *line, *words[32]; char *s; struct liftSpec *spec; int start = 0; int end = 0; int ix = 0; char newDir[256], newName[128], newExt[64]; char lastContig[256]; char *contig; int lastEnd = 0; int fragStart, fragEnd; if (how == carryMissing) warn("'carry' doesn't work for .gap files, ignoring"); splitPath(destFile, newDir, newName, newExt); strcpy(lastContig, ""); for (i=0; i<sourceCount; ++i) { source = sources[i]; verbose(1, "Lifting %s\n", source); lf = lineFileMayOpen(source, TRUE); if (lf != NULL) { while (lineFileNext(lf, &line, &lineSize)) { /* Check for comments and just pass them through. */ s = skipLeadingSpaces(line); if (s[0] == '#') { fprintf(dest, "%s\n", line); continue; } /* Parse line, adjust offsets, write */ wordCount = chopLine(line, words); if (wordCount != 8 && wordCount != 9) malformedAgp(lf); if (words[4][0] != 'N' && words[4][0] != 'U') errAbort("Found non-gap in .gap file: %s", words[4]); contig = words[0]; spec = findLift(liftHash, contig, lf); start = fragStart = numField(words, 1, 0, lf); end = fragEnd = numField(words, 2, 0, lf); end = fragEnd; if (spec->strand == '-') { start = spec->oldSize - fragEnd + 1; end = spec->oldSize - fragStart + 1; } start += spec->offset; end += spec->offset; if (end > lastEnd) lastEnd = end; fprintf(dest, "%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s", spec->newName, start, end, ++ix, words[4], words[5], words[6], words[7]); if (wordCount == 9) fprintf(dest, "\t%s", words[8]); fputc('\n', dest); } lineFileClose(&lf); if (dots) verbose(1, "\n"); } } if (ferror(dest)) errAbort("error writing %s", destFile); fclose(dest); }