struct meta *metaNextStanza(struct lineFile *lf) /* Return next stanza in a meta file. Does not set parent/child/next pointers. * Returns NULL at end of file. Does a little error checking, making sure * that indentation level is consistent across all lines of stanza. Returns * indentation level. */ { /* See if anything left in file, and if not return. */ if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; /* Allocate return structure and vars to help parse. */ struct meta *meta; AllocVar(meta); struct dyString *dy = dyStringNew(256); char *tag,*val; /* Loop to get all tags in stanza. */ boolean firstTime = TRUE; int initialIndent = 0; for (;;) { dyStringClear(dy); if (!raNextTagVal(lf, &tag, &val, dy)) break; /* Make tag/val and add it to list. */ struct metaTagVal *mtv; AllocVar(mtv); mtv->tag = cloneString(tag); mtv->val = cloneString(val); slAddHead(&meta->tagList, mtv); /* Check indentation. */ int indent = countLeadingSpacesDetabbing(dy->string, 8); if (firstTime) { initialIndent = indent; firstTime = FALSE; } else { if (indent != initialIndent) { warn("Error line %d of %s\n", lf->lineIx, lf->fileName); warn("Indentation level %d doesn't match level %d at start of stanza.", indent, initialIndent); if (strchr(dy->string, '\t')) warn("There are tabs in the indentation, be sure tab stop is set to 8 spaces."); noWarnAbort(); } } } slReverse(&meta->tagList); /* Set up remaining fields and return. */ assert(meta->tagList != NULL); meta->name = meta->tagList->val; meta->indent = initialIndent; return meta; }
void usage() /* Explain usage and exit. */ { struct bzp *bzp = bzpDefault(); printf("blatzServer version %d - Set up in-memory server for\n", bzpVersion()); printf("cross-species DNA alignments\n"); printf("usage:\n"); printf(" blatzServer start file(s)\n"); printf("Starts up server. Files are either fasta files, nib files, 2bit files\n"); printf("or text files containing the names of the above files one per line.\n"); printf("It's important that the sequence be repeat masked with repeats in\n"); printf("lower case.\n"); printf("Options: (defaults are shown for numerical parameters)\n"); bzpServerOptionsHelp(bzp); bzpClientOptionsHelp(bzp); printf(" -debug Writes diagnostic output to console and no daemon fork\n"); printf(" -subnet=255.255.255.255 Restrict access to subnet\n"); printf(" -port=%d Use specified TCP/IP port\n", bzpDefaultPort); printf(" -host=%s Query specified host\n", host); printf(" -cpu=%d Use specified number of CPUs (processes)\n", cpuCount); printf("Other uses:\n"); printf(" blatzServer stop\n"); printf(" this terminates server\n"); printf(" blatzServer status\n"); printf(" this prints status info including version\n"); noWarnAbort(); }
void tagStormCheck(char *schemaFile, char *tagStormFile) /* tagStormCheck - Check that a tagStorm conforms to a schema.. */ { /* Load up schema from file. Make a hash of all non-wildcard * tags, and a list of wildcard tags. */ struct tagSchema *schema, *schemaList = tagSchemaFromFile(schemaFile); struct slRef *wildSchemaList = NULL, *requiredSchemaList = NULL; /* Split up schemaList into hash and wildSchemaList. Calculate schemaSize */ struct hash *hash = hashNew(0); int schemaSize = 0; for (schema = schemaList; schema != NULL; schema = schema->next) { ++schemaSize; if (anyWild(schema->name)) { refAdd(&wildSchemaList, schema); } else hashAdd(hash, schema->name, schema); if (schema->required != 0) refAdd(&requiredSchemaList, schema); } slReverse(&wildSchemaList); schemaList = NULL; struct tagStorm *tagStorm = tagStormFromFile(tagStormFile); struct dyString *scratch = dyStringNew(0); rCheck(tagStorm->forest, tagStormFile, wildSchemaList, hash, requiredSchemaList, scratch); if (gErrCount > 0) noWarnAbort(); else verbose(1, "No problems detected.\n"); }
struct rudp *rudpMustOpen()
/* Open an unbound rudp via rudpOpen.  If that yields NULL, abort without
 * printing any further message here. */
{
struct rudp *opened = rudpOpen();
if (opened != NULL)
    return opened;
noWarnAbort();
return opened;
}
static void tagAbort(struct htmlPage *page, struct htmlTag *tag, char *format, ...)
/* Hand a printf-style message plus the page and tag to tagVaWarn for
 * reporting, then abort. */
{
va_list ap;

va_start(ap, format);
tagVaWarn(page, tag, format, ap);
va_end(ap);

noWarnAbort();
}
struct rudp *rudpMustOpenBound(struct sockaddr_in *sai)
/* Open a rudp socket bound to the address/port in sai via rudpOpenBound.
 * If that yields NULL, abort without printing any further message here. */
{
struct rudp *opened = rudpOpenBound(sai);
if (opened != NULL)
    return opened;
noWarnAbort();
return opened;
}
void handleFileError(struct sqlConnection *conn, int submitId, int fileId, char *err)
/* Report err through warn (after trimSpaces), save it in the error message
 * of the edwFile row fileId and the edwSubmit row submitId, then abort.
 * The message has already been emitted by warn, so the abort itself is silent. */
{
warn("%s", trimSpaces(err));

/* Write out error message to errorMessage field of both tables. */
edwWriteErrToTable(conn, "edwFile", fileId, err);
edwWriteErrToTable(conn, "edwSubmit", submitId, err);

/* noWarnAbort takes no arguments everywhere else it is used; the stray
 * err argument that used to be passed here has been dropped. */
noWarnAbort();
}
void mustChangeDir(char *dir)
/* Change the working directory to dir.  On failure, warn with the directory
 * name, print the system error text via perror, and abort. */
{
if (chdir(dir) >= 0)
    return;

warn("Couldn't change directory to %s", dir);
/* NOTE(review): perror reports the current errno; if warn can modify errno
 * the text printed here may not describe the chdir failure - confirm. */
perror("");
noWarnAbort();
}
static void perr(char *function, int err)
/* Do nothing when err is zero.  Otherwise report the failure of function
 * through pwarn and abort. */
{
if (err == 0)
    return;
pwarn(function, err);
noWarnAbort();
}
void hvPrintf(char *format, va_list args)
/* vprintf to stdout unless suppressHtml is set.  After writing, check the
 * stdout error indicator and abort if it is set, so that output to a broken
 * http connection terminates the program. */
{
if (!suppressHtml)
    {
    vprintf(format, args);
    if (ferror(stdout))
        noWarnAbort();
    }
}
void reportError(char *fileName, int startLine, char *format, ...)
/* Count one more error in gErrCount and abort (silently) if the count now
 * exceeds clMaxErr.  Otherwise emit the printf-style message through vaWarn,
 * followed by a second warning giving the stanza's starting line and file. */
{
/* Decide about aborting before starting argument processing, so that the
 * va_list is never left open on the abort path. */
if (++gErrCount > clMaxErr)
    noWarnAbort();

va_list args;
va_start(args, format);
vaWarn(format, args);
va_end(args);   /* every va_start must be paired with va_end before returning */

warn(" in stanza starting line %d of %s", startLine, fileName);
}
struct htmlPage *htmlPageParseOk(char *url, char *fullText)
/* Parse fullText as the page fetched from url and return the page only when
 * its status code is 200.  Aborts silently if parsing yields NULL, and with
 * a message naming the url and code for any other status. */
{
struct htmlPage *parsed = htmlPageParse(url, fullText);
if (!parsed)
    noWarnAbort();

if (parsed->status->status == 200)
    return parsed;

errAbort("%s returned with status code %d", url, parsed->status->status);
return parsed;
}
void recordAbort(struct raRecord *rec, char *format, ...)
/* Emit a printf-style message through vaWarn, report the location of rec
 * on stderr, and then abort. */
{
va_list ap;

va_start(ap, format);
vaWarn(format, ap);
va_end(ap);

recordLocationReport(rec, stderr);
noWarnAbort();
}
struct edwUser *edwMustGetUserFromEmail(struct sqlConnection *conn, char *email)
/* Look up the user for email with edwUserFromEmail and return it.  When no
 * user is found, issue the unregistered-user warning for that email and abort. */
{
struct edwUser *found = edwUserFromEmail(conn, email);
if (found != NULL)
    return found;

edwWarnUnregisteredUser(email);
noWarnAbort();
return found;
}
static void hDumpStackAbortHandler()
/* Abort handler that prints a stack dump and then continues the abort.
 * Does nothing when stackDumpDisabled is already set; otherwise it sets the
 * flag first, so a second entry while dumping is ignored. */
{
if (stackDumpDisabled)
    return;

stackDumpDisabled = TRUE;
popWarnHandler();               /* remove us from the stack */
dumpStack("\nStack dump:");
noWarnAbort();                  /* continue with next abort handler */
}
void vaErrAbort(char *format, va_list args) /* Abort function, with optional (vprintf formatted) error message. */ { /* flag is needed because both errAbort and warn generate message * using the warn handler, however sometimes one needed to know * (like when logging), if it's an error or a warning. This is far from * perfect, as this isn't cleared if the error handler continues, * as with an exception mechanism. */ struct perThreadAbortVars *ptav = getThreadVars(); ptav->errAbortInProgress = TRUE; vaWarn(format, args); noWarnAbort(); }
void usage(struct bzp *bzp)
/* Print the blatz usage text to stdout, followed by the server and client
 * option help generated from bzp, then abort. */
{
printf("blatz version %d - Align dna across species\n", bzpVersion());
printf(
    "usage:\n"
    " blatz target query output\n"
    "where target and query are either fasta files, nib files, 2bit files\n"
    "or a text files containing the names of the above files one per line.\n"
    "It's important that the sequence be repeat masked with repeats in\n"
    "lower case.\n"
    "Options: (defaults are shown for numerical parameters)\n");
bzpServerOptionsHelp(bzp);
bzpClientOptionsHelp(bzp);
noWarnAbort();
}
int edwOpenAndRecordInDir(struct sqlConnection *conn, char *submitDir, char *submitFile, char *url, int *retHostId, int *retDirId) /* Return a low level read socket handle on URL if possible. Consult and * update the edwHost and edwDir tables to help log and troubleshoot remote * problems. The url parameter should be just a concatenation of submitDir and * submitFile. */ { /* Wrap routine to open network file in errCatch and remember whether it works. */ struct errCatch *errCatch = errCatchNew(); int sd = -1; boolean success = TRUE; if (errCatchStart(errCatch)) { sd = netUrlMustOpenPastHeader(url); } errCatchEnd(errCatch); if (errCatch->gotError) { success = FALSE; warn("Error: %s", trimSpaces(errCatch->message->string)); } /* Parse url into pieces */ struct netParsedUrl npu; ZeroVar(&npu); netParseUrl(url, &npu); char urlDir[PATH_LEN], urlFileName[PATH_LEN], urlExtension[PATH_LEN]; splitPath(npu.file, urlDir, urlFileName, urlExtension); /* Record success of open attempt in host and submitDir tables. */ int hostId = edwGetHost(conn, npu.host); recordIntoHistory(conn, hostId, "edwHost", success); int submitDirId = edwGetSubmitDir(conn, hostId, submitDir); recordIntoHistory(conn, submitDirId, "edwSubmitDir", success); /* Finish up error processing, bailing out of further processing if there was an error. */ errCatchFree(&errCatch); if (!success) noWarnAbort(); /* Update optional return variables and return socket to read from. */ if (retHostId != NULL) *retHostId = hostId; if (retDirId != NULL) *retDirId = submitDirId; return sd; }
void usage()
/* Print the altSplice usage summary to stdout and one line per option to
 * stderr, then abort.  The final entry of optionSpecs is not listed. */
{
fputs(
    "altSplice - constructs altSplice graphs using psl alignments\n"
    "from est and mrna databases. Must specify either a bed file\n"
    "or a genePred file to load coordinates. This file should contain\n"
    "data for one and only one chromosome.\n"
    "usage:\n"
    " altSplice -db=hg15 -beds=rnaCluster.bed -agxOut=out.agx\n"
    "where options are:\n",
    stdout);

int optIx = 0;
while (optIx < ArraySize(optionSpecs) -1)
    {
    fprintf(stderr, " -%s -- %s\n", optionSpecs[optIx].name, optionDescripts[optIx]);
    optIx++;
    }
noWarnAbort();
}
static void addFilteredBedsOnRegion(char *fileName, struct region *region, char *table,
        struct asFilter *filter, struct lm *bedLm, struct bed **pBedList,
        struct hash *idHash, int *pMaxOut, boolean isTabix)
/* Open the VCF file fileName (through the tabix opener when isTabix) on
 * region and prepend a bed, allocated in bedLm, to *pBedList for each record
 * that passes filter and, when idHash is given, whose name is in idHash.
 * Because beds are added at the head, the list ends up in reverse order.
 * *pMaxOut is decremented per record and the scan stops when it reaches zero;
 * a record rejected by idHash does not count.  Aborts if the file cannot be
 * opened.  The table parameter is not used here. */
{
struct vcfFile *vcff = isTabix
    ? vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
                          100, *pMaxOut)
    : vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
                     100, *pMaxOut, TRUE);
if (vcff == NULL)
    noWarnAbort();

struct lm *lm = lmInit(0);

/* Scratch space that vcfRecordToRow fills in for every record. */
char *row[VCFDATALINE_NUM_COLS];
char numBuf[VCF_NUM_BUF_SIZE];
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);

struct vcfRecord *rec = vcff->records;
for ( ; rec != NULL; rec = rec->next)
    {
    vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
    if (asFilterOnRow(filter, row))
        {
        boolean idWanted = (idHash == NULL) || (hashLookup(idHash, rec->name) != NULL);
        if (!idWanted)
            continue;       /* skips the budget decrement below as well */

        struct bed *bed;
        lmAllocVar(bedLm, bed);
        bed->chrom = lmCloneString(bedLm, region->chrom);
        bed->chromStart = rec->chromStart;
        bed->chromEnd = rec->chromEnd;
        bed->name = lmCloneString(bedLm, rec->name);
        slAddHead(pBedList, bed);
        }
    if (--(*pMaxOut) <= 0)
        break;
    }

dyStringFree(&dyAlt);
dyStringFree(&dyFilter);
dyStringFree(&dyInfo);
dyStringFree(&dyGt);
lmCleanup(&lm);
vcfFileFree(&vcff);
}
struct mouseChromCache *newMouseChromCache(char *chrom, int chromSize, char *ratMouseDir) /* Create a new chromCache. */ { struct mouseChromCache *mcc; char fileName[512]; struct lineFile *lf; char *row[3]; int start,end; long long *pPos; /* Open up file with actual alignments. Warn and return NULL * if it doesn't exist. */ sprintf(fileName, "%s/%s.axt", ratMouseDir, chrom); lf = lineFileMayOpen(fileName, TRUE); /* Allocate structure and store basic info in it. */ AllocVar(mcc); mcc->name = cloneString(chrom); mcc->size = chromSize; mcc->lf = lf; if (lf == NULL) { warn("%s doesn't exist", fileName); if (!noDieMissing) noWarnAbort(); return mcc; } /* Read index file into bk. */ sprintf(fileName, "%s/%s.axt.ix", ratMouseDir, chrom); mcc->bk = binKeeperNew(0, chromSize); lf = lineFileOpen(fileName, TRUE); verbose(1, "Reading %s\n", fileName); while (lineFileRow(lf, row)) { start = lineFileNeedNum(lf, row, 0); end = lineFileNeedNum(lf, row, 1) + start; AllocVar(pPos); *pPos = atoll(row[2]); binKeeperAdd(mcc->bk, start, end, pPos); } lineFileClose(&lf); /* Return initialized object. */ return mcc; }
static void usage()
/* Print the blatzClient usage text to stdout, including the client option
 * help produced from the default parameter set, then abort. */
{
struct bzp *defaults = bzpDefault();

printf("blatzClient version %d - Ask server to do\n", bzpVersion());
printf(
    "cross-species DNA alignments and save results.\n"
    "usage:\n"
    " blatzClient queryFile outputFile.\n"
    "The queryFile can be in fasta, nib, or 2bit format or a \n"
    "text file containing the names of the above files one per line.\n"
    "It's important that the sequence be repeat masked with repeats in\n"
    "lower case.\n"
    "Options: (defaults are shown for numerical parameters)\n");
bzpClientOptionsHelp(defaults);
printf(" -port=%d Use specified TCP/IP port\n", bzpDefaultPort);
printf(" -host=%s Query specified host\n", host);
noWarnAbort();
}
struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count,
        boolean isTabix)
/* Return some semi-random IDs from the VCF file behind table.
 * Reads up to 4*count lines (but at least 100) from the start of the file,
 * collects the third column of each, shuffles the collected IDs and trims the
 * list to count entries.  Aborts if the file cannot be opened. */
{
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database));
struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE)
                              : lineFileMayOpen(fileName, TRUE);
if (lf == NULL)
    noWarnAbort();

/* Read more lines than requested so the shuffle has something to choose from. */
int orderedCount = count * 4;
if (orderedCount < 100)
    orderedCount = 100;

struct slName *idList = NULL;
char *words[4];
int wordCount;
int i;
for (i = 0; i < orderedCount && (wordCount = lineFileChop(lf, words)) > 0; i++)
    {
    /* The ID is the third column; a shorter line leaves words[2] unset, so
     * skip it rather than read an uninitialized pointer. */
    if (wordCount < 3)
        continue;
    // compress runs of identical ID, in case most are placeholder
    if (idList == NULL || !sameString(words[2], idList->name))
        slAddHead(&idList, slNameNew(words[2]));
    }
lineFileClose(&lf);

/* Shuffle list and trim it to count if necessary. */
shuffleList(&idList);
struct slName *sl;
for (sl = idList, i = 0; sl != NULL; sl = sl->next, i++)
    {
    if (i+1 >= count)
        {
        slNameFreeList(&(sl->next));
        break;
        }
    }
freez(&fileName);
return idList;
}
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie,
        struct hash *chromSizeHash, struct lm *lm, int itemsPerSlot,
        struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in
 * bedGraph format (that line is pushed back onto lf).
 * Items are grouped per chromosome, sorted, checked for overlap, and emitted
 * as sections of at most itemsPerSlot items, each prepended to *pSectionList.
 * Items and sections are allocated in lm.  When chromSizeHash is NULL every
 * chromosome is given size BIGNUM.  An item ending past its chromosome is
 * warned about and dropped when clipDontDie is set; otherwise it is fatal. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
        lineFileReuse(lf);
        break;
        }

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome, creating its entry the first time it is seen. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        lmAllocVar(chromHash->lm, chrom);
        hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
        chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
        slAddHead(&chromList, chrom);
        }

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end (%u) line %d of %s.",
            item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
        {
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
            lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
        if (!clipDontDie)
            noWarnAbort();
        }
    else
        {
        slAddHead(&chrom->itemList, item);
        }
    }
slSort(&chromList, bedGraphChromCmpName);

/* Loop through each chromosome and output the item list, broken into sections
 * for that chrom. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* With clipDontDie every item of a chromosome may have been dropped,
     * leaving an empty list; there is nothing to check or emit then, and the
     * overlap check below must not dereference a NULL head. */
    if (chrom->itemList == NULL)
        continue;

    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Check to make sure no overlap between items. */
    struct bwgBedGraphItem *prevItem = chrom->itemList, *nextItem;
    for (nextItem = prevItem->next; nextItem != NULL; nextItem = nextItem->next)
        {
        if (prevItem->end > nextItem->start)
            errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
                chrom->name, prevItem->start, prevItem->end,
                chrom->name, nextItem->start, nextItem->end);
        prevItem = nextItem;
        }

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
        {
        /* Find end item of this section, and start item for next section.
         * Terminate list at end item. */
        int sectionSize = 0;
        int i;
        endItem = startItem;
        for (i=0; i<itemsPerSlot; ++i)
            {
            if (nextStartItem == NULL)
                break;
            endItem = nextStartItem;
            nextStartItem = nextStartItem->next;
            ++sectionSize;
            }
        endItem->next = NULL;

        /* Fill in section and add it to section list. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = cloneString(chrom->name);
        section->start = startItem->start;
        section->end = endItem->end;
        section->type = bwgTypeBedGraph;
        section->items.bedGraphList = startItem;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
        }
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
static void parseSteppedSection(struct lineFile *lf, boolean clipDontDie,
        struct hash *chromSizeHash, char *initialLine,
        struct lm *lm, int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse a variableStep or fixedStep section whose declaration line is
 * initialLine, and hand the data lines that follow to the matching
 * type-specific parser, which adds the resulting section(s) to *pSectionList.
 * The declaration line is a type word followed by var=val settings for
 * chrom, span, step and start.  fixedStep requires start and step;
 * variableStep must have neither.  Any violation aborts with a message. */
{
/* The first word names the section type. */
char *typeWord = nextWord(&initialLine);
enum bwgSectionType type = bwgTypeFixedStep;
if (sameString(typeWord, "variableStep"))
    type = bwgTypeVariableStep;
else if (!sameString(typeWord, "fixedStep"))
    errAbort("Unknown type %s\n", typeWord);

/* Settings start out as "not given". */
char *chrom = NULL;
bits32 start = 0;
bits32 step = 0;
int span = 0;

/* Walk the remaining var=val words. */
char *setting;
while ((setting = nextWord(&initialLine)) != NULL)
    {
    char *parts[2];
    if (chopByChar(setting, '=', parts, 2) != 2)
        errAbort("strange var=val pair line %d of %s", lf->lineIx, lf->fileName);
    char *var = parts[0];
    char *val = parts[1];
    if (sameString(var, "chrom"))
        chrom = cloneString(val);
    else if (sameString(var, "span"))
        span = parseUnsignedVal(lf, var, val);
    else if (sameString(var, "step"))
        step = parseUnsignedVal(lf, var, val);
    else if (sameString(var, "start"))
        start = parseUnsignedVal(lf, var, val);
    else
        errAbort("Unknown setting %s=%s line %d of %s", var, val, lf->lineIx, lf->fileName);
    }

/* A chromosome is mandatory; its size bounds everything that follows. */
if (chrom == NULL)
    errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
bits32 chromSize = BIGNUM;
if (chromSizeHash)
    chromSize = hashIntVal(chromSizeHash, chrom);
if (start > chromSize)
    {
    warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
        lf->lineIx, lf->fileName, chrom, chromSize, start);
    if (!clipDontDie)
        noWarnAbort();
    }

/* Validate the type-specific settings and parse the rest of the section. */
if (type == bwgTypeVariableStep)
    {
    if (start != 0)
        errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (step != 0)
        errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (span == 0)
        span = 1;
    parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot,
        chrom, chromSize, span, pSectionList);
    }
else
    {
    if (start == 0)
        errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (step == 0)
        errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (span == 0)
        span = step;
    /* start is one-based in the file; the section parser gets it zero-based. */
    parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot,
        chrom, chromSize, span, start-1, step, pSectionList);
    }
}
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie,
        struct lm *lm, int itemsPerSlot, char *chrom, int chromSize, bits32 span,
        struct bwgSection **pSectionList)
/* Read the two-column (position, value) data lines of a variableStep section
 * up to end of file or the start of the next section, and prepend the
 * result to *pSectionList as sections of at most itemsPerSlot items.
 * Positions in the file are one-based and stored zero-based.  Items are
 * sorted and must not overlap.  An item running past chromSize is warned
 * about and dropped when clipDontDie is set; otherwise it is fatal.
 * Sections and their packed arrays are allocated in lm. */
{
struct lm *lmLocal = lmInit(0);     /* holds the temporary item list only */

/* Stream through the section, collecting items into a list. */
char *cols[2];
char *line;
struct bwgVariableStepItem *item, *nextItem, *itemList = NULL;
int keptCount = 0;
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 2))
        {
        lineFileReuse(lf);
        break;
        }
    chopLine(line, cols);
    lmAllocVar(lmLocal, item);
    int start = lineFileNeedNum(lf, cols, 0);
    if (start <= 0)
        {
        errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
            lf->lineIx, lf->fileName);
        }
    item->start = start - 1;
    item->val = lineFileNeedDouble(lf, cols, 1);
    if (item->start + span > chromSize)
        {
        warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
            lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
        if (!clipDontDie)
            noWarnAbort();
        }
    else
        {
        slAddHead(&itemList, item);
        ++keptCount;
        }
    }
slSort(&itemList, bwgVariableStepItemCmp);

/* Each item must end at or before the start of its successor. */
for (item = itemList; item != NULL && item->next != NULL; item = item->next)
    {
    nextItem = item->next;
    if (item->start + span > nextItem->start)
        errAbort("Overlap on %s between items starting at %d and %d.\n"
                 "Please remove overlaps and try again",
                 chrom, item->start, nextItem->start);
    }

/* Break up into sections of no more than items-per-slot size. */
int remaining = keptCount;
item = itemList;
while (item != NULL)
    {
    /* Figure out size of this section. */
    int sectionSize = (remaining > itemsPerSlot) ? itemsPerSlot : remaining;
    remaining -= sectionSize;

    /* Convert from list to array representation. */
    struct bwgVariableStepPacked *packed;
    lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i = 0; i < sectionSize; ++i)
        {
        packed[i].start = item->start;
        packed[i].val = item->val;
        item = item->next;
        }

    /* Fill in section and add it to list. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = packed[0].start;
    section->end = packed[sectionSize-1].start + span;
    section->type = bwgTypeVariableStep;
    section->items.variableStepPacked = packed;
    section->itemSpan = span;
    section->itemCount = sectionSize;
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie,
        struct lm *lm, int itemsPerSlot, char *chrom, bits32 chromSize,
        bits32 span, bits32 sectionStart, bits32 step,
        struct bwgSection **pSectionList)
/* Read the single-column data lines of a fixedStep section up to end of file
 * or the start of the next section, and prepend the result to *pSectionList
 * as sections of at most itemsPerSlot values.  The first value sits at
 * sectionStart and each following one step bases further on.  A value whose
 * span would run past chromSize is warned about and dropped when clipDontDie
 * is set; otherwise it is fatal.  Sections and their packed arrays are
 * allocated in lm. */
{
struct lm *lmLocal = lmInit(0);     /* holds the temporary item list only */

/* Stream through the section, collecting values in file order. */
char *cols[1];
char *line;
struct bwgFixedStepItem *item, *itemList = NULL;
int keptCount = 0;
bits32 itemStart = sectionStart;    /* position of the value being read */
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 1))
        {
        lineFileReuse(lf);
        break;
        }
    chopLine(line, cols);
    lmAllocVar(lmLocal, item);
    item->val = lineFileNeedDouble(lf, cols, 0);
    bits32 itemEnd = itemStart + span;
    if (itemEnd > chromSize)
        {
        warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
            lf->lineIx, lf->fileName, chrom, chromSize, itemEnd);
        if (!clipDontDie)
            noWarnAbort();
        }
    else
        {
        slAddHead(&itemList, item);
        ++keptCount;
        }
    itemStart += step;
    }
slReverse(&itemList);

/* Break up into sections of no more than items-per-slot size, and convert
 * each to packed format. */
int remaining = keptCount;
item = itemList;
while (item != NULL)
    {
    /* Figure out size of this section. */
    int sectionSize = (remaining > itemsPerSlot) ? itemsPerSlot : remaining;
    remaining -= sectionSize;

    /* Allocate and fill in section; sectionStart then moves on to where the
     * next section begins. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = sectionStart;
    sectionStart += sectionSize * step;
    section->end = sectionStart - step + span;
    section->type = bwgTypeFixedStep;
    section->itemStep = step;
    section->itemSpan = span;
    section->itemCount = sectionSize;

    /* Allocate array for data, and copy from list to array representation. */
    struct bwgFixedStepPacked *packed;
    section->items.fixedStepPacked = lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i = 0; i < sectionSize; ++i)
        {
        packed[i].val = item->val;
        item = item->next;
        }

    /* Add section to list. */
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
void outputUniqueOnSharedKey(char *inTab, struct asObject *as, struct asColumn *keyCol, struct slPair *fieldList, char *outTab, char *outErr) /* Scan through tab-separated file inTab and output fields in fieldList to * outTab. Make sure there is only one row for each value of sharedKey field. * If there would be multiple different rows in output with sharedKey, * complain about it in outErr. */ { /* Open input and output. */ struct lineFile *lf = lineFileOpen(inTab, TRUE); FILE *f = mustOpen(outTab, "w"); FILE *fErr = mustOpen(outErr, "w"); /* Set up array for input fields with more than we expect for better error reporting. */ int oldFieldCount = slCount(as->columnList); int newFieldCount = slCount(fieldList); int allocFields = oldFieldCount+10; char *words[allocFields]; /* Set up array for output fields that says where to find them in input. */ int *oldIx = makeNewToOldArray(as, fieldList); /* Figure out index of key field. */ int keyIx = slIxFromElement(as->columnList, keyCol); /* Go through each line of input, outputting selected columns. */ struct hash *uniqHash = hashNew(18); struct hash *errHash = hashNew(0); struct dyString *dy = dyStringNew(1024); int fieldCount; while ((fieldCount = lineFileChopNextTab(lf, words, allocFields)) > 0) { lineFileExpectWords(lf, oldFieldCount, fieldCount); /* Collect possible output into dy. */ dyStringClear(dy); dyStringPrintf(dy, "%s", words[oldIx[0]]); int i; for (i=1; i<newFieldCount; ++i) dyStringPrintf(dy, "\t%s", words[oldIx[i]]); dyStringPrintf(dy, "\n"); /* Check that this line is either unique for this key, or the same as previous lines * for the key. */ char *key = words[keyIx]; char *oldVal = hashFindVal(uniqHash, key); if (oldVal != NULL) { if (!sameString(oldVal, dy->string)) { /* Error reporting is a little complex. We want to output all lines associated * with key, including the first one, but we only want to do first line once. 
*/ if (!hashLookup(errHash, key)) { hashAdd(errHash, key, NULL); fputs(oldVal, fErr); } fputs(dy->string, fErr); } } else { hashAdd(uniqHash, key, cloneString(dy->string)); fputs(dy->string, f); } } /* Report error summary */ if (errHash->elCount > 0) { warn("Warning: %d shared keys have multiple values in table 2. See %s.\n" "Only first row for each key put in %s" , errHash->elCount, outErr, outTab); if (!mergeOk) noWarnAbort(); } /* Clean up and go home. */ freez(&oldIx); carefulClose(&fErr); carefulClose(&f); lineFileClose(&lf); }
static struct labeledFile *parseToLabeledFiles(struct customPp *cpp,
        int colCount, char *formatType, char *markerType,
        boolean firstLineLabels, struct sqlConnection *conn, boolean report)
/* Parse out cpp until next track, creating a list of labeled binary files,
 * one per data column (the columns after the markerCols(markerType) position
 * columns).  The input is processed according to markerType: genomic
 * coordinates directly, everything else by looking markers up in a database
 * table.  Returns the file list on success; aborts if processing fails, the
 * marker type is unknown, or its table is missing. */
{
/* Allocate a labeledFile for each column of real data, labeled 1, 2, ... */
struct labeledFile *fileList = NULL, *fileEl;
int posColCount = markerCols(markerType);
int col;
for (col = posColCount; col < colCount; ++col)
    {
    struct tempName tempName;
    char buf[16];
    safef(buf, sizeof(buf), "hggUp%d", col);
    trashDirFile(&tempName, "hgg", buf, ".cgb");
    safef(buf, sizeof(buf), "%d", col+1-posColCount);
    fileEl = labeledFileNew(tempName.forCgi, buf);
    slAddHead(&fileList, fileEl);
    }
slReverse(&fileList);

boolean ok = FALSE;
if (sameString(markerType, cgfMarkerGenomic))
    {
    ok = mayProcessGenomic(conn, cpp, colCount, formatType,
        firstLineLabels, fileList, report);
    }
else if (sameString(markerType, cgfMarkerSts))
    {
    ok = mayProcessDb(conn, cpp, colCount, formatType,
        firstLineLabels, fileList, "stsMap",
        "select chrom,round((chromStart+chromEnd)*0.5),name from %s",
        "stsAlias", "select alias,trueName from %s", report, FALSE);
    }
else if (sameString(markerType, cgfMarkerSnp))
    {
    char *snpTable = hFindLatestSnpTableConn(conn, NULL);
    if (snpTable == NULL)
        errAbort("No SNP table in %s", sqlGetDatabase(conn));
    char *query = "select chrom,chromStart,name from %s";
    ok = mayProcessDb(conn, cpp, colCount, formatType,
        firstLineLabels, fileList, snpTable, query, NULL, NULL, report, TRUE);
    }
else if (sameString(markerType, cgfMarkerAffy100))
    {
    /* ok stays FALSE, so this ends in the abort at the bottom. */
    warn("Support for Affy 100k chip coming soon.");
    }
else
    {
    /* Remaining marker types each map to one table of marker positions. */
    struct { char *marker; char *table; } chipTables[] =
        {
        { cgfMarkerAffy500,        affy500Table },
        { cgfMarkerAffy6,          affy6Table },
        { cgfMarkerAffy6SV,        affy6SVTable },
        { cgfMarkerHumanHap300,    illumina300Table },
        { cgfMarkerHumanHap550,    illumina550Table },
        { cgfMarkerHumanHap650,    illumina650Table },
        { cgfMarkerHumanHap1M,     illumina1MTable },
        { cgfMarkerAgilentCgh244A, agilentCgh244ATable },
        };
    int chipCount = sizeof(chipTables) / sizeof(chipTables[0]);
    char *table = "";
    boolean known = FALSE;
    int t;
    for (t = 0; t < chipCount; ++t)
        {
        if (sameString(markerType, chipTables[t].marker))
            {
            table = chipTables[t].table;
            known = TRUE;
            }
        }
    if (!known)
        {
        errAbort("Unknown identifier format. markerType=%s", markerType);
        }
    else
        {
        if (!sqlTableExists(conn, table))
            errAbort("Sorry, no data for %s on this assembly.", markerType);
        ok = mayProcessDb(conn, cpp, colCount, formatType,
            firstLineLabels, fileList, table,
            "select chrom,chromStart,name from %s", NULL, NULL, report, FALSE);
        }
    }

if (!ok)
    {
    noWarnAbort();
    return NULL;
    }
return fileList;
}
void ctgToChromFa(char *chromName, char *insertFile, char *chromDir, char *orderLst, char *outName, struct hash *liftHash) /* ctgToChromFa - convert contig level fa files to chromosome level. */ { struct hash *uniq = newHash(0); struct bigInsert *bi; struct chromInserts *chromInserts; struct hash *insertHash = newHash(9); struct lineFile *lf = lineFileOpen(orderLst, TRUE); FILE *f = mustOpen(outName, "w"); char ctgFaName[512]; char *words[2]; int liftChromSize = 0; int actualChromSize = 0; boolean isFirst = TRUE; chromInsertsRead(insertFile, insertHash); chromInserts = hashFindVal(insertHash, chromName); fprintf(f, ">%s\n", chromName); while (lineFileNextRow(lf, words, 1)) { char *contig = words[0]; int nSize; if (liftHash != NULL) { struct lift *lift = hashMustFindVal(liftHash, contig); nSize = lift->nBefore; liftChromSize = lift->chromSize; } else nSize = chromInsertsGapSize(chromInserts, rmChromPrefix(contig), isFirst); hashAddUnique(uniq, contig, NULL); addN(f, nSize); actualChromSize += nSize; isFirst = FALSE; sprintf(ctgFaName, "%s/%s/%s.fa", chromDir, contig, contig); if (fileExists(ctgFaName)) { actualChromSize += addFa(f, ctgFaName); } else { warn("%s does not exist\n", ctgFaName); if (!cgiVarExists("missOk")) noWarnAbort(); } } lineFileClose(&lf); if (chromInserts != NULL) if ((bi = chromInserts->terminal) != NULL) { addN(f, bi->size); actualChromSize += bi->size; } if (liftHash != NULL) { if (actualChromSize > liftChromSize) errAbort("Error: chromosome size from lift file is %d, but actual fa size is %d. Possible inconsistency between lift and inserts?", liftChromSize, actualChromSize); else if (actualChromSize < liftChromSize) addN(f, (liftChromSize - actualChromSize)); } if (linePos != 0) fputc('\n', f); fclose(f); }