示例#1
0
文件: meta.c 项目: avilella/methylQA
struct meta *metaNextStanza(struct lineFile *lf)
/* Return next stanza in a meta file.  Does not set parent/child/next pointers.
 * Returns NULL at end of file.  Does a little error checking,  making sure
 * that indentation level is consistent across all lines of stanza. Returns
 * indentation level. */
{
    /* See if anything left in file, and if not return. */
    if (!raSkipLeadingEmptyLines(lf, NULL))
        return NULL;

    /* Allocate return structure and vars to help parse. */
    struct meta *meta;
    AllocVar(meta);
    struct dyString *dy = dyStringNew(256);
    char *tag,*val;

    /* Loop to get all tags in stanza. */
    boolean firstTime = TRUE;
    int initialIndent = 0;
    for (;;)
    {
        dyStringClear(dy);
        if (!raNextTagVal(lf, &tag, &val, dy))
            break;

        /* Make tag/val and add it to list. */
        struct metaTagVal *mtv;
        AllocVar(mtv);
        mtv->tag = cloneString(tag);
        mtv->val = cloneString(val);
        slAddHead(&meta->tagList, mtv);

        /* Check indentation. */
        int indent =  countLeadingSpacesDetabbing(dy->string, 8);
        if (firstTime)
        {
            initialIndent = indent;
            firstTime = FALSE;
        }
        else
        {
            if (indent != initialIndent)
            {
                warn("Error line %d of %s\n", lf->lineIx, lf->fileName);
                warn("Indentation level %d doesn't match level %d at start of stanza.",
                     indent, initialIndent);
                if (strchr(dy->string, '\t'))
                    warn("There are tabs in the indentation, be sure tab stop is set to 8 spaces.");
                noWarnAbort();
            }
        }
    }
    slReverse(&meta->tagList);

    /* Set up remaining fields and return. */
    assert(meta->tagList != NULL);
    meta->name = meta->tagList->val;
    meta->indent = initialIndent;
    return meta;
}
示例#2
0
文件: server.c 项目: davidhoover/kent
void usage()
/* Explain usage and exit. */
{
struct bzp *bzp = bzpDefault();
printf("blatzServer version %d - Set up in-memory server for\n", bzpVersion());
printf("cross-species DNA alignments\n");
printf("usage:\n");
printf("   blatzServer start file(s)\n");
printf("Starts up server. Files are either fasta files, nib files, 2bit files\n");
printf("or text files containing the names of the above files one per line.\n");
printf("It's important that the sequence be repeat masked with repeats in\n");
printf("lower case.\n");
printf("Options: (defaults are shown for numerical parameters)\n");
bzpServerOptionsHelp(bzp);
bzpClientOptionsHelp(bzp);
printf("  -debug Writes diagnostic output to console and no daemon fork\n");
printf("  -subnet=255.255.255.255 Restrict access to subnet\n");
printf("  -port=%d Use specified TCP/IP port\n", bzpDefaultPort);
printf("  -host=%s Query specified host\n", host);
printf("  -cpu=%d Use specified number of CPUs (processes)\n", cpuCount);
printf("Other uses:\n");
printf("   blatzServer stop\n");
printf("      this terminates server\n");
printf("   blatzServer status\n");
printf("      this prints status info including version\n");
noWarnAbort();
}
示例#3
0
void tagStormCheck(char *schemaFile, char *tagStormFile)
/* tagStormCheck - Check that a tagStorm conforms to a schema.. */
{
/* Load up schema from file.  Make a hash of all non-wildcard
 * tags, and a list of wildcard tags. */
struct tagSchema *schema, *schemaList = tagSchemaFromFile(schemaFile);
struct slRef *wildSchemaList = NULL, *requiredSchemaList = NULL;

/* Split up schemaList into hash and wildSchemaList.  Calculate schemaSize */
struct hash *hash = hashNew(0);
int schemaSize = 0;
for (schema = schemaList; schema != NULL; schema = schema->next)
    {
    ++schemaSize;
    if (anyWild(schema->name))
	{
	refAdd(&wildSchemaList, schema);
	}
    else
	hashAdd(hash, schema->name, schema);
    if (schema->required != 0)
        refAdd(&requiredSchemaList, schema);
    }
slReverse(&wildSchemaList);
schemaList = NULL;

struct tagStorm *tagStorm = tagStormFromFile(tagStormFile);
struct dyString *scratch = dyStringNew(0);
rCheck(tagStorm->forest, tagStormFile, wildSchemaList, hash, requiredSchemaList, scratch);

if (gErrCount > 0)
    noWarnAbort();
else
    verbose(1, "No problems detected.\n");
}
示例#4
0
文件: rudp.c 项目: ma-compbio/RACA
struct rudp *rudpMustOpen()
/* Open up unbound rudp.  Warn and die if there is a problem. */
{
struct rudp *ru = rudpOpen();
if (ru == NULL)
    noWarnAbort();
return ru;
}
示例#5
0
static void tagAbort(struct htmlPage *page, struct htmlTag *tag, char *format, ...)
/* Print abort message and some context of tag. */
{
va_list args;
va_start(args, format);
tagVaWarn(page, tag, format, args);
va_end(args);
noWarnAbort();
}
示例#6
0
文件: rudp.c 项目: ma-compbio/RACA
struct rudp *rudpMustOpenBound(struct sockaddr_in *sai)
/* Open up a rudp socket bound to a particular port and address
 * or die trying. */
{
struct rudp *ru = rudpOpenBound(sai);
if (ru == NULL)
    noWarnAbort();
return ru;
}
示例#7
0
void handleFileError(struct sqlConnection *conn, int submitId, int fileId, char *err)
/* Write out error to stderr and also save it in errorMessage field of file 
 * and submit table. */
{
/* Write out error message to errorMessage field of table. */
warn("%s", trimSpaces(err));
edwWriteErrToTable(conn, "edwFile", fileId, err);
edwWriteErrToTable(conn, "edwSubmit", submitId, err);
noWarnAbort(err);
}
示例#8
0
文件: cmParse.c 项目: bowhan/kent
void mustChangeDir(char *dir)
/* Change directory or die trying. */
{
if (chdir(dir) < 0)
    {
    warn("Couldn't change directory to %s", dir);
    perror("");
    noWarnAbort();
    }
}
示例#9
0
static void perr(char *function, int err)
/* Print out error for function and abort on
 * non-zero error code.. */
{
if (err != 0)
    {
    pwarn(function, err);
    noWarnAbort();
    }
}
示例#10
0
文件: hPrint.c 项目: bowhan/kent
void hvPrintf(char *format, va_list args)
/* Suppressable variable args printf. Check for write error so we can
 * terminate if http connection breaks. */
{
if (suppressHtml)
    return;
vprintf(format, args);
if (ferror(stdout))
    noWarnAbort();
}
示例#11
0
void reportError(char *fileName, int startLine, char *format, ...)
/* Report error and abort if there are too many errors. */
{
va_list args;
va_start(args, format);
if (++gErrCount > clMaxErr)
    noWarnAbort();
vaWarn(format, args);
warn(" in stanza starting line %d of %s", startLine, fileName);
}
示例#12
0
struct htmlPage *htmlPageParseOk(char *url, char *fullText)
/* Parse out page and return only if status ok. */
{
struct htmlPage *page = htmlPageParse(url, fullText);
if (page == NULL)
   noWarnAbort();
if (page->status->status != 200)
   errAbort("%s returned with status code %d", url, page->status->status);
return page;
}
void recordAbort(struct raRecord *rec, char *format, ...)
/* Issue a warning message. */
{
va_list args;
va_start(args, format);
vaWarn(format, args);
va_end(args);
recordLocationReport(rec, stderr);
noWarnAbort();
}
示例#14
0
struct edwUser *edwMustGetUserFromEmail(struct sqlConnection *conn, char *email)
/* Return user associated with email or put up error message. */
{
struct edwUser *user = edwUserFromEmail(conn, email);
if (user == NULL)
    {
    edwWarnUnregisteredUser(email);
    noWarnAbort();
    }
return user;
}
static void hDumpStackAbortHandler()
/* abort handle that prints stack dump then invokes the previous abort
 * handler on the stack. */
{
if (!stackDumpDisabled)
    {
    stackDumpDisabled = TRUE;
    popWarnHandler(); // remove us from the stack
    dumpStack("\nStack dump:");
    // continue with next abort handler
    noWarnAbort();
    }
}
示例#16
0
void vaErrAbort(char *format, va_list args)
/* Abort function, with optional (vprintf formatted) error message. */
{
/* flag is needed because both errAbort and warn generate message
 * using the warn handler, however sometimes one needed to know
 * (like when logging), if it's an error or a warning.  This is far from
 * perfect, as this isn't cleared if the error handler continues,
 * as with an exception mechanism. */
struct perThreadAbortVars *ptav = getThreadVars();
ptav->errAbortInProgress = TRUE;
vaWarn(format, args);
noWarnAbort();
}
void usage(struct bzp *bzp)
/* Explain usage and exit. */
{
printf("blatz version %d - Align dna across species\n", bzpVersion());
printf("usage:\n");
printf("   blatz target query output\n");
printf("where target and query are either fasta files, nib files, 2bit files\n");
printf("or a text files containing the names of the above files one per line.\n");
printf("It's important that the sequence be repeat masked with repeats in\n");
printf("lower case.\n");
printf("Options: (defaults are shown for numerical parameters)\n");
bzpServerOptionsHelp(bzp);
bzpClientOptionsHelp(bzp);
noWarnAbort();
}
示例#18
0
int edwOpenAndRecordInDir(struct sqlConnection *conn, 
	char *submitDir, char *submitFile, char *url,
	int *retHostId, int *retDirId)
/* Return a low level read socket handle on URL if possible.  Consult and
 * update the edwHost and edwDir tables to help log and troubleshoot remote
 * problems. The url parameter should be just a concatenation of submitDir and
 * submitFile. */
{
/* Wrap routine to open network file in errCatch and remember whether it works. */
struct errCatch *errCatch = errCatchNew();
int sd = -1;
boolean success = TRUE;
if (errCatchStart(errCatch))
    {
    sd = netUrlMustOpenPastHeader(url);
    }
errCatchEnd(errCatch);
if (errCatch->gotError)
    {
    success = FALSE;
    warn("Error: %s", trimSpaces(errCatch->message->string));
    }

/* Parse url into pieces */
struct netParsedUrl npu;
ZeroVar(&npu);
netParseUrl(url, &npu);
char urlDir[PATH_LEN], urlFileName[PATH_LEN], urlExtension[PATH_LEN];
splitPath(npu.file, urlDir, urlFileName, urlExtension);

/* Record success of open attempt in host and submitDir tables. */
int hostId = edwGetHost(conn, npu.host);
recordIntoHistory(conn, hostId, "edwHost", success);
int submitDirId = edwGetSubmitDir(conn, hostId, submitDir);
recordIntoHistory(conn, submitDirId, "edwSubmitDir", success);

/* Finish up error processing, bailing out of further processing if there was an error. */
errCatchFree(&errCatch);
if (!success)
    noWarnAbort();

/* Update optional return variables and return socket to read from. */
if (retHostId != NULL)
    *retHostId = hostId;
if (retDirId != NULL)
    *retDirId = submitDirId;
return sd;
}
示例#19
0
void usage()
/* print usage and quit */
{
int i;
printf(
       "altSplice - constructs altSplice graphs using psl alignments\n"
       "from est and mrna databases. Must specify either a bed file\n"
       "or a genePred file to load coordinates. This file should contain\n"
       "data for one and only one chromosome.\n"
       "usage:\n"
       "   altSplice -db=hg15 -beds=rnaCluster.bed -agxOut=out.agx\n"
       "where options are:\n");
for(i=0; i<ArraySize(optionSpecs) -1; i++)
    fprintf(stderr, "   -%s -- %s\n", optionSpecs[i].name, optionDescripts[i]);
noWarnAbort();
}
示例#20
0
static void addFilteredBedsOnRegion(char *fileName, struct region *region, char *table,
				    struct asFilter *filter, struct lm *bedLm,
				    struct bed **pBedList, struct hash *idHash, int *pMaxOut,
				    boolean isTabix)
/* Add relevant beds in reverse order to pBedList */
{
struct vcfFile *vcff;
if (isTabix)
    vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
			       100, *pMaxOut);
else
    vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
			  100, *pMaxOut, TRUE);
if (vcff == NULL)
    noWarnAbort();
struct lm *lm = lmInit(0);
char *row[VCFDATALINE_NUM_COLS];
char numBuf[VCF_NUM_BUF_SIZE];
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (rec = vcff->records;  rec != NULL;  rec = rec->next)
    {
    vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
    if (asFilterOnRow(filter, row))
        {
	if ((idHash != NULL) && (hashLookup(idHash, rec->name) == NULL))
	    continue;
	struct bed *bed;
	lmAllocVar(bedLm, bed);
	bed->chrom = lmCloneString(bedLm, region->chrom);
	bed->chromStart = rec->chromStart;
	bed->chromEnd = rec->chromEnd;
	bed->name = lmCloneString(bedLm, rec->name);
	slAddHead(pBedList, bed);
	}
    (*pMaxOut)--;
    if (*pMaxOut <= 0)
	break;
    }
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
lmCleanup(&lm);
vcfFileFree(&vcff);
}
示例#21
0
struct mouseChromCache *newMouseChromCache(char *chrom, int chromSize, 
	char *ratMouseDir)
/* Create a new chromCache. */
{
struct mouseChromCache *mcc;
char fileName[512];
struct lineFile *lf;
char *row[3];
int start,end;
long long *pPos;

/* Open up file with actual alignments.  Warn and return NULL
 * if it doesn't exist. */
sprintf(fileName, "%s/%s.axt", ratMouseDir, chrom);
lf = lineFileMayOpen(fileName, TRUE);

/* Allocate structure and store basic info in it. */
AllocVar(mcc);
mcc->name = cloneString(chrom);
mcc->size = chromSize;
mcc->lf = lf;
if (lf == NULL)
    {
    warn("%s doesn't exist", fileName);
    if (!noDieMissing)
        noWarnAbort(); 
    return mcc;
    }

/* Read index file into bk. */
sprintf(fileName, "%s/%s.axt.ix", ratMouseDir, chrom);
mcc->bk = binKeeperNew(0, chromSize);
lf = lineFileOpen(fileName, TRUE);
verbose(1, "Reading %s\n", fileName);
while (lineFileRow(lf, row))
    {
    start = lineFileNeedNum(lf, row, 0);
    end = lineFileNeedNum(lf, row, 1) + start;
    AllocVar(pPos);
    *pPos = atoll(row[2]);
    binKeeperAdd(mcc->bk, start, end, pPos);
    }
lineFileClose(&lf);

/* Return initialized object. */
return mcc;
}
static void usage()
/* Explain usage and exit. */
{
struct bzp *bzp = bzpDefault();
printf("blatzClient version %d - Ask server to do\n", bzpVersion());
printf("cross-species DNA alignments and save results.\n");
printf("usage:\n");
printf("   blatzClient queryFile outputFile.\n");
printf("The queryFile can be in fasta, nib, or 2bit format or a \n");
printf("text file containing the names of the above files one per line.\n");
printf("It's important that the sequence be repeat masked with repeats in\n");
printf("lower case.\n");
printf("Options: (defaults are shown for numerical parameters)\n");
bzpClientOptionsHelp(bzp);
printf("  -port=%d Use specified TCP/IP port\n", bzpDefaultPort);
printf("  -host=%s Query specified host\n", host);
noWarnAbort();
}
示例#23
0
struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix)
/* Return some semi-random IDs from a VCF file. */
{
/* Read 10000 items from vcf file,  or if they ask for a big list, then 4x what they ask for. */
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database));
struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE) :
				lineFileMayOpen(fileName, TRUE);
if (lf == NULL)
    noWarnAbort();
int orderedCount = count * 4;
if (orderedCount < 100)
    orderedCount = 100;
struct slName *idList = NULL;
char *words[4];
int i;
for (i = 0;  i < orderedCount && lineFileChop(lf, words); i++)
    {
    // compress runs of identical ID, in case most are placeholder
    if (i == 0 || !sameString(words[2], idList->name))
	slAddHead(&idList, slNameNew(words[2]));
    }
lineFileClose(&lf);
/* Shuffle list and trim it to count if necessary. */
shuffleList(&idList);
struct slName *sl;
for (sl = idList, i = 0; sl != NULL; sl = sl->next, i++)
    {
    if (i+1 >= count)
	{
	slNameFreeList(&(sl->next));
	break;
	}
    }
freez(&fileName);
return idList;
}
示例#24
0
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, 
	struct hash *chromSizeHash, struct lm *lm, 
	int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
	lineFileReuse(lf);
	break;
	}

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
	lmAllocVar(chromHash->lm, chrom);
	hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
	chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
	slAddHead(&chromList, chrom);
	}

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end line (%u) %d of %s.", 
		item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
	{
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
	        lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
	if (!clipDontDie)
	    noWarnAbort();
	}
    else
	{
	slAddHead(&chrom->itemList, item);
	}
    }
slSort(&chromList, bedGraphChromCmpName);

/* Loop through each chromosome and output the item list, broken into sections
 * for that chrom. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Check to make sure no overlap between items. */
    struct bwgBedGraphItem *item = chrom->itemList, *nextItem;
    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
        {
	if (item->end > nextItem->start)
	    errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
	        chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end);
	item = nextItem;
	}

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
	{
	/* Find end item of this section, and start item for next section.
	 * Terminate list at end item. */
	int sectionSize = 0;
	int i;
	endItem = startItem;
	for (i=0; i<itemsPerSlot; ++i)
	    {
	    if (nextStartItem == NULL)
		break;
	    endItem = nextStartItem;
	    nextStartItem = nextStartItem->next;
	    ++sectionSize;
	    }
	endItem->next = NULL;

	/* Fill in section and add it to section list. */
	struct bwgSection *section;
	lmAllocVar(lm, section);
	section->chrom = cloneString(chrom->name);
	section->start = startItem->start;
	section->end = endItem->end;
	section->type = bwgTypeBedGraph;
	section->items.bedGraphList = startItem;
	section->itemCount = sectionSize;
	slAddHead(pSectionList, section);
	}
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in 
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
示例#25
0
static void parseSteppedSection(struct lineFile *lf, boolean clipDontDie, 
	struct hash *chromSizeHash, char *initialLine, 
	struct lm *lm, int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse out a variableStep or fixedStep section and add it to list, breaking it up as need be. */
{
/* Parse out first word of initial line and make sure it is something we recognize. */
char *typeWord = nextWord(&initialLine);
enum bwgSectionType type = bwgTypeFixedStep;
if (sameString(typeWord, "variableStep"))
    type = bwgTypeVariableStep;
else if (sameString(typeWord, "fixedStep"))
    type = bwgTypeFixedStep;
else
    errAbort("Unknown type %s\n", typeWord);

/* Set up defaults for values we hope to parse out of rest of line. */
int span = 0;
bits32 step = 0;
bits32 start = 0;
char *chrom = NULL;

/* Parse out var=val pairs. */
char *varEqVal;
while ((varEqVal = nextWord(&initialLine)) != NULL)
    {
    char *wordPairs[2];
    int wc = chopByChar(varEqVal, '=', wordPairs, 2);
    if (wc != 2)
        errAbort("strange var=val pair line %d of %s", lf->lineIx, lf->fileName);
    char *var = wordPairs[0];
    char *val = wordPairs[1];
    if (sameString(var, "chrom"))
        chrom = cloneString(val);
    else if (sameString(var, "span"))
	span = parseUnsignedVal(lf, var, val);
    else if (sameString(var, "step"))
	step = parseUnsignedVal(lf, var, val);
    else if (sameString(var, "start"))
	{
        start = parseUnsignedVal(lf, var, val);
	}
    else
	errAbort("Unknown setting %s=%s line %d of %s", var, val, lf->lineIx, lf->fileName);
    }

/* Check that we have all that are required and no more, and call type-specific routine to parse
 * rest of section. */
if (chrom == NULL)
    errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM);
if (start > chromSize)
    {
    warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
    	lf->lineIx, lf->fileName, chrom, chromSize, start);
    if (!clipDontDie)
        noWarnAbort();
    }
if (type == bwgTypeFixedStep)
    {
    if (start == 0)
	errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (step == 0)
	errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (span == 0)
	span = step;
    parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, 
    	chrom, chromSize, span, start-1, step, pSectionList);
    }
else
    {
    if (start != 0)
	errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (step != 0)
	errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName);
    if (span == 0)
	span = 1;
    parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, 
    	chrom, chromSize, span, pSectionList);
    }
}
示例#26
0
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList)
/* Read the single column data in section until get to end. */
{
struct lm *lmLocal = lmInit(0);

/* Stream through section until get to end of file or next section,
 * adding values from single column to list. */
char *words[2];
char *line;
struct bwgVariableStepItem *item, *nextItem, *itemList = NULL;
int originalSectionSize = 0;
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 2))
	{
        lineFileReuse(lf);
	break;
	}
    chopLine(line, words);
    lmAllocVar(lmLocal, item);
    int start = lineFileNeedNum(lf, words, 0);
    if (start <= 0)
	{
	errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
	    lf->lineIx, lf->fileName);
	}
    item->start = start - 1;
    item->val = lineFileNeedDouble(lf, words, 1);
    if (item->start + span > chromSize)
        {
	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
	    lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
	if (!clipDontDie)
	    noWarnAbort();
	}
    else
        {
	slAddHead(&itemList, item);
	++originalSectionSize;
	}
    }
slSort(&itemList, bwgVariableStepItemCmp);

/* Make sure no overlap between items. */
if (itemList != NULL)
    {
    item = itemList;
    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
        {
	if (item->start + span > nextItem->start)
	    errAbort("Overlap on %s between items starting at %d and %d.\n"
	             "Please remove overlaps and try again",
		    chrom, item->start, nextItem->start);
	item = nextItem;
	}
    }

/* Break up into sections of no more than items-per-slot size. */
int sizeLeft = originalSectionSize;
for (item = itemList; item != NULL; )
    {
    /* Figure out size of this section  */
    int sectionSize = sizeLeft;
    if (sectionSize > itemsPerSlot)
        sectionSize = itemsPerSlot;
    sizeLeft -= sectionSize;

    /* Convert from list to array representation. */
    struct bwgVariableStepPacked *packed, *p;		
    p = lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i=0; i<sectionSize; ++i)
        {
	p->start = item->start;
	p->val = item->val;
	item = item->next;
	++p;
	}

    /* Fill in section and add it to list. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = packed[0].start;
    section->end = packed[sectionSize-1].start + span;
    section->type = bwgTypeVariableStep;
    section->items.variableStepPacked = packed;
    section->itemSpan = span;
    section->itemCount = sectionSize;
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
示例#27
0
static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart, 
	bits32 step, struct bwgSection **pSectionList)
/* Read the single column data in section until get to end. */
{
struct lm *lmLocal = lmInit(0);

/* Stream through section until get to end of file or next section,
 * adding values from single column to list. */
char *words[1];
char *line;
struct bwgFixedStepItem *item, *itemList = NULL;
int originalSectionSize = 0;
bits32 sectionEnd = sectionStart;
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 1))
	{
        lineFileReuse(lf);
	break;
	}
    chopLine(line, words);
    lmAllocVar(lmLocal, item);
    item->val = lineFileNeedDouble(lf, words, 0);
    if (sectionEnd + span > chromSize)
	{
	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
	    lf->lineIx, lf->fileName, chrom, chromSize, sectionEnd + span);
	if (!clipDontDie)
	    noWarnAbort();
	}
    else
	{
	slAddHead(&itemList, item);
	++originalSectionSize;
	}
    sectionEnd += step;
    }
slReverse(&itemList);

/* Break up into sections of no more than items-per-slot size, and convert to packed format. */
int sizeLeft = originalSectionSize;
for (item = itemList; item != NULL; )
    {
    /* Figure out size of this section  */
    int sectionSize = sizeLeft;
    if (sectionSize > itemsPerSlot)
        sectionSize = itemsPerSlot;
    sizeLeft -= sectionSize;


    /* Allocate and fill in section. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = sectionStart;
    sectionStart += sectionSize * step;
    section->end = sectionStart - step + span;
    section->type = bwgTypeFixedStep;
    section->itemStep = step;
    section->itemSpan = span;
    section->itemCount = sectionSize;

    /* Allocate array for data, and copy from list to array representation */
    struct bwgFixedStepPacked *packed;		/* An array */
    section->items.fixedStepPacked = lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i=0; i<sectionSize; ++i)
        {
	packed->val = item->val;
	item = item->next;
	++packed;
	}

    /* Add section to list. */
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
示例#28
0
void outputUniqueOnSharedKey(char *inTab, struct asObject *as, struct asColumn *keyCol,
    struct slPair *fieldList, char *outTab, char *outErr)
/* Scan through tab-separated file inTab and output fields in fieldList to
 * outTab. Make sure there is only one row for each value of sharedKey field. 
 * If there would be multiple different rows in output with sharedKey, 
 * complain about it in outErr. */
{
/* Open input and output. */
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *f = mustOpen(outTab, "w");
FILE *fErr = mustOpen(outErr, "w");

/* Set up array for input fields with more than we expect for better error reporting. */
int oldFieldCount = slCount(as->columnList);
int newFieldCount = slCount(fieldList);
int allocFields = oldFieldCount+10;
char *words[allocFields];

/* Set up array for output fields that says where to find them in input. */
int *oldIx = makeNewToOldArray(as, fieldList);

/* Figure out index of key field. */
int keyIx = slIxFromElement(as->columnList, keyCol);

/* Go through each line of input, outputting selected columns. */
struct hash *uniqHash = hashNew(18); 
struct hash *errHash = hashNew(0);
struct dyString *dy = dyStringNew(1024);
int fieldCount;
while ((fieldCount = lineFileChopNextTab(lf, words, allocFields)) > 0)
    {
    lineFileExpectWords(lf, oldFieldCount, fieldCount);

    /* Collect possible output into dy. */
    dyStringClear(dy);
    dyStringPrintf(dy, "%s", words[oldIx[0]]);
    int i;
    for (i=1; i<newFieldCount; ++i)
	dyStringPrintf(dy,  "\t%s", words[oldIx[i]]);
    dyStringPrintf(dy, "\n");

    /* Check that this line is either unique for this key, or the same as previous lines
     * for the key. */
    char *key = words[keyIx];
    char *oldVal = hashFindVal(uniqHash, key);
    if (oldVal != NULL)
        {
	if (!sameString(oldVal, dy->string))
	    {
	    /* Error reporting is a little complex.  We want to output all lines associated
	     * with key, including the first one, but we only want to do first line once. */
	    if (!hashLookup(errHash, key))
	        {
		hashAdd(errHash, key, NULL);
		fputs(oldVal, fErr);
		}
	    fputs(dy->string, fErr);
	    }
	}
    else
	{
	hashAdd(uniqHash, key, cloneString(dy->string));
        fputs(dy->string, f);
	}
    }

/* Report error summary */
if (errHash->elCount > 0)
    {
    warn("Warning: %d shared keys have multiple values in table 2. See %s.\n"
         "Only first row for each key put in %s" , errHash->elCount, outErr, outTab);
    if (!mergeOk)
        noWarnAbort();
    }

/* Clean up and go home. */
freez(&oldIx);
carefulClose(&fErr);
carefulClose(&f);
lineFileClose(&lf);
}
示例#29
0
static struct labeledFile *parseToLabeledFiles(struct customPp *cpp,
	int colCount, char *formatType, char *markerType,
	boolean firstLineLabels, struct sqlConnection *conn, boolean report)
/* Parse out cpp until next track, creating a list of labeled
 * binary files. */
{
/* Allocate a labeledFile for each column of real data. */
struct labeledFile *fileList = NULL, *fileEl;
int posColCount = markerCols(markerType);
int i;
for (i=posColCount; i<colCount; ++i)
    {
    struct tempName tempName;
    char buf[16];
    safef(buf, sizeof(buf), "hggUp%d", i);
    trashDirFile(&tempName, "hgg", buf, ".cgb");
    safef(buf, sizeof(buf), "%d", i+1-posColCount);
    fileEl = labeledFileNew(tempName.forCgi, buf);
    slAddHead(&fileList, fileEl);
    }
slReverse(&fileList);

boolean ok = FALSE;
if (sameString(markerType, cgfMarkerGenomic))
    ok = mayProcessGenomic(conn, cpp, colCount, formatType, 
    	firstLineLabels, fileList, report);
else if (sameString(markerType, cgfMarkerSts))
    ok = mayProcessDb(conn, cpp, colCount, formatType, 
    	firstLineLabels, fileList, "stsMap",
    	"select chrom,round((chromStart+chromEnd)*0.5),name from %s",
	"stsAlias", "select alias,trueName from %s", report, FALSE);
else if (sameString(markerType, cgfMarkerSnp))
    {
    char *snpTable = hFindLatestSnpTableConn(conn, NULL);
    if (snpTable == NULL)
        errAbort("No SNP table in %s", sqlGetDatabase(conn));
    char *query = "select chrom,chromStart,name from %s";
    ok = mayProcessDb(conn, cpp, colCount, formatType, 
	    firstLineLabels, fileList, snpTable, 
	    query, NULL, NULL, report, TRUE);
    }
else if (sameString(markerType, cgfMarkerAffy100))
    {
    warn("Support for Affy 100k chip coming soon.");
    }
else if (sameString(markerType, cgfMarkerAffy500)
      || sameString(markerType, cgfMarkerAffy6)
      || sameString(markerType, cgfMarkerAffy6SV)
      || sameString(markerType, cgfMarkerHumanHap300)
      || sameString(markerType, cgfMarkerHumanHap550)
      || sameString(markerType, cgfMarkerHumanHap650)
      || sameString(markerType, cgfMarkerHumanHap1M)
      || sameString(markerType, cgfMarkerAgilentCgh244A)
        )
    {
    char *table = "";
    if (sameString(markerType, cgfMarkerAffy500)) table = affy500Table;
    if (sameString(markerType, cgfMarkerAffy6))   table = affy6Table;
    if (sameString(markerType, cgfMarkerAffy6SV)) table = affy6SVTable;
    if (sameString(markerType, cgfMarkerHumanHap300)) table = illumina300Table;
    if (sameString(markerType, cgfMarkerHumanHap550)) table = illumina550Table;
    if (sameString(markerType, cgfMarkerHumanHap650)) table = illumina650Table;
    if (sameString(markerType, cgfMarkerHumanHap1M)) table = illumina1MTable;
    if (sameString(markerType, cgfMarkerAgilentCgh244A)) table = agilentCgh244ATable;
    if (!sqlTableExists(conn, table))
        errAbort("Sorry, no data for %s on this assembly.",
		markerType);
    ok = mayProcessDb(conn, cpp, colCount, formatType,
    	firstLineLabels, fileList, table,
    	"select chrom,chromStart,name from %s", NULL, NULL, report, FALSE);
    }
else
    {
    errAbort("Unknown identifier format. markerType=%s", markerType);
    }
if (ok)
    return fileList;
else
    {
    noWarnAbort();
    return NULL;
    }
}
void ctgToChromFa(char *chromName, char *insertFile, char *chromDir, 
	char *orderLst, char *outName, struct hash *liftHash)
/* ctgToChromFa - convert contig level fa files to chromosome level. */
{
struct hash *uniq = newHash(0);
struct bigInsert *bi;
struct chromInserts *chromInserts;
struct hash *insertHash = newHash(9);
struct lineFile *lf = lineFileOpen(orderLst, TRUE);
FILE *f = mustOpen(outName, "w");
char ctgFaName[512];
char *words[2];
int liftChromSize = 0;
int actualChromSize = 0;
boolean isFirst = TRUE;

chromInsertsRead(insertFile, insertHash);
chromInserts = hashFindVal(insertHash, chromName);
fprintf(f, ">%s\n", chromName);
while (lineFileNextRow(lf, words, 1))
    {
    char *contig = words[0];
    int nSize;
    
    if (liftHash != NULL)
        {
	struct lift *lift = hashMustFindVal(liftHash, contig);
	nSize = lift->nBefore;
	liftChromSize = lift->chromSize;
	}
    else
        nSize = chromInsertsGapSize(chromInserts, rmChromPrefix(contig), isFirst);
    hashAddUnique(uniq, contig, NULL);
    addN(f, nSize);
    actualChromSize += nSize;
    isFirst = FALSE;
    sprintf(ctgFaName, "%s/%s/%s.fa", chromDir, contig, contig);
    if (fileExists(ctgFaName))
        {
	actualChromSize += addFa(f, ctgFaName);
	}
    else
        {
	warn("%s does not exist\n", ctgFaName);
	if (!cgiVarExists("missOk"))
	    noWarnAbort();
	}
    }
lineFileClose(&lf);
if (chromInserts != NULL)
    if  ((bi = chromInserts->terminal) != NULL)
        {
	addN(f, bi->size);
	actualChromSize += bi->size;
	}
if (liftHash != NULL)
    {
    if (actualChromSize > liftChromSize)
	errAbort("Error: chromosome size from lift file is %d, but actual fa size is %d.  Possible inconsistency between lift and inserts?",
		 liftChromSize, actualChromSize);
    else if (actualChromSize < liftChromSize)
	addN(f, (liftChromSize - actualChromSize));
    }
if (linePos != 0)
   fputc('\n', f);
fclose(f);
}