Example #1
static char *mkStatusValSet(struct sqlConnection *conn)
/* Generate set of CCDS status values to use, based on cmd options or
 * defaults.  WARNING: static return. */
{
static char *statValSet = NULL;
if (statValSet != NULL)
    return statValSet;
struct dyString *buf = dyStringNew(0);
struct slName *val;
struct hash *validStats = ccdsStatusValLoad(conn);

for (val = statVals; val != NULL; val = val->next)
    {
    ccdsStatusValCheck(validStats, val->name);
    if (buf->stringSize > 0)
        dyStringAppendC(buf, ',');
    dyStringPrintf(buf, "\"%s\"", val->name);
    }
hashFree(&validStats);
statValSet = dyStringCannibalize(&buf);
return statValSet;
}
Example #2
static struct targetHits *bundleIntoTargets(struct axtBundle *abList)
/* BLAST typically outputs everything on the same query and target
 * in one clump.  This routine rearranges axts in abList to do this. */
{
struct targetHits *targetList = NULL, *target;
struct hash *targetHash = newHash(10);
struct axtBundle *ab;
struct axtRef *ref;

/* Build up a list of targets in database hit by query sorted by
 * score of hits. */
for (ab = abList; ab != NULL; ab = ab->next)
    {
    struct axt *axt;
    for (axt = ab->axtList; axt != NULL; axt = axt->next)
	{
	target = hashFindVal(targetHash, axt->tName);
	if (target == NULL)
	    {
	    AllocVar(target);
	    slAddHead(&targetList, target);
	    hashAdd(targetHash, axt->tName, target);
	    target->name = cloneString(axt->tName);
	    target->size = ab->tSize;
	    }
	if (axt->score > target->score)
	    target->score = axt->score;
	AllocVar(ref);
	ref->axt = axt;
	slAddHead(&target->axtList, ref);
	}
    }
slSort(&targetList, targetHitsCmpScore);
for (target = targetList; target != NULL; target = target->next)
    slSort(&target->axtList, axtRefCmpScore);

hashFree(&targetHash);
return targetList;
}
Example #3
File: seqOut.c Project: bowhan/kent
void doRefGeneMrnaSequence(struct sqlConnection *conn, struct bed *bedList)
/* Fetch refGene mRNA sequence. */
{
struct hash *uniqHash = newHash(18);
struct bed *bed;
boolean gotResults = FALSE;
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    if (!hashLookup(uniqHash, bed->name))
        {
	char *fa = hGetSeqAndId(conn, bed->name, NULL);
	hashAdd(uniqHash, bed->name, NULL);
	if (fa != NULL)
	    hPrintf("%s", fa);
	freez(&fa);
	gotResults = TRUE;
	}
    }
if (!gotResults)
    hPrintf(NO_RESULTS);
hashFree(&uniqHash);
}
Example #4
struct slName *valsForVar(char *varName, struct taggedFile *tfList)
/* Return all values for given variable. */
{
struct slName *list = NULL;
struct hash *uniqHash = hashNew(7);
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    char *val = metaTagValFindVal(tf->tagList, varName);
    if (val != NULL)
        {
	if (hashLookup(uniqHash, val) == NULL)
	    {
	    hashAdd(uniqHash, val, NULL);
	    slNameAddHead(&list, val);
	    }
        }
    }
hashFree(&uniqHash);
slNameSort(&list);
return list;
}
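Examples #3 and #4 (and #7 and #22 below) share the same de-duplication idiom: a throwaway uniqHash records which keys have already been seen, so each distinct key is processed exactly once before the hash is discarded. Below is a minimal sketch of that idiom, assuming the kent-style hash.h/common.h API (hashNew, hashLookup, hashAdd, hashFree) used throughout these examples; printUnique and its names array are hypothetical.

#include "common.h"
#include "hash.h"

static void printUnique(char **names, int count)
/* Print each distinct name once, in input order. */
{
struct hash *uniqHash = hashNew(0);	/* 0 picks a reasonable default size */
int i;
for (i = 0; i < count; ++i)
    {
    if (hashLookup(uniqHash, names[i]) == NULL)
        {
        hashAdd(uniqHash, names[i], NULL);	/* value unused; key presence is the flag */
        printf("%s\n", names[i]);
        }
    }
hashFree(&uniqHash);	/* frees the table and its elements, not the caller's strings */
}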
Example #5
File: ra.c Project: Puneet-Shivanand/zinba
void raFoldIn(char *fileName, struct hash *hashOfHash)
/* Read ra's in file name and fold them into hashOfHash. 
 * This will add ra's and ra fields to whatever already
 * exists in the hashOfHash,  overriding fields of the
 * same name if they exist already. */
{
struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
if (lf != NULL)
    {
    struct hash *uniqHash = hashNew(0);
    char *name;
    while ((name = raFoldInOneRetName(lf, hashOfHash)) != NULL)
	{
	if (hashLookup(uniqHash, name))
	    errAbort("%s duplicated in record ending line %d of %s", name, 
	    	lf->lineIx, lf->fileName);
	hashAdd(uniqHash, name, NULL);
	}
    lineFileClose(&lf);
    hashFree(&uniqHash);
    }
}
Example #6
struct hash *txgIntoKeeperHash(struct txGraph *txgList)
/* Create a hash full of bin keepers (one for each chromosome or contig).
 * The binKeepers are full of txGraphs. */
{
struct hash *sizeHash = txgChromMinSizeHash(txgList);
struct hash *bkHash = hashNew(16);
struct txGraph *txg;
for (txg = txgList; txg != NULL; txg = txg->next)
    {
    struct binKeeper *bk = hashFindVal(bkHash, txg->tName);
    if (bk == NULL)
        {
	struct minChromSize *chrom = hashMustFindVal(sizeHash, txg->tName);
	verbose(3, "New binKeeper for %s\n", txg->tName);
	bk = binKeeperNew(0, chrom->minSize);
	hashAdd(bkHash, txg->tName, bk);
	}
    binKeeperAdd(bk, txg->tStart, txg->tEnd, txg);
    }
hashFree(&sizeHash);
return bkHash;
}
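Example #6, like #2 and #18, uses the complementary find-or-create idiom: hashFindVal returns NULL on a miss, at which point a new value is allocated and registered under the key so that later lookups reuse it. A minimal sketch under the same kent hash.h/common.h assumptions; struct counter and countWord are hypothetical.

#include "common.h"
#include "hash.h"

struct counter
/* Running tally stored as a hash value. */
    {
    int count;
    };

static void countWord(struct hash *hash, char *word)
/* Look up the counter for word, creating it on first sight, then bump it. */
{
struct counter *c = hashFindVal(hash, word);
if (c == NULL)
    {
    AllocVar(c);		/* zeroed allocation, kent style */
    hashAdd(hash, word, c);	/* the hash keeps a pointer to c under key word */
    }
c->count += 1;
}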
Example #7
struct slPair *tagListIncludingParents(struct tagStanza *stanza)
/* Return a list of all tags including ones defined in parents. */
{
struct hash *uniq = hashNew(0);
struct slPair *list = NULL;
struct tagStanza *ts;
for (ts = stanza; ts != NULL; ts = ts->parent)
    {
    struct slPair *pair;
    for (pair = ts->tagList; pair != NULL; pair = pair->next)
       {
       if (!hashLookup(uniq, pair->name))
           {
	   slPairAdd(&list, pair->name, pair->val);
	   hashAdd(uniq, pair->name, pair);
	   }
       }
    }
hashFree(&uniq);
slReverse(&list);
return list;
}
Example #8
File: upload.c Project: Truhigh/goahead
PUBLIC void websFreeUpload(Webs *wp)
{
    WebsUpload  *up;
    WebsKey     *s;

    for (s = hashFirst(wp->files); s; s = hashNext(wp->files, s)) {
        up = s->content.value.symbol;
        freeUploadFile(up);
        if (up == wp->currentFile) {
            wp->currentFile = 0;
        }
    }
    hashFree(wp->files);
    if (wp->currentFile) {
        freeUploadFile(wp->currentFile);
        wp->currentFile = 0;
    }
    if (wp->upfd >= 0) {
        close(wp->upfd);
        wp->upfd = -1;
    }
}
Example #9
static struct genePos *wildAssociationFilter(
	struct slName *wildList, boolean orLogic, 
	struct column *col, struct sqlConnection *conn, struct genePos *list)
/* Handle relatively slow filtering when there is a wildcard present. */
{
struct assocGroup *ag = assocGroupNew(16);
struct genePos *gp;
struct hash *passHash = newHash(16); /* Hash of items passing filter. */
int assocCount = 0;
struct sqlResult *sr;
char **row;

/* Build up associations. */
sr = sqlGetResult(conn, col->queryFull);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++assocCount;
    assocGroupAdd(ag, row[0],row[1]);
    }
sqlFreeResult(&sr);

/* Look for matching associations and put them on newList. */
for (gp = list; gp != NULL; gp = gp->next)
    {
    char *key = (col->protKey 
	? (kgVersion == KG_III ? lookupProtein(conn, gp->name) : gp->protein)
	: gp->name);
    struct assocList *al = hashFindVal(ag->listHash, key);
    if (al != NULL)
	{
	if (wildList == NULL || wildMatchRefs(wildList, al->list, orLogic))
	    hashAdd(passHash, gp->name, gp);
	}
    }
list = weedUnlessInHash(list, passHash);
hashFree(&passHash);
assocGroupFree(&ag);
return list;
}
Example #10
void statsOnSubsets(struct nearTest *list, int subIx, FILE *f)
/* Report tests of certain subtype. */
{
struct nearTest *test;
struct hash *hash = newHash(0);
struct slName *typeList = NULL, *type;

fprintf(f, "\n%s subtotals\n", nearTestInfoTypes[subIx]);

/* Get list of all types in this field. */
for (test = list; test != NULL; test = test->next)
    {
    char *info = test->info[subIx];
    if (!hashLookup(hash, info))
       {
       type = slNameNew(info);
       hashAdd(hash, info, type);
       slAddHead(&typeList, type);
       }
    }
slNameSort(&typeList);
hashFree(&hash);

for (type = typeList; type != NULL; type = type->next)
    {
    struct qaStatistics *stats;
    AllocVar(stats);
    for (test = list; test != NULL; test = test->next)
        {
	if (sameString(type->name, test->info[subIx]))
	    {
	    qaStatisticsAdd(stats, test->status);
	    }
	}
    qaStatisticsReport(stats, type->name, f);
    freez(&stats);
    }
}
Example #11
struct group *groupTracks(char *db, struct trackDb *tracks)
/* Make up groups and assign tracks to groups. */
{
struct trackDb *track;
struct trackRef *tr;
struct group *group, *groups = NULL;
struct grp *grp;
struct grp *grps = hLoadGrps(db);
struct hash *groupHash = newHash(8);

/* Sort groups by priority */
slSort(&grps, cmpGroupPri);

/* Create hash and list of groups */
for (grp = grps; grp != NULL; grp = grp->next)
    {
    AllocVar(group);
    group->name = cloneString(grp->name);
    group->label = cloneString(grp->label);
    slAddTail(&groups, group);
    hashAdd(groupHash, grp->name, group);
    }

/* Add tracks to group */
for (track = tracks; track != NULL; track = track->next)
    {
    AllocVar(tr);
    tr->track = track;
    group = hashFindVal(groupHash, track->grp);
    slAddHead(&group->tracks, tr);
    }

/* order tracks within groups by priority */
for (group = groups; group != NULL; group = group->next)
    slSort(&group->tracks, cmpTrackPri);
hashFree(&groupHash);
return groups;
}
Example #12
struct raLevel *raLevelRead(char *initialFile, struct lm *lm)
/* Read initialFile and all files that are included by it. */
{
/* Create structure for level. */
struct raLevel *level;
lmAllocVar(lm, level);
char dir[PATH_LEN];
splitPath(initialFile, dir, NULL, NULL);
level->name = lmCloneString(lm, dir);

/* Build up list of files by recursion. */
if (fileExists(initialFile))
    {
    struct hash *circularHash = hashNew(0);
    hashAdd(circularHash, initialFile, NULL);
    recurseThroughIncludes(initialFile, lm, circularHash, level, &level->fileList);
    hashFree(&circularHash);
    slReverse(&level->fileList);
    }

level->trackHash = hashLevelTracks(level);
return level;
}
Example #13
struct bed *wikiTrackGetFilteredBeds(char *name, struct region *regionList,
	struct lm *lm, int *retFieldCount)
/* Get list of beds from the wikiTrack in current regions and that pass
 *	filters.  You can bedFree this when done.  */
{
struct bed *bedList = NULL;
struct hash *idHash = NULL;
struct bedFilter *bf = NULL;
struct region *region = NULL;

/* Figure out how to filter things. */
bf = bedFilterForCustomTrack(name);
idHash = identifierHash(database, name);

/* Grab filtered beds for each region. */
for (region = regionList; region != NULL; region = region->next)
    wikiTrackFilteredBedOnRegion(region, idHash, bf, lm, &bedList);

/* clean up. */
hashFree(&idHash);
slReverse(&bedList);
return bedList;
}
Example #14
struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn)
/* Get fields of bigBed into hti structure. */
{
/* Get columns in asObject format. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);

/* Allocate hTableInfo structure and fill in info about bed fields. */
struct hash *colHash = asColumnHash(as);
struct hTableInfo *hti;
AllocVar(hti);
hti->rootName = cloneString(table);
hti->isPos= TRUE;
fillField(colHash, "chrom", hti->chromField);
fillField(colHash, "chromStart", hti->startField);
fillField(colHash, "chromEnd", hti->endField);
fillField(colHash, "name", hti->nameField);
fillField(colHash, "score", hti->scoreField);
fillField(colHash, "strand", hti->strandField);
fillField(colHash, "thickStart", hti->cdsStartField);
fillField(colHash, "thickEnd", hti->cdsEndField);
fillField(colHash, "blockCount", hti->countField);
fillField(colHash, "chromStarts", hti->startsField);
fillField(colHash, "blockSizes", hti->endsSizesField);
hti->hasCDS = (bbi->definedFieldCount >= 8);
hti->hasBlocks = (bbi->definedFieldCount >= 12);
char type[256];
safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount,
	(bbi->definedFieldCount == bbi->fieldCount ? '.' : '+'));
hti->type = cloneString(type);

freeMem(fileName);
hashFree(&colHash);
bbiFileClose(&bbi);
return hti;
}
Example #15
File: altSplice.c Project: bowhan/kent
char *altGraphXMakeImage(struct altGraphX *ag)
/* create a drawing of splicing pattern */
{
MgFont *font = mgSmallFont();
int fontHeight = mgFontLineHeight(font);
struct spaceSaver *ssList = NULL;
struct hash *heightHash = NULL;
int rowCount = 0;
struct tempName gifTn;
int pixWidth = atoi(cartUsualString(cart, "pix", DEFAULT_PIX_WIDTH ));
int pixHeight = 0;
struct hvGfx *hvg;
int lineHeight = 0;
double scale = 0;
Color shadesOfGray[9];
int maxShade = ArraySize(shadesOfGray)-1;

scale = (double)pixWidth/(ag->tEnd - ag->tStart);
lineHeight = 2 * fontHeight +1;
altGraphXLayout(ag, ag->tStart, ag->tEnd, scale, 100, &ssList, &heightHash, &rowCount);
hashFree(&heightHash);
pixHeight = rowCount * lineHeight;
makeTempName(&gifTn, "hgc", ".png");
hvg = hvGfxOpenPng(pixWidth, pixHeight, gifTn.forCgi, FALSE);
makeGrayShades(hvg, maxShade, shadesOfGray);
hvGfxSetClip(hvg, 0, 0, pixWidth, pixHeight);
altGraphXDrawPack(ag, ssList, hvg, 0, 0, pixWidth, lineHeight, lineHeight-1,
                  ag->tStart, ag->tEnd, scale,
		  font, MG_BLACK, shadesOfGray, "Dummy", NULL);
hvGfxUnclip(hvg);
hvGfxClose(&hvg);
printf(
       "<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n",
       gifTn.forHtml, pixWidth, pixHeight);
return cloneString(gifTn.forHtml);
}
Example #16
File: nibTwo.c Project: JinfengChen/pblat
void nibTwoCacheFree(struct nibTwoCache **pNtc)
/* Free up resources associated with nibTwoCache. */
{
struct nibTwoCache *ntc = *pNtc;
if (ntc != NULL)
    {
    freez(&ntc->pathName);
    if (ntc->isTwoBit)
        twoBitClose(&ntc->tbf);
    else
        {
	struct hashEl *el, *list = hashElListHash(ntc->nibHash);
	struct nibInfo *nib;
	for (el = list; el != NULL; el = el->next)
	     {
	     nib = el->val;
	     nibInfoFree(&nib);
	     }
	hashElFreeList(&list);
	hashFree(&ntc->nibHash);
	}
    freez(pNtc);
    }
}
Example #17
File: bigBed.c Project: cestmoi7/AGAPE
struct bigBedInterval *bigBedMultiNameQuery(struct bbiFile *bbi, struct bptFile *index,
    int fieldIx, char **names, int nameCount, struct lm *lm)
/* Fetch all records matching any of the names. Using given index on given field.
 * Return list is allocated out of lm. */
{
/* Set up name index and get list of chunks that match any of our names. */
struct fileOffsetSize *fosList = bigBedChunksMatchingNames(bbi, index, names, nameCount);

/* Create hash of all names. */
struct hash *hash = newHash(0);
int nameIx;
for (nameIx=0; nameIx < nameCount; ++nameIx)
    hashAdd(hash, names[nameIx], NULL);


/* Get intervals where name matches hash target. */
struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, 
    bbWordIsInHash, fieldIx, hash, lm);

/* Clean up and return results. */
slFreeList(&fosList);
hashFree(&hash);
return intervalList;
}
Example #18
struct gffGroup *splitGroupByChrom(struct gffFile *gff, struct gffGroup *oldGroup)
/* Split up a group into multiple groups,  each one chromosome specific. */
{
struct gffGroup *groupList = NULL, *group;
struct hash *seqHash = hashNew(0);

verbose(2, "Regrouping %s with %d elements\n", oldGroup->name, slCount(oldGroup->lineList));
struct gffLine *gl, *nextGl;
for (gl = oldGroup->lineList; gl != NULL; gl = nextGl)
    {
    nextGl = gl->next;
    group = hashFindVal(seqHash, gl->seq);
    if (group == NULL)
        {
	AllocVar(group);
	group->name = oldGroup->name;
	group->seq = gl->seq;
	group->source = oldGroup->source;
	group->start = gl->start;
	group->end = gl->end;
	group->strand = gl->strand;
	slAddHead(&groupList, group);
	hashAdd(seqHash, group->seq, group);
	}
    else
        {
	group->start = min(gl->start, group->start);
	group->end = max(gl->end, group->end);
	}
    slAddHead(&group->lineList, gl);
    }
hashFree(&seqHash);
for (group = groupList; group != NULL; group = group->next)
    slReverse(&group->lineList);
return groupList;
}
Example #19
void mergeDataAndAlignments()
/** Load up the psls, hash them and transform into beds. */
{
char *pslFileName = NULL;
char *bedOutName = NULL;
char *affyFileName = NULL;
char *expRecordOutName = NULL;
char *expFileName = NULL;
struct hash *pslHash = NULL;
struct bed *bed = NULL;
/* Parse some arguments and make sure they exist. */
pslFileName = optionVal("pslFile", NULL);
if(pslFileName == NULL)
    errAbort("Must specify -pslFile flag. Use -help for usage.");
bedOutName = optionVal("bedOut", NULL);
if(bedOutName == NULL)
    errAbort("Must specify -bedOut flag. Use -help for usage.");
affyFileName = optionVal("affyFile", NULL);
if(affyFileName == NULL)
    errAbort("Must specify -affyFile flag. Use -help for usage.");
expRecordOutName = optionVal("expRecordOut", NULL);
if(expRecordOutName == NULL)
    errAbort("Must specify -expRecordOut flag. Use -help for usage.");
expFileName = optionVal("expFile", NULL);
if(expFileName == NULL)
    errAbort("Must specify -expFile flag. Use -help for usage.");
/* Hash psls according to their name. */
warn("Reading psls from: %s", pslFileName);
pslHash = hashPsls(pslFileName);
warn("Outputing beds:");
outputBedsFromPsls(pslHash, bedOutName, expRecordOutName, affyFileName, expFileName);
warn("\nFreeing Memory.");
hashTraverseVals(pslHash, pslFreeListWrapper);
hashFree(&pslHash);
warn("Done.");
}
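Examples #16 and #19 (and #25 and #30 below) show the cleanup side of the pattern: hashFree releases the table and its elements but not the values the caller stored in them, so value memory has to be freed first, either by walking the element list from hashElListHash (#16) or by handing a callback to hashTraverseVals or hashTraverseEls (#19, #25, #30). A minimal sketch assuming the same kent API; struct widget, freeWidgetVal, and freeWidgetHash are hypothetical.

#include "common.h"
#include "hash.h"

struct widget
/* Example value type stored in the hash. */
    {
    char *label;
    };

static void freeWidgetVal(void *val)
/* Callback for hashTraverseVals: free one stored widget. */
{
struct widget *w = val;
freeMem(w->label);
freeMem(w);
}

static void freeWidgetHash(struct hash **pHash)
/* Free the stored values, then the hash itself. */
{
if (*pHash != NULL)
    {
    hashTraverseVals(*pHash, freeWidgetVal);
    hashFree(pHash);
    }
}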
Example #20
File: joiner.c Project: bowhan/kent
static struct slName *parseDatabaseList(struct lineFile *lf, char *s)
/* Parse out comma-separated list of databases, with 
 * possible !db's. */
{
struct slName *list, *el;
struct hash *notHash;

/* Get comma-separated list. */
list = slNameListFromComma(s);
if (list == NULL)
     errAbort("Empty database name line %d of %s", 
	lf->lineIx, lf->fileName);

/* Remove !'s */
notHash = makeNotHash(list);
if (notHash != NULL)
    {
    struct slName *newList = NULL, *next;
    for (el = list; el != NULL; el = next)
        {
	next = el->next;
	if (el->name[0] != '!' && !hashLookup(notHash, el->name))
	    {
	    slAddHead(&newList, el);
	    }
	else
	    {
	    freeMem(el);
	    }
	}
    hashFree(&notHash);
    slReverse(&newList);
    list = newList;
    }
return list;
}
Example #21
File: web.c Project: elmargb/kentUtils
static void printSomeGenomeListHtmlNamedMaybeCheck(char *customOrgCgiName,
	 char *db, struct dbDb *dbList, char *onChangeText, boolean doCheck)
/* Prints to stdout the HTML to render a dropdown list
 * containing a list of the possible genomes to choose from.
 * param db - a database whose genome will be the default genome.
 *                       If NULL, no default selection.
 * param onChangeText - Optional (can be NULL) text to pass in
 *                              any onChange javascript. */
{
char *orgList[1024];
int numGenomes = 0;
struct dbDb *cur = NULL;
struct hash *hash = hashNew(10); // 2^10 entries = 1024
char *selGenome = hGenomeOrArchive(db);
char *values [1024];
char *cgiName;

for (cur = dbList; cur != NULL; cur = cur->next)
    {
    if (!hashFindVal(hash, cur->genome) &&
	(!doCheck || hDbExists(cur->name)))
        {
        hashAdd(hash, cur->genome, cur);
        orgList[numGenomes] = trackHubSkipHubName(cur->genome);
        values[numGenomes] = cur->genome;
        numGenomes++;
	if (numGenomes >= ArraySize(orgList))
	    internalErr();
        }
    }

cgiName = (customOrgCgiName != NULL) ? customOrgCgiName : orgCgiName;
cgiMakeDropListFull(cgiName, orgList, values, numGenomes,
                    selGenome, onChangeText);
hashFree(&hash);
}
Example #22
void showGenomes(char *genome, struct pcrServer *serverList)
/* Put up drop-down list with genomes on it. */
{
    struct hash *uniqHash = hashNew(8);
    struct pcrServer *server;
    char *onChangeText = "onchange='" ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG
                         ORGFORM_RESET_DB
                         ORGFORM_RESET_TARGET
                         ORGFORM_SUBMIT;

    printf("<SELECT NAME=\"org\" %s>\n", onChangeText);
    for (server = serverList; server != NULL; server = server->next)
    {
        if (!hashLookup(uniqHash, server->genome))
        {
            hashAdd(uniqHash, server->genome, NULL);
            printf("  <OPTION%s VALUE=\"%s\">%s</OPTION>\n",
                   (sameWord(genome, server->genome) ? " SELECTED" : ""),
                   server->genome, server->genome);
        }
    }
    printf("</SELECT>\n");
    hashFree(&uniqHash);
}
Example #23
void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from Big Bed.  If fields is NULL, then print out all fields. */
{
if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bigBedGetFields(table, conn);
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    hashAddInt(fieldHash, bb->name, i);

// If bigBed has name column, look up pasted/uploaded identifiers if any:
struct hash *idHash = NULL;
if (slCount(bbList) >= 4)
    idHash = identifierHash(db, table);

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

/* Open up bigBed file. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct asFilter *filter = NULL;

if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	}
    }

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
for (region = regionList; region != NULL; region = region->next)
    {
    struct lm *lm = lmInit(0);
    struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom,
    	region->start, region->end, 0, lm);
    char *row[bbi->fieldCount];
    char startBuf[16], endBuf[16];
    for (iv = ivList; iv != NULL; iv = iv->next)
        {
	bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
	if (asFilterOnRow(filter, row))
	    {
	    if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
		continue;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    }
	}
    lmCleanup(&lm);
    }

/* Clean up and exit. */
bbiFileClose(&bbi);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
Example #24
File: bam.c Project: maximilianh/kent
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from BAM.  If fields is NULL, then print out all fields. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in BAM and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bamGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
	idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

struct asObject *as = bamAsObj();
struct asFilter *filter = NULL;

if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	}
    }

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();

int maxOut = bigFileMaxOutput();
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    struct lm *lm = lmInit(0);
    char *fileName = bamFileName(table, conn, region->chrom);
    struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom,
    	region->start, region->end, lm);
    char *row[SAMALIGNMENT_NUM_COLS];
    char numBuf[BAM_NUM_BUF_SIZE];
    for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next)
        {
	samAlignmentToRow(sam, numBuf, row);
	if (asFilterOnRow(filter, row))
	    {
	    /* if we're looking for identifiers, check if this matches */
	    if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL))
		continue;

	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    maxOut --;
	    }
	}
    freeMem(fileName);
    lmCleanup(&lm);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
Example #25
void freeRbmTreeHash(struct hash **pTreeHash)
/* Free up a whole hash of rbmTrees of ranges. */
{
hashTraverseEls(*pTreeHash, hashElFreeRbmTree);
hashFree(pTreeHash);
}
Example #26
static struct mafAli *mafFromBed12(char *database, char *track,
    struct bed *bed, struct slName *orgList)
/* Construct a maf out of exons in bed. */
{
/* Loop through all blocks in bed, collecting a list of mafs, one
 * for each block.  While we're at it, make a hash of all species seen. */
struct hash *speciesHash = hashNew(0);
struct mafAli *mafList = NULL, *maf, *bigMaf;
struct mafComp *comp, *bigComp;
int totalTextSize = 0;
int i;
for (i=0; i<bed->blockCount; ++i)
    {
    int start = bed->chromStart + bed->chromStarts[i];
    int end = start + bed->blockSizes[i];
    if (thickOnly)
        {
	start = max(start, bed->thickStart);
	end = min(end, bed->thickEnd);
	}
    if (start < end)
        {
	maf = hgMafFrag(database, track, bed->chrom, start, end, '+',
	   database, NULL);
	slAddHead(&mafList, maf);
	for (comp = maf->components; comp != NULL; comp = comp->next)
	    hashStore(speciesHash, comp->src);
	totalTextSize += maf->textSize; 
	}
    }
slReverse(&mafList);

/* Add species in order list too */
struct slName *org;
for (org = orgList; org != NULL; org = org->next)
    hashStore(speciesHash, org->name);

/* Allocate memory for return maf that contains all blocks concatenated together. 
 * Also fill in components with any species seen at all. */
AllocVar(bigMaf);
bigMaf->textSize = totalTextSize;
struct hashCookie it = hashFirst(speciesHash);
struct hashEl *hel;
while ((hel = hashNext(&it)) != NULL)
    {
    AllocVar(bigComp);
    bigComp->src = cloneString(hel->name);
    bigComp->text = needLargeMem(totalTextSize + 1);
    memset(bigComp->text, '.', totalTextSize);
    bigComp->text[totalTextSize] = 0;
    bigComp->strand = '+';
    bigComp->srcSize = totalTextSize;	/* It's safe if a bit of a lie. */
    hel->val = bigComp;
    slAddHead(&bigMaf->components, bigComp);
    }

/* Loop through maf list copying in data. */
int textOffset = 0;
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    for (comp = maf->components; comp != NULL; comp = comp->next)
        {
	bigComp = hashMustFindVal(speciesHash, comp->src);
	memcpy(bigComp->text + textOffset, comp->text, maf->textSize);
	bigComp->size += comp->size;
	}
    textOffset += maf->textSize;
    }

/* Cope with strand of darkness. */
if (bed->strand[0] == '-')
    {
    for (comp = bigMaf->components; comp != NULL; comp = comp->next)
	reverseComplement(comp->text, bigMaf->textSize);
    }

/* If got an order list then reorder components according to it. */
if (orgList != NULL)
    {
    struct mafComp *newList = NULL;
    for (org = orgList; org != NULL; org = org->next)
        {
	comp = hashMustFindVal(speciesHash, org->name);
	slAddHead(&newList, comp);
	}
    slReverse(&newList);
    bigMaf->components = newList;
    }

/* Rename our own component to bed name */
comp = hashMustFindVal(speciesHash, database);
freeMem(comp->src);
comp->src = cloneString(bed->name);


/* Clean up and go home. */
hashFree(&speciesHash);
mafAliFreeList(&mafList);
return bigMaf;
}
Example #27
File: pfam.c Project: blumroy/kentUtils
static struct genePos *pfamAdvFilter(struct column *col, 
	struct sqlConnection *defaultConn, struct genePos *list)
/* Do advanced filter for pfam. */
{
char *terms = advFilterVal(col, "terms");
if (terms != NULL)
    {
    struct sqlConnection *conn = sqlConnect(col->protDb);
    char query[256];
    struct sqlResult *sr;
    struct dyString *dy = newDyString(1024);
    char **row;
    boolean orLogic = advFilterOrLogic(col, "logic", TRUE);
    struct slName *term, *termList = stringToSlNames(terms);
    struct hash *passHash = newHash(17);
    struct hash *prevHash = NULL;
    struct genePos *gp;

    /* Build up hash of all genes. */
    struct hash *geneHash = newHash(18);
    for (gp = list; gp != NULL; gp = gp->next)
        hashAdd(geneHash, gp->name, gp);
    for (term = termList; term != NULL; term = term->next)
        {
	/* Build up a list of IDs of descriptions that match term. */
	struct slName *idList = NULL, *id;
	if (isPfamId(term->name))
	    {
	    idList = slNameNew(term->name);
	    }
	else
	    {
	    char *sqlWild = sqlLikeFromWild(term->name);
	    sqlSafef(query, sizeof(query),
	    	"select pfamAC from pfamDesc where description like '%s'",
		sqlWild);
	    sr = sqlGetResult(conn, query);
	    while ((row = sqlNextRow(sr)) != NULL)
		{
	        id = slNameNew(row[0]);
		slAddHead(&idList, id);
		}
	    sqlFreeResult(&sr);
	    }

	if (idList != NULL)
	    {
	    /* Build up query that includes all IDs. */
	    dyStringClear(dy);
	    sqlDyStringPrintf(dy, "select name from %s where ", col->table);
	    sqlDyStringPrintf(dy, "value='%s'", idList->name);
	    for (id = idList->next; id != NULL; id = id->next)
		sqlDyStringPrintf(dy, "or value='%s'", id->name);

	    /* Execute query and put matchers into hash. */
	    sr = sqlGetResult(defaultConn, dy->string);
	    while ((row = sqlNextRow(sr)) != NULL)
		{
		gp = hashFindVal(geneHash, row[0]);
		if (gp != NULL)
		    {
		    char *name = gp->name;
		    if (prevHash == NULL || hashLookup(prevHash, name) != NULL)
			hashStore(passHash, name);
		    }
		}
	    sqlFreeResult(&sr);
	    slFreeList(&idList);
	    }
	if (!orLogic)
	    {
	    hashFree(&prevHash);
	    if (term->next != NULL)
		{
		prevHash = passHash;
		passHash = newHash(17);
		}
	    }
	}
    list = weedUnlessInHash(list, passHash);
    hashFree(&prevHash);
    hashFree(&passHash);
    dyStringFree(&dy);
    sqlDisconnect(&conn);
    }
return list;
}
Example #28
File: bedTrack.c Project: maximilianh/kent
static void filterBed(struct track *tg, struct linkedFeatures **pLfList)
/* Apply filters if any to mRNA linked features. */
{
struct linkedFeatures *lf, *next, *newList = NULL, *oldList = NULL;
struct mrnaUiData *mud = tg->extraUiData;
struct mrnaFilter *fil;
char *type;
boolean anyFilter = FALSE;
int colorIx = 0;	/* filter highlight color; 0 means don't color */
boolean isExclude = FALSE;
boolean andLogic = TRUE;

if (*pLfList == NULL || mud == NULL)
    return;

/* First make a quick pass through to see if we actually have
 * to do the filter. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    fil->pattern = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, fil->suffix, "");
    if (fil->pattern[0] != 0)
        anyFilter = TRUE;
    }
if (!anyFilter)
    return;

type = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, mud->filterTypeSuffix, "red");
if (sameString(type, "exclude"))
    isExclude = TRUE;
else if (sameString(type, "include"))
    isExclude = FALSE;
else
    colorIx = getFilterColor(type, MG_BLACK);
type = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, mud->logicTypeSuffix, "and");
andLogic = sameString(type, "and");

/* Make a pass though each filter, and start setting up search for
 * those that have some text. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    if (fil->pattern[0] != 0) // Already retrieved above.
	fil->hash = newHash(10);
    }

/* Scan tables id/name tables to build up hash of matching id's. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    struct hash *hash = fil->hash;
    int wordIx, wordCount;
    char *words[128];

    if (hash != NULL)
	{
	boolean anyWild;
	char *dupPat = cloneString(fil->pattern);
	wordCount = chopLine(dupPat, words);
	for (wordIx=0; wordIx <wordCount; ++wordIx)
	    {
	    char *pattern = cloneString(words[wordIx]);
	    if (lastChar(pattern) != '*')
		{
		int len = strlen(pattern)+1;
		pattern = needMoreMem(pattern, len, len+1);
		pattern[len-1] = '*';
		}
	    anyWild = (strchr(pattern, '*') != NULL || strchr(pattern, '?') != NULL);
	    touppers(pattern);
	    for(lf = *pLfList; lf != NULL; lf=lf->next)
		{
		char copy[SMALLBUF];
		boolean gotMatch;
		safef(copy, sizeof(copy), "%s", lf->name);
		touppers(copy);
		if (anyWild)
		    gotMatch = wildMatch(pattern, copy);
		else
		    gotMatch = sameString(pattern, copy);
		if (gotMatch)
		    {
		    hashAdd(hash, lf->name, NULL);
		    }
		}
	    freez(&pattern);
	    }
	freez(&dupPat);
	}
    }

/* Scan through linked features coloring and or including/excluding ones that
 * match filter. */
for (lf = *pLfList; lf != NULL; lf = next)
    {
    boolean passed = andLogic;
    next = lf->next;
    for (fil = mud->filterList; fil != NULL; fil = fil->next)
	{
	if (fil->hash != NULL)
	    {
	    if (hashLookup(fil->hash, lf->name) == NULL)
		{
		if (andLogic)
		    passed = FALSE;
		}
	    else
		{
		if (!andLogic)
		    passed = TRUE;
		}
	    }
	}
    if (passed ^ isExclude)
	{
	slAddHead(&newList, lf);
	if (colorIx > 0)
	    lf->filterColor = colorIx;
	}
    else
        {
	slAddHead(&oldList, lf);
	}
    }

slReverse(&newList);
slReverse(&oldList);
if (colorIx > 0)
   {
   /* Draw stuff that passes filter first in full mode, last in dense. */
   if (tg->visibility == tvDense)
       {
       newList = slCat(oldList, newList);
       }
   else
       {
       newList = slCat(newList, oldList);
       }
   }
*pLfList = newList;
tg->limitedVisSet = FALSE;	/* Need to recalculate this after filtering. */

/* Free up hashes, etc. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    hashFree(&fil->hash);
    }
}
Example #29
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, 
	struct hash *chromSizeHash, struct lm *lm, 
	int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
	lineFileReuse(lf);
	break;
	}

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
	lmAllocVar(chromHash->lm, chrom);
	hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
	chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
	slAddHead(&chromList, chrom);
	}

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end line (%u) %d of %s.", 
		item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
	{
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
	        lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
	if (!clipDontDie)
	    noWarnAbort();
	}
    else
	{
	slAddHead(&chrom->itemList, item);
	}
    }
slSort(&chromList, bedGraphChromCmpName);

/* Loop through each chromosome and output the item list, broken into sections
 * for that chrom. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Check to make sure no overlap between items. */
    struct bwgBedGraphItem *item = chrom->itemList, *nextItem;
    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
        {
	if (item->end > nextItem->start)
	    errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
	        chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end);
	item = nextItem;
	}

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
	{
	/* Find end item of this section, and start item for next section.
	 * Terminate list at end item. */
	int sectionSize = 0;
	int i;
	endItem = startItem;
	for (i=0; i<itemsPerSlot; ++i)
	    {
	    if (nextStartItem == NULL)
		break;
	    endItem = nextStartItem;
	    nextStartItem = nextStartItem->next;
	    ++sectionSize;
	    }
	endItem->next = NULL;

	/* Fill in section and add it to section list. */
	struct bwgSection *section;
	lmAllocVar(lm, section);
	section->chrom = cloneString(chrom->name);
	section->start = startItem->start;
	section->end = endItem->end;
	section->type = bwgTypeBedGraph;
	section->items.bedGraphList = startItem;
	section->itemCount = sectionSize;
	slAddHead(pSectionList, section);
	}
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in 
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
Example #30
void freeFreqHash(struct hash **pFreqHash)
/* Free up the hash we created. */
{
hashTraverseEls(*pFreqHash, hashElSlPairListFree);
hashFree(pFreqHash);
}