예제 #1
0
int main(int argc, char *argv[])
/* read ContigLocFilter, writing to individual chrom tables */
{
struct hashCookie cookie;
struct hashEl *hel;
char *chromName;

if (argc != 3)
    usage();

snpDb = argv[1];
contigGroup = argv[2];
hSetDb(snpDb);

/* check for needed tables */
if(!hTableExistsDb(snpDb, "ContigLocFilter"))
    errAbort("no ContigLocFilter table in %s\n", snpDb);
if(!hTableExistsDb(snpDb, "ContigInfo"))
    errAbort("no ContigInfo table in %s\n", snpDb);

chromHash = loadChroms(contigGroup);
if (chromHash == NULL) 
    {
    verbose(1, "couldn't get chrom info\n");
    return 1;
    }

writeSplitTables();

verbose(1, "closing files...\n");
cookie = hashFirst(chromHash);
while (hel = hashNext(&cookie))
    fclose(hel->val);

verbose(1, "creating tables...\n");
cookie = hashFirst(chromHash);
while ((chromName = hashNextName(&cookie)) != NULL)
    createTable(chromName);

verbose(1, "loading database...\n");
cookie = hashFirst(chromHash);
while ((chromName = hashNextName(&cookie)) != NULL)
    {
    verbose(1, "chrom = %s\n", chromName);
    loadDatabase(chromName);
    }

return 0;
}
예제 #2
0
char *identifierWhereClause(char *idField, struct hash *idHash)
/* If the number of pasted IDs is reasonably low, return a where-clause component for the IDs. */
{
if (idHash == NULL || idField == NULL)
    return NULL;
int numIds = hashNumEntries(idHash);
int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE);
if (numIds > 0 && numIds <= maxIdsInWhere)
    {
    struct dyString *dy = dyStringNew(16 * numIds);
    dyStringPrintf(dy, "%s in (", idField);
    struct hashCookie hc = hashFirst(idHash);
    boolean first = TRUE;
    char *id;
    while ((id = hashNextName(&hc)) != NULL)
	{
	if (first)
	    first = FALSE;
	else
	    dyStringAppend(dy, ", ");
	dyStringPrintf(dy, "'%s'", id);
	}
    dyStringAppend(dy, ")");
    return dyStringCannibalize(&dy);
    }
return NULL;
}
void doLog()
{
FILE *logFileHandle = mustOpen("snpGetSeqDup.log", "w");
struct hashCookie cookie = hashFirst(uniqHash);
char *rsId = NULL;
int count = 0;
struct hashEl *hel = NULL;
char *fileName = NULL;
struct dyString *dy = newDyString(1024);

while ((rsId = hashNextName(&cookie)) != NULL)
    {
    count = 0;
    for (hel = hashLookup(snpHash, rsId); hel != NULL; hel = hashLookupNext(hel))
	count++;
    if (count == 1) continue;
    for (hel = hashLookup(snpHash, rsId); hel != NULL; hel = hashLookupNext(hel))
        {
	fileName = (char *)hel->val;
	dyStringAppend(dy, fileName);
	dyStringAppend(dy, " ");
	}
    fprintf(logFileHandle, "%s\t%s\n", rsId, dy->string);
    dyStringClear(dy);
    }

carefulClose(&logFileHandle);
}
예제 #4
0
void writeResults()
/* loop through nameHash */
/* print all coords from coordHash to outputFileHandle */
/* also print count per SNP to logFileHandle */
{
struct hashCookie cookie;
struct hashEl *hel= NULL;
struct coords *cel = NULL;
int count = 0;
char *name;

verbose(1, "writing results...\n");
cookie = hashFirst(nameHash);
while ((name = hashNextName(&cookie)) != NULL)
    {
    count = 0;
    for (hel = hashLookup(coordHash, name); hel != NULL; hel= hashLookupNext(hel))
        {
	cel = (struct coords *)hel->val;
	fprintf(outputFileHandle, "%s\t%d\t%d\t%s\tMultipleAlignments\n", cel->chrom, cel->start, cel->end, name);
	count++;
	}
    fprintf(logFileHandle, "%s\t%d\n", name, count);
    }
}
예제 #5
0
int main(int argc, char *argv[])
{

char *snpDb = NULL;
char *snpTableName = NULL;
struct hashCookie cookie;
char *chromName = NULL;

if (argc != 3)
    usage();

snpDb = argv[1];
hSetDb(snpDb);

snpTableName = argv[2];

/* check that tables exist */
if (!hTableExists(snpTableName))
    errAbort("no %s table in %s\n", snpTableName, snpDb);
if (!hTableExists("chromInfo"))
    errAbort("no chromInfo table in %s\n", snpDb);

loadChroms();
getSnps(snpTableName);

verbose(1, "creating tables...\n");
cookie = hashFirst(chromHash);
while ((chromName = hashNextName(&cookie)) != NULL)
    createTable(chromName);

verbose(1, "loading database...\n");
cookie = hashFirst(chromHash);
while ((chromName = hashNextName(&cookie)) != NULL)
    {
    verbose(1, "chrom = %s\n", chromName);
    loadDatabase(chromName);
    }

return 0;
}
예제 #6
0
void checkIds(char *inputFileName, char *outputFileName)
/* Read every chain from inputFileName and record its id (as a decimal string) in the
 * global idHash.  An id that is seen again is recorded once in the global
 * duplicateHash.  The duplicated ids are then written one per line to outputFileName.
 * The chain count and the duplicate count are reported through verbose(1).
 * Both hashes are newly created here and are still allocated on return. */
{
struct lineFile *lf = lineFileOpen(inputFileName, TRUE);
struct chain *chainEl;
struct hashCookie cookie;
FILE *outputFileHandle = NULL;
char idString[64];
char *dupId = NULL;
int chainCount = 0;
int dupCount = 0;

idHash = newHash(0);
duplicateHash = newHash(0);

/* NOTE(review): lf and the chains returned by chainRead() are never released in
 * this function; confirm whether that is acceptable for the calling program. */
while ((chainEl = chainRead(lf)) != NULL)
    {
    chainCount++;
    safef(idString, sizeof(idString), "%d", chainEl->id);
    /* hashAdd() stores its own copy of the key, so the stack buffer is passed
     * directly; cloning it first only leaked one string per id. */
    if (hashLookup(idHash, idString) == NULL)
        hashAdd(idHash, idString, NULL);
    else if (hashLookup(duplicateHash, idString) == NULL)
        hashAdd(duplicateHash, idString, NULL);
    }
verbose(1, "chain count = %d\n", chainCount);

/* print contents of duplicateHash */
outputFileHandle = mustOpen(outputFileName, "w");
cookie = hashFirst(duplicateHash);
while ((dupId = hashNextName(&cookie)) != NULL)
    {
    dupCount++;
    fprintf(outputFileHandle, "%s\n", dupId);
    }
verbose(1, "count of duplicate IDs = %d\n", dupCount);
carefulClose(&outputFileHandle);
}
void processSnps(char *oldTableName, char *newTableName)
{
static struct hash *contigLocHash = NULL;
static struct hash *mapInfoHash = NULL;
static struct hash *oldNameHash = NULL;
static struct hash *newNameHash = NULL;

struct hashCookie cookie;
struct hashEl *helNameNew = NULL;
struct hashEl *contigLocElement = NULL;
struct hashEl *mapInfoElement = NULL;
char *name;
FILE *outputFileHandle = mustOpen("snpMissing.out", "w");

int count = 0;

verbose(1, "creating hashes...\n");
contigLocHash = getUniqueStringHash("snp_id", "ContigLoc");
mapInfoHash = getUniqueStringHash("snp_id", "MapInfo");
oldNameHash = getUniqueStringHash("name", oldTableName);
newNameHash = getUniqueStringHash("name", newTableName);

verbose(1, "writing results...\n");
cookie = hashFirst(oldNameHash);
while ((name = hashNextName(&cookie)) != NULL)
    {
    count++;
    helNameNew = hashLookup(newNameHash, name);
    if (helNameNew == NULL)
        {
	fprintf(outputFileHandle, "rs%s in %s but not in %s\n", name, oldTableName, newTableName);
        contigLocElement = hashLookup(contigLocHash, name);
	if (contigLocElement != NULL)
	    fprintf(outputFileHandle, "found in ContigLoc\n");
        mapInfoElement = hashLookup(mapInfoHash, name);
	if (mapInfoElement != NULL)
	    fprintf(outputFileHandle, "found in MapInfo\n");
	}

    if (count == 100000) break;
    }
carefulClose(&outputFileHandle);
}
예제 #8
0
static void readSsAddAllele(struct locus *loc, char *rawAllele)
/* Count one observation of rawAllele at loc.  A copy of the allele is passed through
 * convertToUppercase() and is ignored unless it then equals "A", "C", "G" or "T".
 * The first time an allele is seen, a new alleleInfo is pushed onto loc->alleles and
 * loc->alleleCount is incremented. */
{
struct alleleInfo *ai;
/* NOTE(review): this working copy is never freed (one small leak per call). */
char *allele = cloneString(rawAllele);

convertToUppercase(allele);
if (!(sameString(allele,"A") || sameString(allele,"C") ||
      sameString(allele,"G") || sameString(allele,"T")))
    return;

for (ai = loc->alleles; ai != NULL; ai = ai->next)
    if (sameString(ai->allele, allele))
	break;
if (ai == NULL)
    {
    AllocVar(ai);
    ai->allele = cloneString(allele);
    slAddHead(&(loc->alleles), ai);
    loc->alleleCount++;
    }
ai->count++;
loc->sampleSize++;
}

struct locus *readSs(char *pbFile, char *strandFile)
/* Read pbFile (four fields per row) into a list of loci and return it.
 * Field 0 is split on "-" into chromStart, HUGO id and chromosome; consecutive rows
 * with the same chromosome and start share one locus.  Fields 2 and 3 are alleles,
 * which are counted per locus.  Each locus then receives its strand from the hash
 * returned by readStrand(strandFile), keyed by HUGO id.  Loci whose HUGO id has no
 * strand are removed from the list and the id is reported on stdout. */
{
struct hash *strandHash = readStrand(strandFile);
struct strand *strand = NULL;
struct hash *missingHugoIdHash = newHash(16);
struct hashCookie hashPtr;
char *missingName;
struct locus     *l  = NULL, *lPtr = NULL, *lNext = NULL;
struct lineFile  *lf = lineFileOpen(pbFile, TRUE); /* input file */
char             *row[4], *row2[3]; /* number of fields in input file */
char   chrom[32];
int    chromStart;
int    chromEnd;
char   name[32];

/* NOTE(review): lf is never closed in this function. */
while (lineFileRow(lf, row)) /* process one snp at a time */
    {
    struct locus *m = NULL;

    /* NOTE(review): the number of pieces returned by chopString() is not checked;
     * a field 0 with fewer than three "-"-separated parts leaves row2[] unset. */
    chopString(row[0], "-", row2, 3);
    chromStart = sqlUnsigned(row2[0]);
    chromEnd   = chromStart+1;
    safef(chrom, sizeof(chrom), "chr%s", row2[2]);

    /* start a new locus unless this row continues the one at the head of the list */
    if(l==NULL||l->chrom==NULL||l->chromStart!=chromStart||!(sameString(l->chrom,chrom)))
	{
	AllocVar(m);
	safef(name, sizeof(name), "%s_%d", row2[1], ++ssnpId);
	m->chrom       = cloneString(chrom);
	m->chromStart  = chromStart;
	m->chromEnd    = chromEnd;
	m->name        = cloneString(name);
	m->hugoId      = cloneString(row2[1]);
	m->strictSnp   = TRUE;
	slAddHead(&l, m);
	}

    readSsAddAllele(l, row[2]);
    readSsAddAllele(l, row[3]);
    }
slReverse(&l);
for(lPtr=l; lPtr!=NULL; lPtr=lNext)
    {
    /* remember the successor first, so iteration does not depend on how
     * slRemoveEl() leaves the removed element */
    lNext = lPtr->next;
    strand = hashFindVal(strandHash, lPtr->hugoId);
    if (strand == NULL)
	{
	hashStore(missingHugoIdHash, lPtr->hugoId);
	/* slRemoveEl() needs the address of the list head; passing the head element
	 * itself meant that a first locus without a strand could never be removed. */
	slRemoveEl(&l, lPtr);
	continue;
	}
    lPtr->strand = cloneString(strand->strand);
    }
freeHash(&strandHash);
hashPtr = hashFirst(missingHugoIdHash);
while ( (missingName=hashNextName(&hashPtr)) != NULL )
    printf("HUGO ID was not found in strand.txt (usually from proteome.hgncXref): %s\n", missingName);
freeHash(&missingHugoIdHash);
return l;
}
예제 #9
0
파일: mafClick.c 프로젝트: davidhoover/kent
static void mafOrAxtClick2(struct sqlConnection *conn, struct sqlConnection *conn2, struct trackDb *tdb, char *axtOtherDb, char *fileName)
/* Display details for MAF or AXT tracks.
 * If the window (winEnd - winStart) is wider than 30,000 bases only a "zoom in"
 * message is printed.  Otherwise the alignments overlapping seqName:winStart-winEnd
 * are loaded -- from the file named by the track's bigDataUrl setting when the track
 * type is "bigMaf", else through mafOrAxtLoadInRegion2() with conn, conn2, axtOtherDb
 * and fileName.  Components switched off in the cart are removed, components are
 * optionally reordered by the speciesOrder setting, each block is clipped to the
 * window, and the result is printed as HTML together with conservation-statistics
 * links and the display controls. */
{
hgBotDelay();
if (winEnd - winStart > 30000)
    {
    printf("Zoom so that window is 30,000 bases or less to see alignments and conservation statistics\n");
    }
else
    {
    struct mafAli *mafList = NULL, *maf, *subList = NULL;
    int aliIx = 0, realCount = 0;
    char dbChrom[64];
    char option[128];
    /* capTrack is assigned and read only inside the ADDEXONCAPITAL sections below */
    char *capTrack;
    struct consWiggle *consWig, *consWiggles;
    struct hash *speciesOffHash = NULL;
    char *speciesOrder = NULL;
    char *speciesTarget = trackDbSetting(tdb, SPECIES_TARGET_VAR);
    char buffer[1024];
    int useTarg = FALSE;
    int useIrowChains = FALSE;

    /* irow chains are used only when the cart option is on and the track has an
     * "irows" setting */
    safef(option, sizeof(option), "%s.%s", tdb->track, MAF_CHAIN_VAR);
    if (cartCgiUsualBoolean(cart, option, FALSE) &&
	trackDbSetting(tdb, "irows") != NULL)
	    useIrowChains = TRUE;

    /* with irow chains: useTarg is TRUE when "<track>.vis" is absent from the cart
     * and a species target is configured, otherwise when that cart variable
     * equals "useTarg" */
    safef(buffer, sizeof(buffer), "%s.vis",tdb->track);
    if (useIrowChains)
	{
	if (!cartVarExists(cart, buffer) && (speciesTarget != NULL))
	    useTarg = TRUE;
	else
	    {
	    char *val;

	    val = cartUsualString(cart, buffer, "useCheck");
            useTarg = sameString("useTarg",val);
            }
        }

    if (sameString(tdb->type, "bigMaf"))
        {
        /* NOTE(review): this local fileName shadows the function parameter of the
         * same name, and bbi is not closed anywhere in this function. */
        char *fileName = trackDbSetting(tdb, "bigDataUrl");
        struct bbiFile *bbi = bigBedFileOpen(fileName);
        mafList = bigMafLoadInRegion(bbi, seqName, winStart, winEnd);
        }
    else
        mafList = mafOrAxtLoadInRegion2(conn,conn2, tdb, seqName, winStart, winEnd,
                                        axtOtherDb, fileName);
    /* "<db>.<chrom>" name used below to select and move this database's component */
    safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), seqName);

    /* the cart value of "<track>.speciesOrder" takes precedence over the trackDb
     * setting */
    safef(option, sizeof(option), "%s.speciesOrder", tdb->track);
    speciesOrder = cartUsualString(cart, option, NULL);
    if (speciesOrder == NULL)
	speciesOrder = trackDbSetting(tdb, "speciesOrder");

    for (maf = mafList; maf != NULL; maf = maf->next)
        {
        int mcCount = 0;
        struct mafComp *mc;
        struct mafAli *subset;
        struct mafComp *nextMc;

        /* remove empty components and configured off components
         * from MAF, and ignore
         * the entire MAF if all components are empty
         * (solely for gap annotation) */

        /* NOTE(review): mcCount is incremented only inside this !useTarg branch, so
         * when useTarg is set the mcCount == 0 test below skips every block --
         * confirm that this is intended. */
        if (!useTarg)
            {
            /* the scan starts at the second component; the first is never removed */
            for (mc = maf->components->next; mc != NULL; mc = nextMc)
		{
		char buf[64];
                char *organism;
		mafSrcDb(mc->src, buf, sizeof buf);
                organism = hOrganism(buf);
                if (!organism)
                    organism = buf;
		/* saved before mc may be unlinked from the list below */
		nextMc = mc->next;
		safef(option, sizeof(option), "%s.%s", tdb->track, buf);
		/* components switched off in the cart are remembered for the
		 * "Components not displayed" notice printed later */
		if (!cartUsualBoolean(cart, option, TRUE))
		    {
		    if (speciesOffHash == NULL)
			speciesOffHash = newHash(4);
		    hashStoreName(speciesOffHash, organism);
		    }
		/* same cart lookup as just above: drop the component if it is off,
		 * otherwise count it */
		if (!cartUsualBoolean(cart, option, TRUE))
		    slRemoveEl(&maf->components, mc);
		else
		    mcCount++;
		}
	    }
        if (mcCount == 0)
            continue;

	/* rebuild the component list: the current first component stays first,
	 * followed by the components found for each species named in speciesOrder,
	 * in that order; components of species not named are left out */
	if (speciesOrder)
	    {
	    int speciesCt;
	    char *species[2048];
	    struct mafComp **newOrder, *mcThis;
	    int i;

	    mcCount = 0;
	    /* NOTE(review): the cloned string and newOrder are not freed in this
	     * function; both are allocated again for every alignment block. */
	    speciesCt = chopLine(cloneString(speciesOrder), species);
	    newOrder = needMem((speciesCt + 1) * sizeof (struct mafComp *));
	    newOrder[mcCount++] = maf->components;

	    for (i = 0; i < speciesCt; i++)
		{
		if ((mcThis = mafMayFindCompSpecies(maf, species[i], '.')) == NULL)
		    continue;
		newOrder[mcCount++] = mcThis;
		}

	    /* relink in newOrder sequence: slAddHead builds the list backwards and
	     * slReverse restores the order */
	    maf->components = NULL;
	    for (i = 0; i < mcCount; i++)
		{
		newOrder[i]->next = 0;
		slAddHead(&maf->components, newOrder[i]);
		}

	    slReverse(&maf->components);
	    }
	subset = mafSubsetE(maf, dbChrom, winStart, winEnd, TRUE);
	if (subset != NULL)
	    {
	    /* Reformat MAF if needed so that sequence from current
	     * database is the first component and on the
	     * plus strand. */
	    mafMoveComponentToTop(subset, dbChrom);
	    if (subset->components->strand == '-')
		mafFlipStrand(subset);
	    subset->score = mafScoreMultiz(subset);
	    slAddHead(&subList, subset);
	    ++realCount;
	    }
	}
    /* subList was built with slAddHead; reverse it back into mafList order */
    slReverse(&subList);
    mafAliFreeList(&mafList);
    if (subList != NULL)
	{
	char *showVarName = "hgc.showMultiBase";
	char *showVarVal = cartUsualString(cart, showVarName, "all");
	boolean onlyDiff = sameWord(showVarVal, "diff");
#ifdef ADDEXONCAPITAL
	char *codeVarName = "hgc.multiCapCoding";
	char *codeVarVal = cartUsualString(cart, codeVarName, "coding");
	boolean onlyCds = sameWord(codeVarVal, "coding");
#endif
        /* add links for conservation score statistics */
        consWiggles = wigMafWiggles(database, tdb);
        int wigCount = slCount(consWiggles);
        if (wigCount == 1)
            {
            conservationStatsLink(tdb, "Conservation score statistics", consWiggles->table);
            }
        else if (wigCount > 1)
            {
            /* multiple wiggles. List all that have been turned on with
             * checkboxes */

            /* Scan for cart variables -- do any exist, are any turned on ? */
            boolean wigSet = FALSE;
            boolean wigOn = FALSE;
            for (consWig = consWiggles; consWig != NULL;
                        consWig = consWig->next)
                {
                char *wigVarSuffix = NULL;
                (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix);
                if (cartVarExistsAnyLevel(cart, tdb, FALSE, wigVarSuffix))
                    {
                    wigSet = TRUE;
                    if (cartBooleanClosestToHome(cart, tdb, FALSE, wigVarSuffix))
                        wigOn = TRUE;
                    }
                }
            /* If there are no cart vars, turn on the first (default) wig */
            if (!wigSet)
                {
                char *prefix = tdb->track; // use when setting things to the cart
                if (tdbIsContainerChild(tdb))
                    prefix = tdbGetContainer(tdb)->track;

                cartSetBoolean(cart, wigMafWiggleVar(prefix, consWiggles, NULL), TRUE);
                wigOn = TRUE;
                }
            if (wigOn)
                {
                /* print the heading once, then one link per wiggle whose cart
                 * variable is on */
                boolean first = TRUE;
                for (consWig = consWiggles; consWig != NULL;
                            consWig = consWig->next)
                    {
                    if (first)
                        {
                        printf("Conservation score statistics:");
                        first = FALSE;
                        }
                    char *wigVarSuffix = NULL;
                    (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix);
                    if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, wigVarSuffix,FALSE))
                        {
                        printf("&nbsp;&nbsp;");
                        /* underscores in the label are replaced in place */
                        subChar(consWig->uiLabel, '_', ' ');
                        conservationStatsLink(tdb,
                            consWig->uiLabel, consWig->table);
                        }
                    }
                }
            }
        /* NOTE(review): no matching <P> is opened in this function; presumably one
         * is emitted by a callee or the caller -- confirm. */
        puts("</P>\n");

        /* no alignment to display when in visibilities where only wiggle is shown */
        char *vis = cartOptionalString(cart, tdb->track);
        if (vis)
            {
            enum trackVisibility tv = hTvFromStringNoAbort(vis);
            /* NOTE(review): this early return leaves subList unfreed and skips
             * the closing "</PRE></TT>" printed at the end of the function. */
            if (tv == tvSquish || tv == tvDense)
                return;
            }

#ifdef ADDEXONCAPITAL
	puts("<FORM ACTION=\"../cgi-bin/hgc\" NAME=\"gpForm\" METHOD=\"GET\">");
	cartSaveSession(cart);
	cgiContinueHiddenVar("g");
	cgiContinueHiddenVar("c");
	cgiContinueHiddenVar("i");
	printf("Capitalize ");
        cgiMakeDropListFull(codeVarName, codeAll, codeAll,
	    ArraySize(codeAll), codeVarVal, autoSubmit);
	printf("exons based on ");
        capTrack = genePredDropDown(cart, trackHash,
                                       "gpForm", "hgc.multiCapTrack");
#endif
	/* drop-down selecting which bases are shown (hgc.showMultiBase) */
	printf("show ");
        cgiMakeDropListFull(showVarName, showAll, showAll,
	    ArraySize(showAll), showVarVal, autoSubmit);
	printf("bases");
	printf("<BR>\n");
	/* NOTE(review): the opening <FORM> is emitted only under ADDEXONCAPITAL,
	 * while this closing tag is always printed. */
	printf("</FORM>\n");

#ifdef REVERSESTRAND
        /* notify if bases are complemented (hgTracks is on reverse strand) */
        if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
            puts("<EM>Alignment displayed on reverse strand</EM><BR>");
#endif
	puts("Place cursor over species for alignment detail. Click on 'B' to link to browser ");
	puts("for aligned species, click on 'D' to get DNA for aligned species.<BR>");

	printf("<TT><PRE>");

        /* notify if species removed from alignment */
        if (speciesOffHash) 
            {
            char *species;
            struct hashCookie hc = hashFirst(speciesOffHash);
            puts("<B>Components not displayed:</B> ");
            while ((species = hashNextName(&hc)) != NULL)
                printf("%s ", species);
            puts("<BR>");
            }


	/* print each clipped block with a heading giving its 1-based start, its end
	 * and its size, all taken from the first component */
	for (maf = subList; maf != NULL; maf = maf->next)
	    {
	    mafLowerCase(maf);
#ifdef ADDEXONCAPITAL
	    if (capTrack != NULL)
                capMafOnTrack(maf, capTrack, onlyCds);
#endif
            printf("<B>Alignment block %d of %d in window, %d - %d, %d bps </B>\n",
                   ++aliIx,realCount,maf->components->start + 1,
                   maf->components->start + maf->components->size, maf->components->size);
            mafPrettyOut(stdout, maf, 70,onlyDiff, aliIx);
            }
	mafAliFreeList(&subList);
	}
    else
	{
        printf("No multiple alignment in browser window");
	}
    /* NOTE(review): printed for both branches above, although "<TT><PRE>" is opened
     * only when subList is non-empty. */
    printf("</PRE></TT>");
    }
}