Esempio n. 1
0
void doTransRegCodeProbe(struct trackDb *tdb, char *item,
	char *codeTable, char *motifTable,
	char *tfToConditionTable, char *conditionTable)
/* Print the details page for one ChIP-chip probe from the transRegCode
 * experiments.
 *   tdb - track whose table holds the probe rows
 *   item - name of the probe to look up
 *   codeTable - motif hit table; only consulted when it exists
 *   motifTable, tfToConditionTable - handed on to tfBindLevelSection()
 *   conditionTable - handed on to growthConditionSection()
 * Also reads database, seqName and cart, which are not declared here. */
{
char query[256];
struct sqlResult *sr;
char **row;
int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
struct sqlConnection *conn = hAllocConn(database);
struct transRegCodeProbe *probe = NULL;

cartWebStart(cart, database, "ChIP-chip Probe Info");

/* Fetch the probe record; only the first matching row is used. */
sqlSafef(query, sizeof(query), "select * from %s where name = '%s'",
	tdb->table, item);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
    probe = transRegCodeProbeLoad(row+rowOffset);
sqlFreeResult(&sr);

if (probe != NULL)
    {
    struct tfData *tfList = NULL;
    struct hash *tfHash = newHash(0);
    int tfIx;

    /* Basic identification of the probe. */
    printf("<B>Name:</B> %s<BR>\n", probe->name);
    printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd,
	NULL, TRUE, probe->name);

    /* Group the probe's "factor_condition" entries by factor.  The entry
     * is split in place at the first underscore, so the hash lookup below
     * sees the bare factor name. */
    for (tfIx = 0; tfIx < probe->tfCount; tfIx++)
	{
	char *factor = probe->tfList[tfIx];
	char *underscore = strchr(factor, '_');
	char *conditionName = "n/a";
	struct tfData *tf;
	struct tfCond *cond;

	if (underscore != NULL)
	    {
	    *underscore = 0;
	    conditionName = underscore + 1;
	    }
	tf = hashFindVal(tfHash, factor);
	if (tf == NULL)
	    {
	    AllocVar(tf);
	    hashAddSaveName(tfHash, factor, tf, &tf->name);
	    slAddHead(&tfList, tf);
	    }
	AllocVar(cond);
	cond->name = cloneString(conditionName);
	cond->binding = probe->bindVals[tfIx];
	slAddHead(&tf->conditionList, cond);
	}
    slSort(&tfList, tfDataCmpName);

    /* Attach motif hits inside the probe's range to the matching factor;
     * hits whose name is not one of the probe's factors are ignored. */
    if (sqlTableExists(conn, codeTable))
	{
	sr = hRangeQuery(conn, codeTable,
		probe->chrom, probe->chromStart, probe->chromEnd,
		"chipEvidence != 'none'", &rowOffset);
	for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
	    {
	    struct transRegCode *hit = transRegCodeLoad(row+rowOffset);
	    struct tfData *owner = hashFindVal(tfHash, hit->name);
	    if (owner != NULL)
		slAddTail(&owner->trcList, hit);
	    }
	sqlFreeResult(&sr);
	}

    if (tfList != NULL)
	tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable);
    else
	printf("No significant immunoprecipitation.");
    transRegCodeProbeFree(&probe);
    growthConditionSection(conn, conditionTable);
    }

printf("\n<HR>\n");
printTrackHtml(tdb);
hFreeConn(&conn);
}
Esempio n. 2
0
void addIfNew(char *name)
/* Add name to nameHash (declared outside this function) unless it is
 * already there.  The entry's value is NULL; only the key matters.
 * name is passed to hashAdd() directly: the kent hash library stores its
 * own copy of the key, so cloning it first only leaked the clone. */
{
if (hashLookup(nameHash, name) == NULL)
    hashAdd(nameHash, name, NULL);
}
Esempio n. 3
0
void xmfaToMaf(char *in, char *out)
/* xmfaToMaf - Convert from xmfa to maf format.
 *   in - xmfa file to read
 *   out - maf file to write
 * Each sequence record becomes one maf component; its header is expected to
 * look like ">org chrom:start-stop strand".  A line starting with '='
 * ends the current alignment block, which is then written out.
 * The source size of every component is looked up in the chromInfo table
 * of the database named by the (option-mapped) organism. */
{
int c;
FILE *input  = mustOpen(in,  "r");
FILE *output = mustOpen(out, "w");

char *commentLine;
struct dnaSeq *sequence;

struct mafAli *ali;

struct sqlConnection *conn = hAllocConn();

mafWriteStart(output, "mlagan");

AllocVar(ali);
while(myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence)) {
    char srcName[128];

    /* Peek at what follows the sequence: another record or a block end. */
    c = fgetc(input);
    if(c == '=' || c == '>') {
        /* Defaults keep every field defined even if the header is malformed. */
        char org[32] = "";
        char chrom[32] = "";
        int start = 0;
        int stop = 0;
        char strand = '+';
        int fieldCount;
        char *sizeText;
        struct mafComp *comp;
        double score = 0.0;

        char query[1024];
        char sizeBuf[64];

        ungetc(c, input);

        AllocVar(comp);
        /* Parse the comment line.  The field widths keep an overlong
         * organism or chromosome name from overrunning the 32 byte buffers. */
        fieldCount = sscanf(commentLine, ">%31s %31[^:]:%d-%d %c",
                            org, chrom, &start, &stop, &strand);
        assert(fieldCount == 5);
        /* build the name */
        safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom);
        comp->src = cloneString(srcName);

        /* Run the query outside of assert() so that it is still executed
         * when the program is compiled with NDEBUG. */
        sqlSafef(query, sizeof(query), "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom);
        sizeText = sqlQuickQuery(conn, query, sizeBuf, sizeof(sizeBuf));
        assert(sizeText != NULL);
        comp->srcSize = (sizeText != NULL) ? atoi(sizeText) : 0;

        comp->strand = strand;

        /* Header start is decremented by one to get the maf start. */
        start = start - 1;

        comp->start = start;
        comp->size = ungappedSize(sequence);

        /* Minus strand starts are counted from the other end of the source. */
        if(strand == '-')
            comp->start = comp->srcSize - (comp->start + comp->size);

        /* Hand the sequence text over to the component before freeing the
         * now empty dnaSeq wrapper. */
        comp->text = sequence->dna;
        sequence->dna = 0;
        slAddHead(&ali->components, comp);
        freeDnaSeq(&sequence);

        if(c == '=') {
            /* score stays 0.0 if the separator line carries no score. */
            fscanf(input, "= score=%lf\n", &score);

            ali->score = score;

            slReverse(&ali->components);
            mafWrite(output, ali);
            mafAliFree(&ali);

            AllocVar(ali);
        }
    }
    else {
        /* Sequence not followed by a record or block end is dropped, as
         * before, but no longer leaked. */
        freeDnaSeq(&sequence);
    }
}

/* Release the alignment that was allocated for a block that never ended. */
mafAliFree(&ali);

mafWriteEnd(output);
}
Esempio n. 4
0
static char *makeResultName(char *tableName, char *path)
/* return path in trash for corresponding autoupgrade result file */
{
safef(path, AUTOUPGRPATHSIZE, "../trash/AUTO_UPGRADE_RESULT_%s", tableName);
return cloneString(path);
}
Esempio n. 5
0
static void saveAxtBundle(char *chromName, int chromSize, int chromOffset,
                          struct ffAli *ali,
                          struct dnaSeq *tSeq, struct hash *t3Hash, struct dnaSeq *qSeq,
                          boolean qIsRc, boolean tIsRc,
                          enum ffStringency stringency, int minMatch, struct gfOutput *out)
/* Save alignment to axtBundle.
 * Walks the ffAli block list, cutting it into runs at the points reported
 * by ffNextBreak(), and turns each run into one axt record whose query and
 * target symbol strings have '-' inserted for gaps.  The records are
 * collected in a new axtBundle that is pushed onto the bundle list of the
 * axtData found in out->data.
 *   chromName, chromSize - target name and size recorded in the output
 *   chromOffset - added to every target position
 *   t3Hash - if non-NULL, the trans3 list for tSeq->name is fetched from it
 *            and passed to the position helpers
 *   qIsRc, tIsRc - select '-' or '+' for the query/target strand
 *   stringency, minMatch - not used in this function
 * Aborts when a gap between two blocks of a run is negative. */
{
    struct axtData *ad = out->data;
    struct ffAli *sAli, *eAli, *ff, *rt, *eFf = NULL;
    struct axt *axt;
    struct dyString *q = newDyString(1024), *t = newDyString(1024);
    struct axtBundle *gab;
    struct trans3 *t3List = NULL;

    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    AllocVar(gab);
    gab->tSize = chromSize;
    gab->qSize = qSeq->size;
    /* One pass per run of blocks [sAli, eAli); eAli starts the next run. */
    for (sAli = ali; sAli != NULL; sAli = eAli)
    {
        eAli = ffNextBreak(sAli, 8, tSeq, t3List);
        dyStringClear(q);
        dyStringClear(t);
        for (ff = sAli; ff != eAli; ff = ff->right)
        {
            /* Copy the aligned block itself (n = query side, h = target side). */
            dyStringAppendN(q, ff->nStart, ff->nEnd - ff->nStart);
            dyStringAppendN(t, ff->hStart, ff->hEnd - ff->hStart);
            rt = ff->right;
            if (rt != eAli)
            {
                /* Gap to the next block of the run: the query gap is a
                 * pointer difference, the target gap is measured in the
                 * positions returned by trans3GenoPos(). */
                int nGap = rt->nStart - ff->nEnd;
                int nhStart = trans3GenoPos(rt->hStart, tSeq, t3List, FALSE)
                              + chromOffset;
                int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE)
                            + chromOffset;
                int hGap = nhStart - ohEnd;
                int gap = Blatmax(nGap, hGap);
                if (nGap < 0 || hGap < 0)
                {
                    errAbort("Negative gap size in %s vs %s", tSeq->name, qSeq->name);
                }
                /* The side whose gap equals 'gap' contributes its own
                 * symbols; the other side is padded with '-'. */
                if (nGap == gap)
                {
                    dyStringAppendN(q, ff->nEnd, gap);
                    dyStringAppendMultiC(t, '-', gap);
                }
                else
                {
                    dyStringAppendN(t, ff->hEnd, gap);
                    dyStringAppendMultiC(q, '-', gap);
                }
            }
            eFf = ff;	/* Keep track of last block in bunch */
        }
        /* Both symbol strings must have come out the same length. */
        assert(t->stringSize == q->stringSize);
        AllocVar(axt);
        axt->qName = cloneString(qSeq->name);
        /* Query coordinates are offsets of the block pointers into qSeq->dna. */
        axt->qStart = sAli->nStart - qSeq->dna;
        axt->qEnd = eFf->nEnd - qSeq->dna;
        axt->qStrand = (qIsRc ? '-' : '+');
        axt->tName = cloneString(chromName);
        axt->tStart = trans3GenoPos(sAli->hStart, tSeq, t3List, FALSE) + chromOffset;
        axt->tEnd = trans3GenoPos(eFf->hEnd, tSeq, t3List, TRUE) + chromOffset;
        axt->tStrand = (tIsRc ? '-' : '+');
        axt->symCount = t->stringSize;
        axt->qSym = cloneString(q->string);
        axt->tSym = cloneString(t->string);
        axt->frame = trans3Frame(sAli->hStart, t3List);
        /* Pick the default scoring routine matching the query type. */
        if (out->qIsProt)
            axt->score = axtScoreProteinDefault(axt);
        else
            axt->score = axtScoreDnaDefault(axt);
        slAddHead(&gab->axtList, axt);
    }
    /* Records were pushed head-first; restore run order. */
    slReverse(&gab->axtList);
    dyStringFree(&q);
    dyStringFree(&t);
    slAddHead(&ad->bundleList, gab);
}
char *scanSettingsForCT(char *userName, char *sessionName, char *contents,
			int *pLiveCount, int *pExpiredCount)
/* Split the CGI-encoded session contents into var=val settings and check
 * every custom track file setting (variables starting with
 * CT_FILE_VAR_PREFIX).  Settings whose file no longer exists are left out
 * of the rebuilt contents; files that do exist are handed to
 * customFactoryTestExistence().  *pLiveCount and *pExpiredCount (either may
 * be NULL) are incremented once per setting flagged live or expired.
 * Returns NULL when the rebuilt contents have the same length as the
 * original, otherwise a SQL UPDATE statement that stores the rebuilt
 * contents for this session.  The statement is returned rather than run
 * here because running it would disturb the caller's query. */
{
int originalSize = strlen(contents);
struct dyString *kept = dyStringNew(originalSize+1);
struct dyString *setting = dyStringNew(originalSize / 4);
char *updateSql = NULL;
char *scratch = cloneString(contents);	/* chopped up in place below */
char *varName;
char *nextVar = NULL;

verbose(3, "Scanning %s %s\n", userName, sessionName);
for (varName = scratch; isNotEmpty(varName); varName = nextVar)
    {
    boolean isLive = FALSE, isExpired = FALSE;
    char *value = strchr(varName, '=');

    if (value == NULL)
	errAbort("Mangled session content string %s", varName);
    *value++ = 0;
    nextVar = strchr(value, '&');
    if (nextVar != NULL)
	*nextVar++ = 0;

    /* Re-assemble this setting in its still encoded form, with the
     * separator if another setting follows. */
    dyStringClear(setting);
    dyStringPrintf(setting, "%s=%s%s",
		   varName, value, (nextVar ? "&" : ""));

    if (!startsWith(CT_FILE_VAR_PREFIX, varName))
	{
	/* Not a custom track file setting: keep it untouched. */
	dyStringAppend(kept, setting->string);
	continue;
	}

    cgiDecode(value, value, strlen(value));
    verbose(3, "Found variable %s = %s\n", varName, value);
    if (fileExists(value))
	{
	/* The file is there; let customFactoryTestExistence look inside it
	 * for table references, some of which may exist and some not. */
	char *db = varName + strlen(CT_FILE_VAR_PREFIX);
	dyStringAppend(kept, setting->string);
	customFactoryTestExistence(db, value, &isLive, &isExpired);
	}
    else
	{
	/* The file is gone; dropping the setting keeps it from being
	 * copied from session to session. */
	verbose(3, "Removing %s from %s %s\n", setting->string,
		userName, sessionName);
	isExpired = TRUE;
	}

    if (isLive && pLiveCount != NULL)
	(*pLiveCount)++;
    if (isExpired && pExpiredCount != NULL)
	(*pExpiredCount)++;
    if (isExpired)
	{
	if (verboseLevel() >= 3)
	    verbose(3, "Found expired custom track in %s %s: %s\n",
		    userName, sessionName, value);
	else
	    verbose(2, "Found expired custom track: %s\n", value);
	}
    if (isLive)
	verbose(4, "Found live custom track: %s\n", value);
    }

if (kept->stringSize != originalSize)
    {
    struct dyString *update = dyStringNew(originalSize*2);
    if (kept->stringSize > originalSize)
	errAbort("Uh, why is newContents (%d) longer than original (%d)??",
		 kept->stringSize, originalSize);
    dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable);
    dyStringAppendN(update, kept->string, kept->stringSize);
    dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 "
		   "where userName=\"%s\" and sessionName=\"%s\";",
		   userName, sessionName);
    verbose(3, "Removing one or more dead CT file settings from %s %s "
	    "(original length %d, now %d)\n", 
	    userName, sessionName,
	    originalSize, kept->stringSize);
    updateSql = dyStringCannibalize(&update);
    }

dyStringFree(&setting);
dyStringFree(&kept);
freeMem(scratch);
return updateSql;
}
Esempio n. 7
0
char *sqlStringComma(char **pS)
/* Read the string at *pS, which may or may not be quoted, advance *pS,
 * and return a cloneString() copy of what was read. */
{
char *parsed = sqlGetOptQuoteString(pS);
return cloneString(parsed);
}
Esempio n. 8
0
void txGeneFromBed(char *inBed, char *inPicks, char *ucscFa, char *uniProtFa, char *refPepFa, char *outKg)
/* txGeneFromBed - Convert from bed to knownGenes format table (genePred + uniProt ID).
 *   inBed - 12 column bed file of transcripts
 *   inPicks - tab-separated cdsPick rows, looked up by bed name
 *   ucscFa, uniProtFa, refPepFa - protein fasta files, read upper-cased
 *   outKg - output file
 * A protein accession is chosen only for beds with thickStart < thickEnd;
 * all other beds are written with an empty accession. */
{
/* Protein sequences, keyed by name. */
struct hash *uniProtHash = faReadAllIntoHash(uniProtFa, dnaUpper);
struct hash *ucscProtHash = faReadAllIntoHash(ucscFa, dnaUpper);
struct hash *refProtHash = faReadAllIntoHash(refPepFa, dnaUpper);

/* Picks, keyed by name.  Rows are loaded one at a time because
 * cdsPicksLoadAll, being autoSql-generated, has problems with empty fields. */
struct hash *pickHash = newHash(18);
struct lineFile *lf = lineFileOpen(inPicks, TRUE);
char *row[CDSPICK_NUM_COLS];
while (lineFileRowTab(lf, row))
    {
    struct cdsPick *loaded = cdsPickLoad(row);
    hashAdd(pickHash, loaded->name, loaded);
    }

struct bed *bedList = bedLoadNAll(inBed, 12);
struct bed *bed;

FILE *f = mustOpen(outKg, "w");
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    char *protAcc = NULL;
    if (bed->thickStart < bed->thickEnd)
	{
	struct cdsPick *pick = hashMustFindVal(pickHash, bed->name);
	struct dnaSeq *ucscSeq = hashMustFindVal(ucscProtHash, bed->name);
	struct dnaSeq *spSeq = NULL, *uniSeq = NULL, *refPep = NULL;

	if (pick->swissProt[0])
	    spSeq = hashMustFindVal(uniProtHash, pick->swissProt);
	if (pick->uniProt[0])
	    uniSeq = hashMustFindVal(uniProtHash, pick->uniProt);
	if (pick->refProt[0])
	    refPep = hashMustFindVal(refProtHash, pick->refProt);

	/* An exact sequence match wins, tried in the order swissProt,
	 * uniProt, refProt.  Without any exact match the uniProt accession
	 * is used if there is one, and the refProt accession otherwise.
	 * A refProt accession is always a copy with its suffix chopped. */
	if (spSeq != NULL && sameString(ucscSeq->dna, spSeq->dna))
	    protAcc = pick->swissProt;
	else if (uniSeq != NULL && sameString(ucscSeq->dna, uniSeq->dna))
	    protAcc = pick->uniProt;
	else if ((refPep != NULL && sameString(ucscSeq->dna, refPep->dna))
		|| !pick->uniProt[0])
	    {
	    protAcc = cloneString(pick->refProt);
	    chopSuffix(protAcc);
	    }
	else
	    protAcc = pick->uniProt;
	}
    outputKg(bed, emptyForNull(protAcc), f);
    }
carefulClose(&f);
}
static struct chrGapList *createGaps(struct bed *bounds)
{
struct bed *bedEl = NULL;
char *prevChr = NULL;
struct chrGapList *gaps = NULL;
struct gap *prevGap = NULL;
struct bed *prevBedEl = NULL;
struct chrGapList *curChrList = NULL;
int boundingChrCount = 0;
int overlappedBounding = 0;

for (bedEl = bounds; bedEl != NULL; bedEl = bedEl->next)
    {
    /*	the first bedEl does not yet start a new gap, must have a second */
    if ((NULL == prevChr) || differentWord(prevChr,bedEl->chrom))
	{
	struct chrGapList *cEl;
	AllocVar(cEl);
	cEl->chrom = cloneString(bedEl->chrom);
	cEl->gList = NULL;
	if (prevChr)
	    {
	    if (NULL == prevGap)
		{
		verbose(2,"WARNING: only one element on %s ! No gap defined.\n",
			prevChr);
 		slPopHead(&gaps);
		--boundingChrCount;
		}
	    freeMem(prevChr);
	    }
	prevChr = cloneString(bedEl->chrom);
	prevGap = NULL;
	prevBedEl = bedEl;	/*	bounding element before first gap */
	verbose(4,"new chrom on bounding gap creation %s, adding %#lx\n",
		prevChr, (unsigned long) cEl);
	slAddHead(&gaps,cEl);
	++boundingChrCount;
	curChrList = cEl;
	}
    else
	{
	struct gap *gEl;
	AllocVar(gEl);
	gEl->prev = prevGap;	/*	first one is NULL	*/
	gEl->upstream = prevBedEl;
	gEl->isUpstreamBound = TRUE;	/*	bounding element	*/
	gEl->downstream = bedEl;
	gEl->isDownstreamBound = TRUE;	/*	bounding element */
	gEl->next = NULL;		/*	not there yet	*/

	if (prevGap == NULL)	/*	first one is NULL	*/
	    {
	    curChrList->gList = gEl;	/*	starting the list	*/
	    }
	else
	    {
	    prevGap->next = gEl;
	    }

	prevGap = gEl;

	/*	gapSize is between downstream and upstream	*/
	gEl->gapSize = bedEl->chromStart - prevBedEl->chromEnd;
	verbose(5,"gap: %s:%d-%d size %d (%d)\n",
		bedEl->chrom, gEl->upstream->chromEnd,
			gEl->downstream->chromStart, gEl->gapSize,
			gEl->downstream->chromStart - gEl->upstream->chromEnd);
	if (gEl->gapSize < 0)
	    {
	    ++overlappedBounding;
	    if (verboseLevel()>3)
		{
		warn("WARNING: overlapping bounding elements at\n\t"
		    "%s:%d-%d <-> %s:%d-%d",
			prevBedEl->chrom, prevBedEl->chromStart,
			    prevBedEl->chromEnd, bedEl->chrom,
				bedEl->chromStart, bedEl->chromEnd);
		}
	    gEl->gapSize = 0;
	    }
	prevBedEl = bedEl;
	}
    }

if (prevChr)
    {
    /*	potentially the last one is a single item on a chrom	*/
    if (NULL == prevGap)
	{
	verbose(2,"WARNING: only one element on %s ! No gap defined.\n",
		prevChr);
	slPopHead(&gaps);
	--boundingChrCount;
	}
    freeMem(prevChr);
    }

slReverse(&gaps);
verbose(3,"bounding chrom count: %d (=? %d), overlapped items: %d\n",
	boundingChrCount, slCount(gaps), overlappedBounding);

return(gaps);
}
Esempio n. 10
0
struct knownMore *knownMoreLoad(char **row)
/* Build a knownMore from one database row, as fetched with
 * select * from knownMore.  The row must hold 20 columns in table order.
 * Free the result with knownMoreFree(). */
{
struct knownMore *km;

AllocVar(km);

/* Identifiers. */
km->name = cloneString(row[0]);
km->transId = cloneString(row[1]);
km->geneId = cloneString(row[2]);

/* GenBank columns. */
km->gbGeneName = sqlUnsigned(row[3]);
km->gbProductName = sqlUnsigned(row[4]);
km->gbProteinAcc = cloneString(row[5]);
km->gbNgi = cloneString(row[6]);
km->gbPgi = cloneString(row[7]);

/* OMIM columns. */
km->omimId = sqlUnsigned(row[8]);
km->omimName = cloneString(row[9]);

/* HUGO columns. */
km->hugoId = sqlUnsigned(row[10]);
km->hugoSymbol = cloneString(row[11]);
km->hugoName = cloneString(row[12]);
km->hugoMap = cloneString(row[13]);

/* Remaining cross references. */
km->pmId1 = sqlUnsigned(row[14]);
km->pmId2 = sqlUnsigned(row[15]);
km->refSeqAcc = cloneString(row[16]);
km->aliases = cloneString(row[17]);
km->locusLinkId = sqlUnsigned(row[18]);
km->gdbId = cloneString(row[19]);

return km;
}
static void randomPlacement(char *bounding, char *placed)
/* Load the bounding and placed bed files, print gap statistics before and
 * after the placed items are put into the gaps, then, if trials > 0, run
 * that many random placement trials and print their statistics.
 *   bounding - bed file of the bounding elements
 *   placed - bed file of the items to place
 * Also reads neighbor, trials, seed, bedOutFile, zeroBedOutFile,
 * shoulderBedOutFile and distOut, none of which are declared in this
 * function - presumably file-level option settings; verify.  When neighbor
 * is set, statistics are measured against gaps built from that bed file
 * instead of the bounding gaps. */
{
struct bed *boundingElements = bedLoadAll(bounding);
struct bed *placeItems = bedLoadAll(placed);
struct bed *nearestNeighbors = NULL;
int boundingCount = slCount(boundingElements);
int placedCount = slCount(placeItems);
int neighborCount = 0;
struct chrGapList *boundingGaps = NULL;
struct chrGapList *duplicateGapList = NULL;
struct chrGapList *neighborGaps = NULL;
struct statistic *statsList = NULL;
struct statistic *statEl = NULL;

if (neighbor)
    {
    nearestNeighbors = bedLoadAll(neighbor);
    slSort(&nearestNeighbors, bedCmp);	/* order by chrom,chromStart */
    neighborCount = slCount(nearestNeighbors);
    verbose(2, "neighbor element count: %d\n", neighborCount);
    neighborGaps = createGaps(nearestNeighbors);
    }
slSort(&boundingElements, bedCmp);	/* order by chrom,chromStart */
slSort(&placeItems, bedCmp);		/* order by chrom,chromStart */

verbose(2, "bounding element count: %d\n", boundingCount);
verbose(2, "placed item count: %d\n", placedCount);

boundingGaps = createGaps(boundingElements);

if (TRUE)	/*	display initial placement stats only	*/
    {
    char *neighborName = NULL;

    /* Work on a copy of the gap list being measured, so the original gap
     * lists stay available for the trials below. */
    if (neighbor)
	{
	neighborName = cloneString(neighbor);
	duplicateGapList = cloneGapList(neighborGaps);
	}
    else
	{
	neighborName = cloneString(bounding);
	duplicateGapList = cloneGapList(boundingGaps);
	}

    verbose(2,"stats before initial placement:  =================\n");
    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);
    printf("statistics on gaps before any placements:\n\t(%s)\n", neighborName);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    initialPlacement(duplicateGapList,placeItems);

    /* Only this measurement is given the output file names. */
    verbose(2,"stats after initial placement:  =================\n");
    statEl = gapStats(duplicateGapList, zeroBedOutFile, shoulderBedOutFile,
	distOut);
    printf("statistics after initial placement of placed items:\n\t(%s)\n",
		placed);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    freeChrList(&duplicateGapList, FALSE);
    slReverse(&statsList);
    freeMem(neighborName);
    }

if (trials > 0)
    {
    int trial;

    srand48((long int)seed);	/* for default seed=0, same set of randoms */

    slSort(&placeItems, bedCmpSize);	/* order by size of elements */
    slReverse(&placeItems);		/* largest ones first	*/
    measurePlaced(placeItems);		/* show placed item characteristics */
    for (trial = 0; trial < trials; ++trial)
	{
	struct bed *randomPlacedBedList;
	/* Each trial places the items into a fresh copy of the bounding gaps. */
	duplicateGapList = cloneGapList(boundingGaps);
	randomPlacedBedList = randomTrial(duplicateGapList,placeItems);
	if (neighbor)
	    {
	    /* Measure the randomly placed items against the neighbor gaps
	     * rather than against the gaps they were placed into. */
	    struct chrGapList *duplicateNeighborList;
	    slSort(&randomPlacedBedList,bedCmp);/*order by chrom,chromStart*/
	    duplicateNeighborList = cloneGapList(neighborGaps);
	    initialPlacement(duplicateNeighborList,randomPlacedBedList);
	    statEl = gapStats(duplicateNeighborList, (char *)NULL, (char *)NULL, (char *)NULL);
	    freeChrList(&duplicateNeighborList, FALSE);
	    }
	else
	    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);

	slAddHead(&statsList,statEl);
	/*	this gap list has temporary bed elements that were
	 *	created by the randomTrial(), they need to be freed as
	 *	the list is released, hence the TRUE signal.
	 *	It isn't a true freeBedList operation because the chrom
	 *	names are left intact in the original copy of the bed
	 *	list.  (The names were being shared.)
	 */
	/* Only the last trial's placement is written to bedOutFile. */
	if ((trial == (trials - 1)) && (bedOutFile != NULL))
	    {
	    bedListOutput(duplicateGapList, bedOutFile);
	    }
	freeChrList(&duplicateGapList, TRUE);
	}
    /* NOTE(review): statsList was already reversed once above, so this
     * second reversal changes the position of the two initial entries
     * relative to the trial entries - confirm the printed order is the
     * intended one. */
    slReverse(&statsList);
    statsPrint(statsList);
    }
if (neighbor)
    {
    bedFreeList(&nearestNeighbors);
    freeChrList(&neighborGaps, FALSE);
    }
bedFreeList(&boundingElements);
bedFreeList(&placeItems);
freeChrList(&boundingGaps, FALSE);
}
Esempio n. 12
0
void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
/* encode2Meta - Create meta files.
 *   database - must be one of metaDbs, otherwise the program aborts
 *   manifestIn - manifest listing the files that may appear in the output
 *   outMetaRa - output file
 * Builds a three level tree (composite, experiment, file) from the
 * encodeExp table and the metaDb objects of database, hoists and sorts
 * the tags, and writes the children of the tree root to outMetaRa.
 * Also reads metaDbs, organisms and expDb, which are not declared here. */
{
int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
if (dbIx < 0)
    errAbort("Unrecognized database %s", database);

/* Create a three level meta.ra format file based on hgFixed.encodeExp
 * and database.metaDb tables. The levels are composite, experiment, file */
struct metaNode *metaTree = metaTreeNew("encode2");

/* Load up the manifest. */
struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
struct hash *miHash = hashNew(18);
for (mi = miList; mi != NULL; mi = mi->next)
    hashAdd(miHash, mi->fileName, mi);
verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);

/* Load up encodeExp info. */
struct sqlConnection *expConn = sqlConnect(expDb);
struct encodeExp *expList = encodeExpLoadByQuery(expConn, "NOSQLINJ select * from encodeExp");
sqlDisconnect(&expConn);
verbose(1, "%d experiments in encodeExp\n", slCount(expList));

struct hash *compositeHash = hashNew(0);

/* Go through each  organism database in turn.  Only the entry equal to
 * database is processed; its index also selects the organism below. */
int i;
for (i=0; i<ArraySize(metaDbs); ++i)
    {
    char *db = metaDbs[i];
    if (!sameString(database, db))
        continue;

    verbose(1, "exploring %s\n", db);
    struct mdbObj *mdb, *mdbList = getMdbList(db);
    verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);

    /* Get info on all composites. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
	char *objType = mdbVarLookup(mdb->vars, "objType");
	if (objType != NULL && sameString(objType, "composite"))
	    {
	    char compositeName[256];
	    safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
	    struct metaNode *compositeNode = metaNodeNew(compositeName);
	    slAddHead(&metaTree->children, compositeNode);
	    compositeNode->parent = metaTree;
	    struct mdbVar *v;
	    for (v=mdb->vars; v != NULL; v = v->next)
	        {
		metaNodeAddVar(compositeNode, v->var, v->val);
		}
	    metaNodeAddVar(compositeNode, "assembly", db);
	    hashAdd(compositeHash, mdb->obj, compositeNode);
	    }
	}

    /* Make up one more for experiments with no composite. */
    char *noCompositeName = "wgEncodeZz";
    struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
    slAddHead(&metaTree->children, noCompositeNode);
    noCompositeNode->parent = metaTree;
    hashAdd(compositeHash, noCompositeName, noCompositeNode);


    /* Now go through objects trying to tie experiments to composites.
     * The first composite seen for an accession wins; a later, different
     * one is only reported. */
    struct hash *expToComposite = hashNew(16);
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
	char *composite = mdbVarLookup(mdb->vars, "composite");
	if (originalData(composite))
	    {
	    char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
	    if (dccAccession != NULL)
	        {
		char *oldComposite = hashFindVal(expToComposite, dccAccession);
		if (oldComposite != NULL)
		    {
		    if (!sameString(oldComposite, composite))
		        verbose(2, "%s maps to %s ignoring mapping to %s\n", dccAccession, oldComposite, composite);
		    }
		else
		    {
		    hashAdd(expToComposite, dccAccession, composite);
		    }
		}
	    }
	}
    /* Now get info on all experiments in this organism. */
    struct hash *expHash = hashNew(0);
    struct encodeExp *exp;
    for (exp = expList; exp != NULL; exp = exp->next)
        {
	if (sameString(exp->organism, organisms[i]))
	    {
	    if (exp->accession != NULL)
		{
		/* Experiments without a known composite go under the
		 * made-up "no composite" node. */
		char *composite = hashFindVal(expToComposite,  exp->accession);
		struct metaNode *compositeNode;
		if (composite != NULL)
		    {
		    compositeNode = hashMustFindVal(compositeHash, composite);
		    }
		else
		    {
		    compositeNode = noCompositeNode;
		    }
		struct metaNode *expNode = wrapNodeAroundExp(exp);
		hashAdd(expHash, expNode->name, expNode);
		slAddHead(&compositeNode->children, expNode);
		expNode->parent = compositeNode;
		}
	    }
	}

    /* Finally hang the file level objects under their experiments. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
	{
	char *fileName = NULL, *dccAccession = NULL;
	char *objType = mdbVarLookup(mdb->vars, "objType");
	if (objType != NULL && sameString(objType, "composite"))
	    continue;
	dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
	if (dccAccession == NULL)
	    continue;
	char *composite = hashFindVal(expToComposite,  dccAccession);
	if (composite == NULL)
	    errAbort("Can't find composite for %s", mdb->obj);
	struct mdbVar *v;
	for (v = mdb->vars; v != NULL; v = v->next)
	    {
	    char *var = v->var, *val = v->val;
	    if (sameString("fileName", var))
		{
		/* Replace the stored value by db/composite/firstFile, so
		 * that it can be compared with the manifest file names. */
		fileName = val;
		char path[PATH_LEN];
		char *comma = strchr(fileName, ',');
		if (comma != NULL)
		     *comma = 0;	/* Cut off comma separated list. */
		safef(path, sizeof(path), "%s/%s/%s", db, 
		    composite, fileName);  /* Add database path */
		fileName = val = v->val = cloneString(path);
		}
	    }
	if (fileName != NULL)
	    {
	    /* Only files listed in the manifest, and belonging to a known
	     * experiment, make it into the tree. */
	    if (hashLookup(miHash, fileName))
		{
		struct metaNode *expNode = hashFindVal(expHash, dccAccession);
		if (expNode != NULL)
		    {
		    struct metaNode *fileNode = metaNodeNew(mdb->obj);
		    slAddHead(&expNode->children, fileNode);
		    fileNode->parent = expNode;
		    for (v=mdb->vars; v != NULL; v = v->next)
			{
			metaNodeAddVar(fileNode, v->var, v->val);
			}
		    }
		}
	    }
	}
    }

struct hash *suppress = makeSuppress();
struct hash *closeEnoughTags = makeCloseEnoughTags();

metaTreeHoist(metaTree, closeEnoughTags);
metaTreeSortChildrenSortTags(metaTree);
FILE *f = mustOpen(outMetaRa, "w");
struct metaNode *node;
for (node = metaTree->children; node != NULL; node = node->next)
    metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
carefulClose(&f);

/* Write warning about tags in highest parent. */
struct mdbVar *v;
for (v = metaTree->vars; v != NULL; v = v->next)
    verbose(1, "Omitting universal %s %s\n", v->var, v->val);
}
Esempio n. 13
0
boolean myFaReadMixedNext(FILE *f, boolean preserveCase, char *defaultName, 
    boolean mustStartWithComment, char **retCommentLine, struct dnaSeq **retSeq)
/* Read next sequence from .fa file. Return sequence in retSeq.  
 * If retCommentLine is non-null return the '>' line in retCommentLine.
 * The whole thing returns FALSE at end of file. 
 * Contains parameter to preserve mixed case.
 *   defaultName - sequence name used when no '>' line supplies one
 *   mustStartWithComment - if TRUE, lines before the first '>' line are
 *       skipped; if FALSE, a first line without '>' is taken as the start
 *       of the sequence data
 * Sequence data ends at end of file or at the next '>' or '=' character,
 * which is pushed back onto the stream.  White space and digits in the
 * data are skipped.  The comment line returned is limited to the size of
 * the line buffer; the remainder of a longer '>' line is discarded. */
{
char lineBuf[1024];
int lineSize;
char *words[1];
int c;
off_t offset = ftello(f);
size_t dnaSize = 0;
DNA *dna, *sequence;
char *name = defaultName;

if (name == NULL)
    name = "";
dnaUtilOpen();
if (retCommentLine != NULL)
    *retCommentLine = NULL;
*retSeq = NULL;

/* Skip first lines until it starts with '>' */
for (;;)
    {
    if(fgets(lineBuf, sizeof(lineBuf), f) == NULL)
        {
        if (ferror(f))
            errnoAbort("read of fasta file failed");
        return FALSE;
        }
    lineSize = strlen(lineBuf);
    if (lineBuf[0] == '>')
        {
	if (retCommentLine != NULL)
            *retCommentLine = cloneString(lineBuf);
        /* fgets() stops when lineBuf is full.  Throw away the rest of an
         * overlong '>' line so that it is not read as sequence data. */
        if (lineSize > 0 && lineBuf[lineSize-1] != '\n')
            {
            while ((c = fgetc(f)) != EOF && c != '\n')
                ;
            }
        /* Sequence data starts here; the second pass seeks back to it. */
        offset = ftello(f);
        chopByWhite(lineBuf, words, ArraySize(words));
        name = words[0]+1;
        break;
        }
    else if (!mustStartWithComment)
        {
        /* No '>' line: rewind so this line is read as sequence data. */
        if (fseeko(f, offset, SEEK_SET) < 0)
            errnoAbort("fseek on fasta file failed");
        break;
        }
    else
        offset += lineSize;
    }
/* Count up DNA. */
for (;;)
    {
    c = fgetc(f);
    if (c == EOF || c == '>' || c == '=')
        break;
    if (!isspace(c) && !isdigit(c))
        {
        ++dnaSize;
        }
    }

/* Allocate DNA and fill it up from file. */
dna = sequence = needHugeMem(dnaSize+1);
if (fseeko(f, offset, SEEK_SET) < 0)
    errnoAbort("fseek on fasta file failed");
for (;;)
    {
    c = fgetc(f);
    if (c == EOF || c == '>' || c == '=')
        break;
    if (!isspace(c) && !isdigit(c))
        {
        // check for non-DNA char
        if (ntChars[c] == 0)
            {
            *dna++ = preserveCase ? 'N' : 'n';
            }
        else
            {
            *dna++ = preserveCase ? c : ntChars[c];
            }
        }
    }
/* Leave the record separator for the caller or the next call. */
if (c == '>' || c == '=')
    ungetc(c, f);
*dna = 0;

*retSeq = newDnaSeq(sequence, dnaSize, name);
if (ferror(f))
    errnoAbort("read of fasta file failed");
return TRUE;
}
void addBacEndInfo(char *spFile)
/* Add BAC end info from Shiaw-Pyng's file to clones in cloneHash. */
{
struct lineFile *lf = lineFileOpen(spFile, TRUE);
char *line;
int lineSize, wordCount;
int spCount = 0;
char *words[16];

while (lineFileNext(lf, &line, &lineSize))
    {
    char *s, *e, c;
    struct clone *clone;
    struct endInfo *end;
    char *firstWord;
    char *contig;

    if (line[0] == '#')
       continue;
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    firstWord = words[0];
    s = strchr(firstWord, '.');
    if (s == NULL)
	errAbort("Expecting dot line %d of %s\n", lf->lineIx, lf->fileName);
    *s++ = 0;
    if ((clone = hashFindVal(cloneHash, firstWord)) == NULL)
	{
	warn("%s in %s but not .finf files", firstWord, spFile);
	continue;
	}
    if (!startsWith("Contig", s))
	errAbort("Expecting .Contig line %d of %s\n", lf->lineIx, lf->fileName);
    s += 6;
    contig = s;
    if (wordCount == 1)
	{
	/* Older style - just one word. */
	e = strrchr(contig, '.');
	if (e == NULL)
	    errAbort("Expecting last dot line %d of %s\n", lf->lineIx, lf->fileName);
	*e++ = 0;
	AllocVar(end);
	subChar(s, '.', '_');
	end->contig = cloneString(contig);
	end->text = cloneString(e);
	c = lastChar(end->text);
	if (!(c == 'L' || c == 'R'))
	    c = '?';
	end->lr = c;
	slAddHead(&clone->spList, end);
	++spCount;
	}
    else if (wordCount == 15)
        {
	/* Newer style - 15 words. */
	if (!sameWord(words[11], "total_repeats"))
	    {
	    AllocVar(end);
	    end->contig = cloneString(contig);
	    end->text = cloneString(words[2]);
	    c = words[3][0];
	    if (!(c == 'L' || c == 'R'))
		c = '?';
	    end->lr = c;
	    slAddHead(&clone->spList, end);
	    ++spCount;
	    }
	}
    else
        {
	lineFileExpectWords(lf, 15, wordCount);
	}
    }
lineFileClose(&lf);
printf("Info on %d ends in %s\n", spCount, spFile);
}
Esempio n. 15
0
struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash, 
	struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount, boolean tabSep)
/* Scan a sorted bed file once, building one bbiChromUsage per chromosome (in
 * file order) and gathering summary statistics:
 *    *retMinDiff  - smallest distance between consecutive item starts on the
 *                   same chromosome (BIGNUM if no such pair was seen)
 *    *retAveSize  - mean of (end - start) over all items, 0 when file is empty
 *    *retBedCount - number of items read
 * When eim is non-NULL enough fields are parsed to cover its highest indexed
 * field, and eim is given each row so it can track maximum field sizes.
 * Aborts on unsorted input, end before start, unknown chromosome, or an end
 * past the chromosome size found in chromSizesHash. */
{
int fieldsNeeded = 3;
if (eim != NULL)
    fieldsNeeded = bbExIndexMakerMaxIndexField(eim) + 1;
char *fields[fieldsNeeded];
struct bbiChromUsage *curChrom = NULL, *chromList = NULL;
int prevStart = -1;		/* -1 means no previous item on this chromosome. */
bits32 nextId = 0;
bits64 baseSum = 0, itemTotal = 0;
int smallestGap = BIGNUM;

lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    int fieldCount = 0;

    if (!tabSep)
        fieldCount = lineFileChopNext(lf, fields, fieldsNeeded);
    else
        fieldCount = lineFileChopCharNext(lf, '\t', fields, fieldsNeeded);
    if (fieldCount == 0)
        break;
    lineFileExpectAtLeast(lf, fieldsNeeded, fieldCount);

    char *chromName = fields[0];
    int itemStart = lineFileNeedNum(lf, fields, 1);
    int itemEnd = lineFileNeedNum(lf, fields, 2);
    if (eim != NULL)
        bbExIndexMakerUpdateMaxFieldSize(eim, fields);
    if (itemEnd < itemStart)
        errAbort("end (%d) before start (%d) line %d of %s",
            itemEnd, itemStart, lf->lineIx, lf->fileName);
    itemTotal += 1;
    baseSum += (itemEnd - itemStart);

    /* Start a new usage record whenever the chromosome name changes. */
    if (curChrom == NULL || differentString(curChrom->name, chromName))
        {
        /* Chromosome names must ascend in plain strcmp order. */
        if (curChrom != NULL && strcmp(curChrom->name, chromName) > 0)
            errAbort("%s is not case-sensitive sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" with LC_COLLATE=C,  or bedSort and try again.",
                lf->fileName, lf->lineIx);
        struct hashEl *sizeEl = hashLookup(chromSizesHash, chromName);
        if (sizeEl == NULL)
            errAbort("%s is not found in chromosome sizes file", chromName);
        int sizeOfChrom = ptToInt(sizeEl->val);
        AllocVar(curChrom);
        curChrom->name = cloneString(chromName);
        curChrom->id = nextId++;
        curChrom->size = sizeOfChrom;
        slAddHead(&chromList, curChrom);
        prevStart = -1;
        }

    if (itemEnd > curChrom->size)
        errAbort("End coordinate %d bigger than %s size of %d line %d of %s", itemEnd, curChrom->name, curChrom->size, lf->lineIx, lf->fileName);
    curChrom->itemCount += 1;

    if (prevStart >= 0)
        {
        int gap = itemStart - prevStart;
        /* smallestGap is never negative, so a negative gap is always caught here. */
        if (gap < 0)
            errAbort("%s is not sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
                lf->fileName, lf->lineIx);
        if (gap < smallestGap)
            smallestGap = gap;
        }
    prevStart = itemStart;
    }

/* List was built head-first; restore file order. */
slReverse(&chromList);

double meanSize = 0;
if (itemTotal > 0)
    meanSize = (double)baseSum/itemTotal;
*retMinDiff = smallestGap;
*retAveSize = meanSize;
*retBedCount = itemTotal;
return chromList;
}
void readFinfFiles(char *gsDir)
/* Read in .finf files and save info in cloneHash/cloneList.
 * Each file named in gsFiles is opened under gsDir and read as rows of seven
 * words.  Word 1 is the versioned clone name ("name.version"), word 0 the
 * fragment name, word 3 the clone size and word 6 the end info.  A new clone
 * record is started each time word 1 differs from the previous row; every row
 * adds a fragment to the current clone.  For clones not from the first
 * (finished) file, rows whose end info is not "?", "i" or "w" also add an
 * endInfo to the clone's gsList.
 * Aborts on over-long paths or clone names and on malformed names. */
{
struct lineFile *lf;
struct clone *clone = NULL;
struct endInfo *end;
char fileName[512];
int i;
char *words[7];
char lastClone[64];
char cloneName[64];
int gsInfoCount = 0;
struct frag *frag;
boolean isFin;
char *s;

lastClone[0] = 0;
for (i=0; i<ArraySize(gsFiles); ++i)
    {
    int pathLen;

    /* Only the first entry of gsFiles is treated as the finished-clone file. */
    isFin = (i <= 0);
    pathLen = snprintf(fileName, sizeof(fileName), "%s/%s", gsDir, gsFiles[i]);
    if (pathLen < 0 || pathLen >= (int)sizeof(fileName))
        errAbort("File name too long: %s/%s\n", gsDir, gsFiles[i]);
    printf("Reading info from %s\n", fileName);
    lf = lineFileOpen(fileName, TRUE);
    while (lineFileRow(lf, words))
        {
        if (!sameString(words[1], lastClone))
            {
            struct clone *oldClone;

            /* lastClone and cloneName are the same size, so one check
             * guards both copies below. */
            if (strlen(words[1]) >= sizeof(cloneName))
                errAbort("Clone name too long line %d of %s\n", lf->lineIx, lf->fileName);
            strcpy(lastClone, words[1]);
            strcpy(cloneName, words[1]);
            AllocVar(clone);
            s = strchr(cloneName, '.');
            if (s == NULL)
                errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName);
            if (strlen(s) >= sizeof(clone->version))
                errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName);
            /* The version keeps its leading dot. */
            strcpy(clone->version, s);
            chopSuffix(cloneName);
            clone->size = atoi(words[3]);
            if ((oldClone = hashFindVal(cloneHash, cloneName)) != NULL)
                {
                if (isFin && clone->size == oldClone->size && sameString(clone->version, oldClone->version))
                    warn("Apparently benign duplication of %s line %d of %s", cloneName, lf->lineIx, lf->fileName);
                else
                    warn("%s duplicated line %d of %s (size %d oldSize %d)", cloneName, lf->lineIx, lf->fileName,
                        clone->size, oldClone->size);
                }
            /* Duplicates are only warned about; the new record is still added. */
            hashAddSaveName(cloneHash, cloneName, clone, &clone->name);
            clone->isFin = isFin;
            slAddHead(&cloneList, clone);
            }
        frag = newFrag(words[0], lf);
        slAddTail(&clone->fragList, frag);
        ++clone->fragCount;
        if (!clone->isFin && !sameString(words[6], "?") && !sameString(words[6], "i") 
           && !sameString(words[6], "w"))
            {
            char *contigPart = strchr(words[0], '~');
            char c;

            if (contigPart == NULL)
                errAbort("Expecting ~ in fragment name line %d of %s\n", lf->lineIx, lf->fileName);
            ++contigPart;
            AllocVar(end);
            end->contig = cloneString(contigPart);
            /* NOTE(review): this substitution happens after the clone above,
             * so end->contig keeps its dots and only the row buffer changes.
             * Order kept as in the original; confirm whether the contig
             * should be normalized before cloning. */
            subChar(contigPart, '.', '_');
            end->text = cloneString(words[6]);
            c = lastChar(end->text);
            if (!(c == 'L' || c == 'R'))
                c = '?';
            end->lr = c;
            slAddHead(&clone->gsList, end);
            ++gsInfoCount;
            }
        }
    lineFileClose(&lf);
    }
printf("Found %d ends in %d clones\n", gsInfoCount, slCount(cloneList));
}
Esempio n. 17
0
void readOneOut(char *rmskFile)
/* Read .out file rmskFile, check each line, and print OK lines to .tab.
 * The first line must look like a RepeatMasker header; it and the next two
 * lines are skipped.  Every following line is chopped into words, loaded into
 * a rmskOut record and, when checkRepeat() accepts it, written to the file
 * returned by getFileForChrom() (preceded by a bin column unless noBin is
 * set).  A class/family column that contains only digits and parentheses is
 * taken to be missing: class and family become "Unspecified" and the later
 * columns are read one position earlier. */
{
    struct lineFile *lf;
    char *line, *words[24];
    int lineSize, wordCount;

    /* Open .out file and process header. */
    lf = lineFileOpen(rmskFile, TRUE);
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Empty %s", lf->fileName);
    if (!startsWith("   SW  perc perc", line))
    {
        if (!startsWith("   SW   perc perc", line))
            errAbort("%s doesn't seem to be a RepeatMasker .out file, first "
                     "line seen:\n%s", lf->fileName, line);
    }
    lineFileNext(lf, &line, &lineSize);
    lineFileNext(lf, &line, &lineSize);

    /* Process line oriented records of .out file. */
    while (lineFileNext(lf, &line, &lineSize))
    {
        static struct rmskOut r;
        char *s;
        /* Heap strings owned by this iteration; released at the bottom of
         * the loop so they do not accumulate across lines. */
        char *ownedClass = NULL, *ownedFamily = NULL;

        wordCount = chopLine(line, words);
        if (wordCount < 14)
            errAbort("Expecting 14 or 15 words line %d of %s",
                     lf->lineIx, lf->fileName);
        r.swScore = atoi(words[0]);
        r.milliDiv = makeMilli(words[1], lf);
        r.milliDel = makeMilli(words[2], lf);
        r.milliIns = makeMilli(words[3], lf);
        r.genoName = words[4];
        r.genoStart = atoi(words[5])-1;
        r.genoEnd = atoi(words[6]);
        r.genoLeft = parenSignInt(words[7], lf);
        r.strand[0]  = (words[8][0] == '+' ? '+' : '-');
        r.repName = words[9];
        r.repClass = words[10];

        /* Test a scratch copy so the parentheses stay in words[10], which
         * may really be the repStart column. */
        char *repClassTest = cloneString(r.repClass);
        stripChar(repClassTest, '(');
        stripChar(repClassTest, ')');
        int nonDigitCount = countLeadingNondigits(repClassTest);
        freeMem(repClassTest);

        int wordOffset = 0;
        // this repClass is only digits, (or only (digits) with surrounding parens)
        //   this is the sign of an empty field here
        // due to custom library in use that has no class/family indication
        if (0 == nonDigitCount)
        {
            wordOffset = 1;
            r.repClass = ownedClass = cloneString("Unspecified");
            r.repFamily = ownedFamily = cloneString("Unspecified");
        }
        else
        {
            /* "class/family" is split in place; a bare class doubles as family. */
            s = strchr(r.repClass, '/');
            if (s == NULL)
                r.repFamily = r.repClass;
            else
            {
                *s++ = 0;
                r.repFamily = s;
            }
        }
        r.repStart = parenSignInt(words[11-wordOffset], lf);
        r.repEnd = atoi(words[12-wordOffset]);
        r.repLeft = parenSignInt(words[13-wordOffset], lf);
        r.id[0] = ((wordCount > (14-wordOffset)) ? words[14-wordOffset][0] : ' ');
        if (checkRepeat(&r, lf))
        {
            FILE *f = getFileForChrom(r.genoName);
            if (!noBin)
                fprintf(f, "%u\t", hFindBin(r.genoStart, r.genoEnd));
            rmskOutTabOut(&r, f);
        }

        /* r's string fields are reassigned on the next line before any use,
         * so the per-line allocations can go now. */
        freeMem(ownedClass);
        freeMem(ownedFamily);
    }
    lineFileClose(&lf);
}
Esempio n. 18
0
void endHandler(struct xap *xap, char *name)
/* Called at end of a tag.
 * Finishes the element whose table is on top of the xap stack:
 *  - promoted table: its text is appended to the matching field of the
 *    parent table's pending content;
 *  - otherwise: the accumulated field contents are written as one row to
 *    table->tabFile unless an identical row was already written (tracked in
 *    table->uniqHash), the row's primary key is appended to the parent's
 *    referring field, and pending association rows are written out.
 * In both cases contentStack is advanced past this table's fields on exit.
 * The name parameter is not used in this function. */
{
struct table *table = xap->stack->object;
struct table *parentTable = xap->stack[1].object;	/* Next stack entry is the enclosing tag. */
struct field *field;
struct fieldRef *fieldRef;
struct assocRef *assocRef;
char *text = skipLeadingSpaces(xap->stack->text->string);
char *primaryKeyVal = NULL;
struct assoc *assoc;
static struct dyString *uniq = NULL;	/* Reused across calls; cleared below before use. */

if (table->promoted)	/* Simple case - copy text to parent table. */
    {
    for (fieldRef = table->parentKeys; fieldRef != NULL; fieldRef = fieldRef->next)
	{
	field = fieldRef->field;
	if (field->table == parentTable)
	    {
	    /* Parent's content slots are addressed just past this table's own
	     * fieldCount slots on contentStack. */
	    struct dyString **parentContent = contentStack + table->fieldCount;
	    struct dyString *dy = parentContent[field->tablePos];
	    /* Empty text in a non-string field is stored as 0. */
	    if (!field->isString && text[0] == 0)
	        text = "0";
	    dyStringAppend(dy, text);
	    break;
	    }
	}
    }
else
    {
    /* Non-empty element text goes into the field looked up by textField. */
    if (text[0] != 0)
	{
	field = hashFindVal(table->fieldHash, textField);
	if (field == NULL)
	    errAbort("No text for %s expected in dtd", table->name);
	dyStringAppendEscapedForTabFile(contentStack[field->tablePos], text);
	}

    /* Construct uniq string from fields, etc. */
    if (uniq == NULL)
	uniq = dyStringNew(0);
    else
	dyStringClear(uniq);
    for (field = table->fieldList; field != NULL; field = field->next)
	{
	/* A made-up primary key is left out of the uniqueness signature. */
	if (!(field->isPrimaryKey  && field->isMadeUpKey))
	    {
	    struct dyString *dy = contentStack[field->tablePos];
	    /* Side effect: an empty non-string field is filled with 0 in the
	     * content itself, so the row written below carries it too. */
	    if (dy->stringSize == 0 && !field->isString)
		dyStringAppendC(dy, '0');
	    dyStringAppendN(uniq, dy->string, dy->stringSize);
	    dyStringAppendC(uniq, '\t');
	    }
	}
    /* Pending associations (target file pointer plus child key) are part of
     * the signature as well. */
    for (assoc = table->assocList; assoc != NULL; assoc = assoc->next)
	{
	dyStringPrintf(uniq, "%p\t%s\t", assoc->f, assoc->childKey);
	}

    /* Only rows with an unseen signature are written; a repeat reuses the
     * primary key stored for the earlier identical row. */
    primaryKeyVal = hashFindVal(table->uniqHash, uniq->string);
    if (primaryKeyVal == NULL)
	{
	struct dyString *priDy = contentStack[table->primaryKey->tablePos];
	if (table->madeUpPrimary)
	    {
	    /* Made-up keys are sequential integers per table. */
	    table->lastId += 1;
	    dyStringPrintf(priDy, "%d", table->lastId);
	    }
	primaryKeyVal = priDy->string;
	/* Emit the row: fields tab-separated in fieldList order. */
	for (field = table->fieldList; field != NULL; field = field->next)
	    {
	    struct dyString *dy = contentStack[field->tablePos];
	    fprintf(table->tabFile, "%s", dy->string);
	    if (field->next != NULL)
	       fprintf(table->tabFile, "\t");
	    }
	fprintf(table->tabFile, "\n");
	/* The key is cloned before being stored in the hash. */
	hashAdd(table->uniqHash, uniq->string, cloneString(primaryKeyVal));
	}
    /* Hand this row's key to the parent field that refers to it, if any. */
    for (fieldRef = table->parentKeys; fieldRef != NULL; fieldRef = fieldRef->next)
	{
	field = fieldRef->field;
	if (field->table == parentTable)
	    {
	    struct dyString **parentContent = contentStack + table->fieldCount;
	    struct dyString *dy = parentContent[field->tablePos];
	    dyStringAppend(dy, primaryKeyVal);
	    break;
	    }
	}

    /* Queue an association on the parent for each parentAssocs entry that
     * names it; the parent writes these out when its own tag ends. */
    for (assocRef = table->parentAssocs; assocRef != NULL; 
	    assocRef = assocRef->next)
	{
	if (assocRef->parent == parentTable)
	    {
	    assoc = assocNew(assocRef->assoc->tabFile,
		primaryKeyVal);
	    slAddHead(&parentTable->assocList, assoc);
	    }
	}

    /* Write out associations queued on this table by its children, reversing
     * the head-insertion order first, then discard them. */
    slReverse(&table->assocList);
    for (assoc = table->assocList; assoc != NULL; assoc = assoc->next)
	fprintf(assoc->f, "%s\t%s\n", primaryKeyVal, assoc->childKey);
    assocFreeList(&table->assocList);
    }
/* Step past this table's slots on the content stack. */
contentStack += table->fieldCount;
}
Esempio n. 19
0
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId)
/* Given gene ID, try and find known gene on browser in same
 * species.
 * Looks up the gene's taxon, picks the genome database for that taxon (or the
 * default database when that one has no knownToVisiGene table), and searches
 * knownToVisiGene there: by geneId when the table has three columns,
 * otherwise by the images of the gene's probes.
 * Returns a newly allocated hgGene URL, or NULL when nothing matches. */
{
char query[256];
char tableName[256];
int taxon;
char *url = NULL;
char *genomeDb = NULL;

/* Figure out taxon. */
sqlSafef(query, sizeof(query), 
    "select taxon from gene where id = %d", geneId);
taxon = sqlQuickNum(conn, query);

genomeDb = hDbForTaxon(conn, taxon);
if (genomeDb != NULL)
    {
    /* Make sure known genes track exists - we may need
     * to tweak this at some point for model organisms. */
    safef(tableName, sizeof(tableName), "%s.knownToVisiGene", genomeDb);
    if (!sqlTableExists(conn, tableName))
	freez(&genomeDb);	/* Release it rather than just dropping the pointer. */
    }

/* If no db for that organism revert to human. */
if (genomeDb == NULL)
    genomeDb = hDefaultDb();

safef(tableName, sizeof(tableName), "%s.knownToVisiGene", genomeDb);
if (sqlTableExists(conn, tableName))
    {
    struct dyString *dy = dyStringNew(0);
    char *knownGene = NULL;
    if (sqlCountColumnsInTable(conn, tableName) == 3)
	{
	/* Three-column table has a geneId column to query directly. */
	sqlDyStringPrintf(dy, 
	   "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId);
	}
    else
	{
	/* Otherwise match on the images reachable through the gene's probes. */
	struct slName *imageList, *image;
	sqlSafef(query, sizeof(query), 
	    "select imageProbe.image from probe,imageProbe "
	    "where probe.gene=%d and imageProbe.probe=probe.id", geneId);
	imageList = sqlQuickList(conn, query);
	if (imageList != NULL)
	    {
	    sqlDyStringPrintf(dy, 
	       "select name from %s.knownToVisiGene ", genomeDb);
	    dyStringAppend(dy,
	       "where value in(");
	    for (image = imageList; image != NULL; image = image->next)
		{
		sqlDyStringPrintf(dy, "'%s'", image->name);
		if (image->next != NULL)
		    dyStringAppendC(dy, ',');
		}
	    dyStringAppend(dy, ")");
	    slFreeList(&imageList);
	    }
	}
    /* dy stays empty when the gene has no images; then there is no query to run. */
    if (dy->stringSize > 0)
	{
	knownGene = sqlQuickString(conn, dy->string);
	if (knownGene != NULL)
	    {
	    char temp[1024];
	    safef(temp, sizeof temp, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none",
		genomeDb, knownGene);
	    url = cloneString(temp);
	    /* The name was copied into the URL; release the query result. */
	    freeMem(knownGene);
	    }
	}
    dyStringFree(&dy);
    }
freez(&genomeDb);
return url;
}
void refreshNamedSessionCustomTracks(char *centralDbName)
/* refreshNamedSessionCustomTracks -- cron robot for keeping alive custom 
 * tracks that are referenced by saved sessions.
 * centralDbName must match the central database the connection actually
 * uses.  All saved sessions (optionally only those used within the last
 * -atime days) are read into memory and scanned for custom tracks; any SQL
 * updates the scan produces are applied only when -hardcore is given. */
{
struct sqlConnection *conn = hConnectCentral();
struct slPair *pendingList = NULL, *pending;
char *connectedDb = sqlGetDatabase(conn);
int liveCount=0, expiredCount=0;

setUdcCacheDir();  /* programs that use udc must call this to initialize cache dir location */

if (sameString(centralDbName, connectedDb))
    verbose(2, "Got connection to %s\n", centralDbName);
else
    errAbort("Central database specified in hg.conf file is %s but %s "
	     "was specified on the command line.",
	     connectedDb, centralDbName);

/* Sessions last used before the cutoff are skipped; only computed when
 * -atime is positive. */
long long threshold = 0;
int atime = optionInt("atime", 0);
if (atime > 0)
    {
    time_t now = time(NULL);
    threshold = now - ((long long)atime * 24 * 60 * 60);
    }

if (sqlTableExists(conn, savedSessionTable))
    {
    struct sessionInfo *sessions = NULL, *session;
    struct sqlResult *sr = NULL;
    char **row = NULL;
    char query[512];

    safef(query, sizeof(query),
	  "select userName,sessionName,UNIX_TIMESTAMP(lastUse),contents from %s "
	  "order by userName,sessionName", savedSessionTable);
    sr = sqlGetResult(conn, query);

    /* Pull every row into memory first so the result set is released
     * before the slower per-session scanning starts. */
    while ((row = sqlNextRow(sr)) != NULL)
	{
	if (atime > 0 && atoll(row[2]) < threshold)
	    {
	    verbose(2, "User %s session %s is older than %d days, skipping.\n",
		    row[0], row[1], atime);
	    continue;
	    }
	AllocVar(session);
	safecpy(session->userName, sizeof(session->userName), row[0]);
	safecpy(session->sessionName, sizeof(session->sessionName), row[1]);
	session->contents = cloneString(row[3]);
	slAddHead(&sessions, session);
	}
    sqlFreeResult(&sr);

    /* Scan each session; a non-NULL result is an update statement to run later. */
    session = sessions;
    while (session != NULL)
	{
	char *sql = scanSettingsForCT(session->userName, session->sessionName,
				      session->contents, &liveCount, &expiredCount);
	if (sql != NULL)
	    {
	    AllocVar(pending);
	    pending->name = sql;
	    slAddHead(&pendingList, pending);
	    }
	session = session->next;
	}
    }

/* Now that we're done reading from savedSessionTable, we can modify it: */
if (optionExists("hardcore"))
    {
    pending = pendingList;
    while (pending != NULL)
	{
	sqlUpdate(conn, pending->name);
	pending = pending->next;
	}
    }
hDisconnectCentral(&conn);
verbose(1, "Found %d live and %d expired custom tracks in %s.\n",
	liveCount, expiredCount, centralDbName);
}
Esempio n. 21
0
char *visiGeneHypertextGenotype(struct sqlConnection *conn, int id)
/* Return genotype of organism if any in nifty hypertext format.
 * id is an image id.  Returns NULL when the image's specimen has no genotype
 * (genotype id 0), a copy of "wild type" when the genotype has no alleles,
 * and otherwise an allocated HTML string listing, per gene, each allele as
 * gene<SUP>allele</SUP> joined by '/' (a lone allele is printed twice), with
 * genes separated by a space. */
{
int genotypeId;
struct slName *geneIdList, *geneId;
char query[256];
struct dyString *html;

/* Look up genotype ID. */
sqlSafef(query, sizeof(query),
    "select specimen.genotype from image,specimen "
    "where image.id=%d and image.specimen = specimen.id", id);
genotypeId = sqlQuickNum(conn, query);
if (genotypeId == 0)
    return NULL;

/* Get list of genes involved. */
sqlSafef(query, sizeof(query),
    "select distinct allele.gene from genotypeAllele,allele "
    "where genotypeAllele.genotype=%d "
    "and genotypeAllele.allele = allele.id"
    , genotypeId);
geneIdList = sqlQuickList(conn, query);
if (geneIdList == NULL)
    return cloneString("wild type");

/* Loop through each gene adding information to html. */
html = dyStringNew(0);
for (geneId = geneIdList; geneId != NULL; geneId = geneId->next)
    {
    char *geneName;
    struct slName *alleleList, *allele;
    int alleleCount;
    boolean needsSlash = FALSE;

    /* Get gene name. */
    sqlSafef(query, sizeof(query), "select name from gene where id='%s'",
        geneId->name);
    geneName = sqlQuickString(conn, query);
    if (geneName == NULL)
        internalErr();

    /* Process each allele of gene.  The gene id is a string here, so it is
     * quoted the same way as in the gene-name query above. */
    sqlSafef(query, sizeof(query), 
    	"select allele.name from genotypeAllele,allele "
	"where genotypeAllele.genotype=%d "
	"and genotypeAllele.allele = allele.id "
	"and allele.gene='%s'"
	, genotypeId, geneId->name);
    alleleList = sqlQuickList(conn, query);
    alleleCount = slCount(alleleList);
    for (allele = alleleList; allele != NULL; allele = allele->next)
        {
	char *simplifiedAllele = getSimplifiedAllele(geneName, allele->name);
	int repCount = 1, rep;
	/* A single allele is written twice, giving gene<SUP>a</SUP>/gene<SUP>a</SUP>. */
	if (alleleCount == 1)
	    repCount = 2;
	for (rep = 0; rep < repCount; ++rep)
	    {
	    /* Slash goes between entries of the same gene, not before the first. */
	    if (needsSlash)
	        dyStringAppendC(html, '/');
	    else
	        needsSlash = TRUE;
	    dyStringAppend(html, geneName);
	    dyStringPrintf(html, "<SUP>%s</SUP>", simplifiedAllele);
	    }
	freeMem(simplifiedAllele);
	}

    if (geneId->next != NULL)
        dyStringAppendC(html, ' ');
    slFreeList(&alleleList);
    freeMem(geneName);
    }

slFreeList(&geneIdList);
return dyStringCannibalize(&html);
}
Esempio n. 22
0
void liftOverMerge(char *oldFile, char *newFile)
/* liftOverMerge - Merge regions in BED5  generated by liftOver -multiple
 * Reads oldFile as five-field bed, groups consecutive items that share a
 * name into regions, and within each region folds together items on the same
 * chromosome that overlap or lie within mergeGap (defined outside this
 * function) of each other.  The surviving items are written to newFile with
 * the score replaced by a 1-based index within the region. */
{
struct bed *bedList = NULL, *bed = NULL, *otherBed = NULL, *nextBed = NULL;
struct bedList *bedListHeaders = NULL, *bedListHeader = NULL;
FILE *f = mustOpen(newFile, "w");

bedList = bedLoadNAll(oldFile, 5);

/* break down bed list into a list of lists, one per "region", where region
 * is the name field in the bed */
for (bed = bedList; bed != NULL; bed = nextBed)
    {
    verbose(3, "%s:%d-%d %s %d\n", bed->chrom, bed->chromStart, bed->chromEnd,
                                        bed->name, bed->score);
    if (bedListHeader == NULL || 
            differentString(bed->name, bedListHeader->name))
        {
        verbose(2, "region %s\n", bed->name);
        AllocVar(bedListHeader);
        bedListHeader->name = cloneString(bed->name);
        slAddHead(&bedListHeaders, bedListHeader);
        }
    /* Save the successor first: slAddHead rewrites bed->next. */
    nextBed = bed->next;
    slAddHead(&bedListHeader->bed, bed);
    }
slReverse(&bedListHeaders);

for (bedListHeader = bedListHeaders; bedListHeader != NULL; 
        bedListHeader = bedListHeader->next)
    {
    int ix = 1;
    verbose(3, "region %s\n", bedListHeader->name);
    /* Items were added head-first above; put them back in input order. */
    slReverse(&bedListHeader->bed);

    /* traverse list of bed lists, merging overlapping entries 
     * for each region */
    for (bed = bedListHeader->bed; bed != NULL; bed = bed->next)
        {
        for (otherBed = bed->next; otherBed != NULL; otherBed = nextBed)
            {
            /* otherBed may be unlinked below, so fetch its successor now. */
            nextBed = otherBed->next;
            if (sameString(bed->chrom, otherBed->chrom) && 
                (max(bed->chromStart, otherBed->chromStart) <= 
                 min(bed->chromEnd, otherBed->chromEnd) + mergeGap))
                {
                /* these regions overlap (or are within the merge gap),
                 * so create one that is a merge, and drop the other */
                verbose(2,"merging %s:%d-%d, %s:%d-%d (overlap=%d)",
                    otherBed->chrom, otherBed->chromStart, otherBed->chromEnd,
                    bed->chrom, bed->chromStart, bed->chromEnd,
                    min(bed->chromEnd, otherBed->chromEnd) -
                        max(bed->chromStart, otherBed->chromStart)); 
                bed->chromStart = min(otherBed->chromStart, bed->chromStart);
                bed->chromEnd = max(otherBed->chromEnd, bed->chromEnd);
                verbose(2," to %s:%d-%d\n",
                        bed->chrom, bed->chromStart, bed->chromEnd);
                slRemoveEl(&bedListHeader->bed, otherBed);
                }
            }
        }
    /* Number the survivors of this region and write them out. */
    for (otherBed = bedListHeader->bed; otherBed != NULL; 
            otherBed = otherBed->next)
        {
        otherBed->score = ix++;
        bedOutputN(otherBed, 5, f, '\t', '\n');
        }
    }
/* Close the output here rather than relying on process exit. */
carefulClose(&f);
}
Esempio n. 23
0
int main(int argc, char *argv[])
/* Arguments: database proteinFile outputFile.
 * Scans proteinFile, which is expected to be a sequence of records that each
 * begin with an "ID   " line and end with a "//" line.  For every record
 * whose ID is found as knownGene.proteinID in the database, one line
 * "kgID <tab> alias" is written to outputFile for each name on the record's
 * "GN   " lines; names on a line are separated by " OR " or " AND ", and a
 * leading '(' and anything from ')' onward are dropped.
 * Exits with status 1 on a malformed ID line. */
{
struct sqlConnection *conn;

FILE *inf;
FILE *o1;

char cond_str[256];
char *database;
char *proteinFileName;
char *outputFileName;

char line[2000];

if (argc != 4) usage();

database         = cloneString(argv[1]);
proteinFileName  = cloneString(argv[2]);
outputFileName   = cloneString(argv[3]);

conn = hAllocConn(database);

o1 = mustOpen(outputFileName, "w");

if ((inf = mustOpen(proteinFileName, "r")) == NULL)
    {
    fprintf(stderr, "Can't open file %s.\n", proteinFileName);
    exit(8);
    }

/* Each pass of the outer loop consumes one whole record. */
while (fgets(line, sizeof(line), inf) != NULL)
    {
    char *id, *idEnd, *answer;
    char *kgID = NULL;

    if (strstr(line, "ID   ") != line)
        {
        fprintf(stderr, "expected ID line, but got: %s\n", line);
        exit(1);
        }
    id = line + strlen("ID   ");
    idEnd = strchr(id, ' ');
    if (idEnd == NULL)
        {
        /* Without this check the terminator below would be written through NULL. */
        fprintf(stderr, "expected space after ID, but got: %s\n", line);
        exit(1);
        }
    *idEnd = '\0';

    /* id points into line, which is only overwritten after this lookup. */
    sqlSafefFrag(cond_str, sizeof cond_str, "proteinID = '%s'", id);
    answer = sqlGetField(database, "knownGene", "name", cond_str);
    if (answer != NULL)
        kgID = strdup(answer);

    /* Remaining lines of the record, up to "//" or end of file. */
    while (fgets(line, sizeof(line), inf) != NULL)
        {
        char *lineEnd, *rest;

        /* "//" signal end of a record */
        if ((line[0] == '/') && (line[1] == '/')) break;

        /* work on GN (Gene Name) line only
         * NOTE(review): the tag is matched anywhere in the line but the names
         * are read from column 5; kept as in the original. */
        if (strstr(line, "GN   ") == NULL)
            continue;

        /* Cut the line at a trailing '.' if present, else at its last
         * character (normally the newline). */
        lineEnd = line + strlen(line) - 2;
        if (*lineEnd != '.')
            lineEnd++;
        *lineEnd = '\0';

        rest = line + 5;
        while (rest != NULL)
            {
            char *orSep, *andSep, *sep, *alias, *closeParen;

            while (*rest == ' ') rest++;

            /* Split at whichever separator comes first. */
            orSep = strstr(rest, " OR ");
            andSep = strstr(rest, " AND ");
            if (orSep != NULL && andSep != NULL)
                sep = (orSep < andSep) ? orSep : andSep;
            else
                sep = (orSep != NULL) ? orSep : andSep;

            alias = rest;
            if (sep == NULL)
                rest = NULL;
            else
                {
                *sep = '\0';
                /* Past " OR "; for " AND " this lands on its trailing
                 * space, which the skip loop above removes. */
                rest = sep + 4;
                }

            if (kgID != NULL)
                {
                /* clean up "(XXXX" or "XXXX)"; edited in place since the
                 * name is already terminated within line. */
                if (*alias == '(') alias++;
                closeParen = strchr(alias, ')');
                if (closeParen != NULL) *closeParen = '\0';

                fprintf(o1, "%s\t%s\n", kgID, alias);
                }
            }
        }
    free(kgID);
    }
fclose(inf);
fclose(o1);
hFreeConn(&conn);
return(0);
}