Exemple #1
0
void pslSort(char *command, char *outFile, char *tempDir, char *inDirs[], int inDirCount)
/* Do the two step sort.
 *
 * command is matched with startsWith() against "dirs" and "g2g"; anything
 * else calls usage().  "g2g" turns on reflection of non-self files and
 * suppression of self alignments.  endsWith(command, "1") selects first step
 * only, endsWith(command, "2") selects second step only.
 *
 * Step one collects the *.psl files (or *.psl.gz if a directory has no
 * *.psl) from each of the inDirCount directories in inDirs, reads them in
 * groups of round(sqrt(fileCount)) files, sorts each group with pslCmpQuery
 * and writes it to a mid file named by makeMidName() under tempDir.
 * Step two hands outFile and tempDir to pslSort2(). */
{
int i;
struct slName *fileList = NULL, *name;
char *inDir;
struct slName *dirDir, *dirFile;
char fileName[512];
int fileCount;
int totalFilesProcessed = 0;
int filesPerMidFile;
int midFileCount = 0;
FILE *f;
struct lineFile *lf;
boolean doReflect = FALSE;
boolean suppressSelf = FALSE;
boolean firstOnly = endsWith(command, "1");
boolean secondOnly = endsWith(command, "2");

if (startsWith("dirs", command))
    ;
else if (startsWith("g2g", command))
    {
    doReflect = TRUE;
    suppressSelf = TRUE;
    }
else
    usage();


if (!secondOnly)
    {
    makeDir(tempDir);
    /* Figure out how many files to process. */
    for (i=0; i<inDirCount; ++i)
        {
        inDir = inDirs[i];
        dirDir = listDir(inDir, "*.psl");
        if (slCount(dirDir) == 0)
            dirDir = listDir(inDir, "*.psl.gz");
        if (slCount(dirDir) == 0)
            errAbort("No psl files in %s\n", inDir);
        verbose(1, "%s with %d files\n", inDir, slCount(dirDir));
        for (dirFile = dirDir; dirFile != NULL; dirFile = dirFile->next)
            {
            /* Bounded formatting: a long directory/file name must not run
             * past the end of fileName. */
            int len = snprintf(fileName, sizeof(fileName), "%s/%s", inDir, dirFile->name);
            if (len < 0 || len >= (int)sizeof(fileName))
                errAbort("File name too long: %s/%s", inDir, dirFile->name);
            name = newSlName(fileName);
            slAddHead(&fileList, name);
            }
        slFreeList(&dirDir);
        }
    verbose(1, "%d files in %d dirs\n", slCount(fileList), inDirCount);
    slReverse(&fileList);
    fileCount = slCount(fileList);
    filesPerMidFile = round(sqrt(fileCount));
    verbose(1, "Got %d files %d files per mid file\n", fileCount, filesPerMidFile);

    /* Read in files a group at a time, sort, and write merged, sorted
     * output of one group. */
    name = fileList;
    while (totalFilesProcessed < fileCount)
        {
        int filesInMidFile = 0;
        struct psl *pslList = NULL, *psl;
        int lfileCount = 0;
        /* All psl records of this group live in lm and are released together
         * by lmCleanup() once the mid file has been written. */
        struct lm *lm = lmInit(256*1024);

        for (filesInMidFile = 0; filesInMidFile < filesPerMidFile && name != NULL;
            ++filesInMidFile, ++totalFilesProcessed, name = name->next)
            {
            boolean reflectMe = FALSE;
            if (doReflect)
                {
                reflectMe = !selfFile(name->name);
                }
            verbose(2, "Reading %s (%d of %d)\n", name->name, totalFilesProcessed+1, fileCount);
            lf = pslFileOpen(name->name);
            while ((psl = nextLmPsl(lf, lm)) != NULL)
                {
                /* Skip a '+' strand alignment of a sequence to itself at the
                 * same start when self suppression is on. */
                if (psl->qStart == psl->tStart && psl->strand[0] == '+' && 
                    suppressSelf && sameString(psl->qName, psl->tName))
                    {
                    continue;
                    }
                ++lfileCount;
                slAddHead(&pslList, psl);
                if (reflectMe)
                    {
                    psl = mirrorLmPsl(psl, lm);
                    slAddHead(&pslList, psl);
                    }
                }
            lineFileClose(&lf);
            }
        slSort(&pslList, pslCmpQuery);
        makeMidName(tempDir, midFileCount, fileName);
        verbose(1, "Writing %s\n", fileName);
        f = mustOpen(fileName, "w");
        if (!nohead)
            pslWriteHead(f);
        for (psl = pslList; psl != NULL; psl = psl->next)
            {
            pslTabOut(psl, f);
            }
        /* fclose flushes buffered output; a failure here means the mid file
         * is incomplete, so do not carry on to the merge step with it. */
        if (fclose(f) != 0)
            errAbort("Error closing %s", fileName);
        pslList = NULL;
        lmCleanup(&lm);
        verbose(2, "lfileCount %d\n", lfileCount);
        ++midFileCount;
        }
    }
if (!firstOnly)
    pslSort2(outFile, tempDir);
}
void sortRoughAlis(struct roughAli **pAli)
/* Sort the list pointed to by pAli, using cmpRoughAli as the comparison. */
{
slSort(pAli, cmpRoughAli);
}
Exemple #3
0
void txGeneCanonical(char *codingCluster, char *infoFile, 
	char *noncodingGraph, char *genesBed, char *nearCoding, 
	char *outCanonical, char *outIsoforms, char *outClusters)
/* txGeneCanonical - For each gene choose the canonical transcript, meaning
 * the form to use when only a single splicing variant is of interest.  The
 * final transcript clusters are written out as well.
 *
 * Inputs are loaded from codingCluster, noncodingGraph, genesBed, infoFile
 * and nearCoding; results go to outCanonical, outIsoforms and outClusters. */
{
/* Pull all of the inputs into in-memory lists. */
struct txCluster *codingList = txClusterLoadAll(codingCluster);
struct txGraph *graphList = txGraphLoadAll(noncodingGraph);
struct bed *bedList = bedLoadNAll(genesBed, 12);
struct txInfo *infoList = txInfoLoadAll(infoFile);
struct bed *nearList = bedLoadNAll(nearCoding, 12);

/* Index every bed by name. */
struct hash *bedHash = hashNew(18);
struct bed *bed = bedList;
while (bed != NULL)
    {
    hashAdd(bedHash, bed->name, bed);
    bed = bed->next;
    }

/* Index every info record by name. */
struct hash *infoHash = hashNew(18);
struct txInfo *info = infoList;
while (info != NULL)
    {
    hashAdd(infoHash, info->name, info);
    info = info->next;
    }

/* Per-chromosome binKeepers that will receive the coding genes. */
struct hash *sizeHash = minChromSizeFromBeds(bedList);
struct hash *keeperHash = minChromSizeKeeperHash(sizeHash);

/* Build a gene from each coding cluster and file it in the binKeeper for
 * its chromosome.  This uses up the bed list; bedHash stays valid. */
struct gene *gene = NULL;
struct gene *geneList = NULL;
struct txCluster *coding = codingList;
while (coding != NULL)
    {
    gene = geneFromCluster(coding, bedHash, infoHash);
    slAddHead(&geneList, gene);
    struct binKeeper *keeper = hashMustFindVal(keeperHash, gene->chrom);
    binKeeperAdd(keeper, gene->start, gene->end, gene);
    coding = coding->next;
    }

/* Attach each near-coding bed to the coding gene it overlaps most.
 * The next pointer is saved before geneAddBed() is handed the bed. */
struct bed *followingBed = NULL;
for (bed = nearList; bed != NULL; bed = followingBed)
    {
    followingBed = bed->next;
    gene = mostOverlappingGene(keeperHash, bed);
    if (gene == NULL)
        errAbort("%s is near coding, but doesn't overlap any coding!?", bed->name);
    geneAddBed(gene, bed);
    }

/* Non-coding genes come from the graphs. */
struct txGraph *graph = graphList;
while (graph != NULL)
    {
    gene = geneFromGraph(graph, bedHash);
    slAddHead(&geneList, gene);
    graph = graph->next;
    }

/* Sorted output is nicer to read. */
slSort(&geneList, geneCmp);

/* Output files. */
FILE *fCan = mustOpen(outCanonical, "w");
FILE *fIso = mustOpen(outIsoforms, "w");
FILE *fClus = mustOpen(outClusters, "w");

/* One pass over the genes: invent a name and emit all three outputs. */
int geneId = 0;
for (gene = geneList; gene != NULL; gene = gene->next)
    {
    /* Gene name is "g" followed by the zero-padded running id. */
    char name[16];
    ++geneId;
    safef(name, sizeof(name), "g%05d", geneId);

    /* Put the transcript list back in a nicer-looking order. */
    slReverse(&gene->txList);

    /* Canonical output: position of the nice transcript, id, and its name
     * written twice. */
    char *niceName = gene->niceTx->name;
    bed = hashMustFindVal(bedHash, niceName);
    fprintf(fCan, "%s\t%d\t%d\t%d\t%s\t%s\n",
        bed->chrom, bed->chromStart, bed->chromEnd, geneId,
        niceName, niceName);

    /* Isoforms output: one line per transcript of this gene. */
    for (bed = gene->txList; bed != NULL; bed = bed->next)
        fprintf(fIso, "%d\t%s\n", geneId, bed->name);

    /* Cluster output begins with the six standard bed fields. */
    fprintf(fClus, "%s\t%d\t%d\t%s\t%d\t%c\t",
        gene->chrom, gene->start, gene->end, name, 0, gene->strand);

    /* Thick start/end.  Non-coding genes get start/start.  Coding genes
     * begin with an inverted (end, start) range that is widened by every
     * transcript having a non-empty thick region. */
    int thickStart = gene->start;
    int thickEnd = gene->start;
    if (gene->isCoding)
        {
        thickStart = gene->end;
        for (bed = gene->txList; bed != NULL; bed = bed->next)
            {
            if (bed->thickStart < bed->thickEnd)
                {
                thickStart = min(thickStart, bed->thickStart);
                thickEnd = max(thickEnd, bed->thickEnd);
                }
            }
        }
    fprintf(fClus, "%d\t%d\t", thickStart, thickEnd);

    /* No rgb value is available, so a zero is written. */
    fprintf(fClus, "0\t");

    /* Exons come from exonTree: count, then starts relative to the gene,
     * then sizes. */
    struct range *exonList = rangeTreeList(gene->exonTree);
    struct range *exon;
    fprintf(fClus, "%d\t", slCount(exonList));
    for (exon = exonList; exon != NULL; exon = exon->next)
        fprintf(fClus, "%d,", exon->start - gene->start);
    fprintf(fClus, "\t");
    for (exon = exonList; exon != NULL; exon = exon->next)
        fprintf(fClus, "%d,", exon->end - exon->start);
    fprintf(fClus, "\t");

    /* Transcripts belonging to the gene: count, then comma-separated names. */
    fprintf(fClus, "%d\t", slCount(gene->txList));
    for (bed = gene->txList; bed != NULL; bed = bed->next)
        fprintf(fClus, "%s,", bed->name);
    fprintf(fClus, "\t");

    /* Name of the nice transcript. */
    fprintf(fClus, "%s\t", niceName);

    /* Coding/noncoding flag ends the line. */
    fprintf(fClus, "%d\n", gene->isCoding);
    }

/* Close the outputs. */
carefulClose(&fCan);
carefulClose(&fIso);
carefulClose(&fClus);
}
Exemple #4
0
struct trackDb *showTrackField(struct grp *selGroup, char *trackVar, char *trackScript,
                               boolean disableNoGenome)
/* Print the track drop-down and return the selected track.
 *
 * When selGroup is "allTables" a database menu is printed instead and NULL
 * is returned.  Otherwise the menu lists every track in fullTrackList (group
 * "allTracks", sorted by short label) or only the tracks of selGroup.
 * With disableNoGenome set, options for which isNoGenomeDisabled() is true
 * are printed DISABLED; if the selected track is one of those, the first
 * enabled track in the menu is returned in its place. */
{
struct trackDb *selTrack = NULL;

if (trackScript == NULL)
    trackScript = "";

if (sameString(selGroup->name, "allTables"))
    {
    /* Database menu. */
    char *selDb = findSelDb();
    struct slName *dbList = getDbListForGenome();
    struct slName *dbEl;
    hPrintf("<B>database:</B>\n");
    hPrintf("<SELECT NAME=%s %s>\n", trackVar, trackScript);
    for (dbEl = dbList; dbEl != NULL; dbEl = dbEl->next)
        {
        char *selAttr = "";
        if (sameString(dbEl->name, selDb))
            selAttr = " SELECTED";
        hPrintf(" <OPTION VALUE=%s%s>%s\n", dbEl->name, selAttr, dbEl->name);
        }
    hPrintf("</SELECT>\n");
    hPrintf("\n");
    return selTrack;
    }

/* Track menu. */
boolean allTracks = sameString(selGroup->name, "allTracks");
hPrintf("<B>track:</B>\n");
hPrintf("<SELECT NAME=\"%s\" %s>\n", trackVar, trackScript);

/* The selection is looked up before the all-tracks list is re-sorted. */
selTrack = findSelectedTrack(fullTrackList, (allTracks ? NULL : selGroup), trackVar);
if (allTracks)
    slSort(&fullTrackList, trackDbCmpShortLabel);

boolean selTrackIsDisabled = FALSE;
struct trackDb *firstEnabled = NULL;
struct trackDb *track;
for (track = fullTrackList; track != NULL; track = track->next)
    {
    /* Only tracks of the chosen group are listed unless showing all. */
    if (!allTracks && !sameString(selGroup->name, track->grp))
        continue;

    hPrintf(" <OPTION VALUE=\"%s\"", track->track);
    if (cartTrackDbIsNoGenome(database, track->table))
        hPrintf(NO_GENOME_CLASS);

    if (disableNoGenome && isNoGenomeDisabled(database, track->table))
        {
        hPrintf(" DISABLED");
        if (track == selTrack)
            selTrackIsDisabled = TRUE;
        }
    else if (firstEnabled == NULL)
        {
        /* Remember the fallback in case the selection turns out disabled. */
        firstEnabled = track;
        }

    if (track == selTrack && !selTrackIsDisabled)
        hPrintf(" SELECTED");
    hPrintf(">%s</OPTION>", track->shortLabel);
    }

if (selTrackIsDisabled)
    selTrack = firstEnabled;
hPrintf("</SELECT>\n");
hPrintf("\n");
return selTrack;
}
Exemple #5
0
static void showLinkedTables(struct joiner *joiner, struct dbTable *inList,
	char *varPrefix, char *buttonName, char *buttonText)
/* Print section with list of linked tables and check boxes to turn them
 * on.
 *
 * For every table in inList the joiner is asked (joinerRelate) which tables
 * it relates to; each related table that is not access-denied and has not
 * been seen yet is collected, the collection is sorted with dbTableCmp and
 * printed as an HTML table with one check box per row, followed by a button
 * (buttonName/buttonText).  Nothing is printed when no linked table is
 * found.  The first table of inList is never listed as an output. */
{
struct dbTable *outList = NULL, *out, *in;
char dtName[256];
struct hash *uniqHash = newHash(0);
struct hash *inHash = newHash(8);

/* Build up list of tables we link to in outList. */
for (in = inList; in != NULL; in = in->next)
    {
    struct sqlConnection *conn = NULL;
    if (!trackHubDatabase(database))
        conn = hAllocConn(in->db);
    struct joinerPair *jpList, *jp;

    /* Keep track of tables in inList.  The key is built from the current
     * element (in), not from the list head, so that every input table is
     * recorded rather than the first one repeatedly. */
    safef(dtName, sizeof(dtName), "%s.%s", in->db, in->table);
    hashAdd(inHash, dtName, NULL);

    /* First table in input is not allowed in output. */
    if (in == inList)
        hashAdd(uniqHash, dtName, NULL);

    /* Scan through joining information and add tables,
     * avoiding duplicate additions. */
    jpList = joinerRelate(joiner, in->db, in->table);
    for (jp = jpList; jp != NULL; jp = jp->next)
        {
        safef(dtName, sizeof(dtName), "%s.%s",
                jp->b->database, jp->b->table);
        if (!hashLookup(uniqHash, dtName) &&
           !cartTrackDbIsAccessDenied(jp->b->database, jp->b->table))
            {
            hashAdd(uniqHash, dtName, NULL);
            out = dbTableNew(jp->b->database, jp->b->table);
            slAddHead(&outList, out);
            }
        }
    joinerPairFreeList(&jpList);
    /* NOTE(review): conn is not used in this loop and is NULL for track hub
     * databases; confirm hFreeConn() accepts a NULL connection. */
    hFreeConn(&conn);
    }
slSort(&outList, dbTableCmp);

/* Print html. */
if (outList != NULL)
    {
    webNewSection("Linked Tables");
    hTableStart();
    for (out = outList; out != NULL; out = out->next)
        {
        /* The .as description of the table, when there is one, supplies the
         * comment shown in the last column. */
        struct sqlConnection *conn = hAllocConn(out->db);
        struct asObject *asObj = asForTable(conn, out->table);
        char *var = dbTableVar(varPrefix, out->db, out->table);
        hPrintf("<TR>");
        hPrintf("<TD>");
        cgiMakeCheckBox(var, varOn(var));
        hPrintf("</TD>");
        hPrintf("<TD>%s</TD>", out->db);
        hPrintf("<TD>%s</TD>", out->table);
        hPrintf("<TD>");
        if (asObj != NULL)
            hPrintf("%s", asObj->comment);
        else
            hPrintf("&nbsp;");
        hPrintf("</TD>");
        hPrintf("</TR>");
        hFreeConn(&conn);
        }
    hTableEnd();
    hPrintf("<BR>");

    cgiMakeButton(buttonName, buttonText);
    }
}
Exemple #6
0
int main(int argc, char *argv[])
/* Read every file named on the command line, accumulate contig pairs per
 * BAC and print statistics.
 *
 * Each non-blank line is split into words.  Word 1 must be "glues" and word 2
 * is the BAC name.  From word 4 on, the line is read in groups of five words:
 * contig index, strand, direction, a "(start-end)" range and a score.  The
 * contigs of a line are sorted with cmpContigTrack and passed to addPairs()
 * for the BAC's bacTrack, which is created and added to the global bacList
 * on first sight.  After all files are read, bacList is sorted with
 * cmpBacTrack and printStats() is called. */
{
struct hash *bacHash;
char line[1024];
int lineCount;
char *words[256];
int wordCount;
int fileIx;
char *fileName;
FILE *f;

if (argc < 2)
    usage();
bacHash = newHash(16);

for (fileIx = 1; fileIx < argc; ++fileIx)
    {
    fileName = argv[fileIx];
    uglyf("Processing %s\n", fileName);
    f = mustOpen(fileName, "r");
    lineCount = 0;
    while (fgets(line, sizeof(line), f))
        {
        ++lineCount;
        wordCount = chopLine(line, words);
        if (wordCount == ArraySize(words))
            errAbort("Too many words line %d of %s\n", lineCount, fileName);
        if (wordCount != 0)
            {
            char *bacName;
            int cIx;
            struct contigTrack *ctList = NULL, *ct;
            struct bacTrack *bt;
            struct hashEl *hel;

            /* Check line syntax and parse it.  words[1] and words[2] only
             * hold this line's data when at least three words were found. */
            if (wordCount < 3 || !sameString(words[1], "glues"))
                errAbort("Bad format line %d of %s\n", lineCount, fileName);
            bacName = words[2];
            for (cIx = 4; cIx < wordCount; cIx += 5)
                {
                char *parts[3];
                int partCount;

                /* A contig needs all five of its words; a truncated group
                 * would otherwise be filled in from words left over from an
                 * earlier line (or never set at all). */
                if (cIx + 4 >= wordCount)
                    errAbort("Bad format line %d of %s\n", lineCount, fileName);

                AllocVar(ct);
                ct->ix = atoi(words[cIx]);
                ct->strand = words[cIx+1][0];
                ct->dir = words[cIx+2][0];
                partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts));
                if (partCount != 2)
                    errAbort("Bad format line %d of %s\n", lineCount, fileName);
                ct->start = atoi(parts[0]);
                ct->end = atoi(parts[1]);
                ct->cookedScore = atof(words[cIx+4]);
                slAddHead(&ctList, ct);
                }
            slSort(&ctList, cmpContigTrack);

            /* Lookup bacTrack and make it if new. */
            hel = hashLookup(bacHash, bacName);
            if (hel == NULL)
                {
                AllocVar(bt);
                hel = hashAdd(bacHash, bacName, bt);
                /* bacName points into the line buffer, so the bacTrack takes
                 * its name from the hash element instead. */
                bt->name = hel->name;
                slAddHead(&bacList, bt);
                }
            else
                {
                bt = hel->val;
                }

            /* Process pairs into bacTrack. */
            addPairs(bt, ctList);
            slFreeList(&ctList);
            }
        }
    fclose(f);
    }
slSort(&bacList, cmpBacTrack);

printStats();
return 0;
}
Exemple #7
0
/*	Start at the calculated median point, scan through the
 *	coordinates and adjust the start and end of the clustered region
 *	to include the appropriate section.
 *
 *	coordListPt - list of placed parts; it is re-sorted here and is
 *		left sorted by startCompare on return
 *	median, querySize - the initial window is median +/- querySize/2,
 *		with the start clamped at zero
 *	startExtended, endExtended - receive the final region limits
 *	ctgName - contig name, used in messages and in AGP lines
 *	partsConsidered - number of parts the caller expected; more parts
 *		than this inside the region is reported as an error
 *
 *	Returns the number of parts lying entirely within the final
 *	region.  When the global agp is set, one AGP line per such part
 *	is written (to stdout, or only to verbose output when too many
 *	parts were used and allowDuplicates is off).
 */
static int extendLimits(struct coordEl **coordListPt, unsigned median,
    unsigned querySize, unsigned *startExtended, unsigned *endExtended,
    char *ctgName, int partsConsidered)
{
struct coordEl *coord;
unsigned halfLength = querySize / 2;
boolean firstCoordinate = TRUE;
int partsUsed = 0;
int partsNotUsed = 0;
int totalParts = 0;
char *cloneName = (char *)NULL;
char *cloneLabel;
boolean tooManyParts = FALSE;

if (halfLength > median)
    *startExtended = 0;
else
    *startExtended = median - halfLength;

*endExtended = median + halfLength;
verbose(2,"# starting limits: %u - %u\n", *startExtended, *endExtended);

/*	sort the list descending by end coordinates	*/
slSort(coordListPt,endDescending);

if (coordListPt) coord = *coordListPt;
else coord = NULL;

/*	Walk through this list extending the start.
 *	Same discussion as below, although reverse the sense of start
 *	and end.  Here the list is sorted in descending order by end
 *	coordinate.  Those end coordinates are compared with the
 *	extending start coordinate to move it out.
 */
verbose(2,"# after end sort\n");

firstCoordinate = TRUE;
while (coord != NULL)
    {
    if (firstCoordinate)
        {
        /*	the first element has the largest end; the region may
         *	not reach past it
         */
        if (*endExtended > coord->end)
            {
            *endExtended = coord->end;
            verbose(2,"# end brought in to: %u\n", *endExtended);
            }
        firstCoordinate = FALSE;
        }
    verbose(2,"# %s %u - %u %u %u %c\n", coord->name, coord->start, coord->end, *startExtended, coord->qSize, (coord->strand == 1) ? '+' : '-');
    if (coord->end < *startExtended)
        {
        unsigned gap = *startExtended - coord->end;

        if (gap > maxGap)
            {
            verbose(2,"# more than max Gap encountered: %u\n", gap);
            break;	/*	exit this while loop	*/
            }
        *startExtended = coord->start;
        verbose(2,"# start extended to: %u\n", *startExtended);
        }
    else if (coord->start < *startExtended)
        {
        *startExtended = coord->start;
        verbose(2,"# start extended to: %u\n", *startExtended);
        }

    coord = coord->next;
    }

/*	sort the list by start coordinates	*/
slSort(coordListPt,startCompare);

if (coordListPt) coord = *coordListPt;
else coord = NULL;

/*	Walk through this list extending the end.  The list is in order
 *	by start coordinates.  Going down that list checking the
 *	extended end with these start coordinates, eventually we reach a
 *	point where the start coordinates are past the end leaving a
 *	gap.  As long as the gap is within the specified maxGap limit,
 *	then it is OK to jump to that next piece.  The new end becomes
 *	the end of this new piece.
 *	And secondly, even if the starts aren't past the extending end,
 *	the piece under examination may have a new end that is longer,
 *	in which case the extending end moves to that point.
 */
verbose(2,"# extending end\n");

/*	The first coordinate check will ensure that the extended start
 *	coordinate is not less than the smallest start coordinate.
 *	Thus, only the actual part coverage will determine the maximum
 *	limits and we won't go beyond the parts.
 */
firstCoordinate = TRUE;
while (coord != NULL)
    {
    if (firstCoordinate)
        {
        if (*startExtended < coord->start)
            {
            *startExtended = coord->start;
            verbose(2,"# start brought in to: %u\n", *startExtended);
            }
        firstCoordinate = FALSE;
        }
    verbose(2,"# %s %u %u - %u %u %c\n", coord->name, *endExtended, coord->start, coord->end, coord->qSize, (coord->strand == 1) ? '+' : '-');
    if (coord->start > *endExtended)
        {
        unsigned gap = coord->start - *endExtended;

        if (gap > maxGap)
            {
            verbose(2,"# more than max Gap encountered: %u\n", gap);
            break;	/*	exit this while loop	*/
            }
        *endExtended = coord->end;
        verbose(2,"# end extended to: %u\n", *endExtended);
        }
    else if (coord->end > *endExtended)
        {
        *endExtended = coord->end;
        verbose(2,"# end extended to: %u\n", *endExtended);
        }
    coord = coord->next;
    }

/*	Let's count the number of parts included in the decided range.
 *	The name of the last list element is kept for the messages
 *	below; the list outlives those messages so no copy is needed.
 */
if (coordListPt) coord = *coordListPt;
else coord = NULL;

partsUsed = 0;
partsNotUsed = 0;
while (coord != NULL)
    {
    if ( (coord->start >= *startExtended) && (coord->end <= *endExtended))
        ++partsUsed;
    else
        ++partsNotUsed;
    if (coord->next == NULL)
        cloneName = coord->name;
    coord = coord->next;
    }

/*	An empty list leaves cloneName NULL, which must not be handed
 *	to a %s conversion.
 */
cloneLabel = (cloneName != NULL) ? cloneName : "(null)";

/*	The empty-list case is tested first; tested after partsUsed < 1
 *	it could never be reached.
 */
totalParts = partsUsed + partsNotUsed;
if (totalParts < 1)
    {
    verbose(1,"# ERROR %s %s - no parts found in the answer, used or unused\n",
        ctgName, cloneLabel);
    }
else if (partsUsed < 1)
    {
    verbose(1,"# ERROR %s %s - no parts found in the answer, %d considered\n",
        ctgName, cloneLabel, partsNotUsed);
    }
else if (partsUsed > partsConsidered)
    {
    tooManyParts = TRUE;
    verbose(1,"# ERROR %s %s too many parts used: %d > %d considered\n",
        ctgName, cloneLabel, partsUsed, partsConsidered);
    }
else
    {
    verbose(2,"# %s %s total parts considered %d, parts used %d, parts unused %d, fraction %% %7.2f\n",
        ctgName, cloneLabel, totalParts, partsUsed, partsNotUsed,
        100.0 * (double) partsUsed / (double) totalParts );
    }

/*	If agp output, we need to do it here	*/
if (agp)
    {
    if (coordListPt) coord = *coordListPt;
    else coord = NULL;

    int cloneCount = 0;
    while (coord != NULL)
        {
        if ( (coord->start >= *startExtended) && (coord->end <= *endExtended))
            {
            ++cloneCount;
            /*	+1 to the start for 1 relative coordinates in the AGP file */
            /*	The status D will be fixed later	*/
            /*	These ones with tooManyParts need to be fixed elsewhere	*/
            /*	If allowDuplicates is requested, let them be used */
            if (tooManyParts && (! allowDuplicates) )
                {
                verbose(1,"#AGP %s\t%u\t%u\t%d\tD\t%s\t%u\t%u\t%c\n", ctgName,
                    coord->start+1, coord->end, cloneCount, coord->name,
                    coord->start - *startExtended + 1, coord->end - *startExtended,
                    (coord->strand == 1) ? '+' : '-');
                }
            else
                {
                printf("%s\t%u\t%u\t%d\tD\t%s\t%u\t%u\t%c\n", ctgName, coord->start+1,
                    coord->end, cloneCount, coord->name,
                    coord->start - *startExtended + 1, coord->end - *startExtended,
                    (coord->strand == 1) ? '+' : '-');
                }
            }
        coord = coord->next;
        }
    }

return (partsUsed);
}	/*	static int extendLimits() */
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, 
	struct hash *chromSizeHash, struct lm *lm, 
	int itemsPerSlot, struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format.
 *
 * Lines are read from lf until end of file or until stepTypeLine() recognizes
 * the line, which is then pushed back.  Each line must have exactly 4 words:
 * chrom, start, end, value.  Items are grouped per chromosome, sorted, cut
 * into runs of at most itemsPerSlot items, and each run is added to the head
 * of *pSectionList as a bwgTypeBedGraph section.  Items and sections are
 * allocated from lm.
 *
 * When chromSizeHash is non-NULL an item ending past its chromosome size
 * draws a warning; the item is then dropped if clipDontDie is set, otherwise
 * the program aborts. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
        lineFileReuse(lf);
        break;
        }

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        /* Chromosome records live in the hash's own memory pool and go away
         * with the hash at the end of this function. */
        lmAllocVar(chromHash->lm, chrom);
        hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
        chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
        slAddHead(&chromList, chrom);
        }

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end (%u) line %d of %s.", 
                item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
        {
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
                lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
        if (!clipDontDie)
            noWarnAbort();
        }
    else
        {
        slAddHead(&chrom->itemList, item);
        }
    }
slSort(&chromList, bedGraphChromCmpName);

for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
        {
        /* Find end item of this section, and start item for next section.
         * Terminate list at end item. */
        int sectionSize = 0;
        int i;
        endItem = startItem;
        for (i=0; i<itemsPerSlot; ++i)
            {
            if (nextStartItem == NULL)
                break;
            endItem = nextStartItem;
            nextStartItem = nextStartItem->next;
            ++sectionSize;
            }
        endItem->next = NULL;

        /* Fill in section and add it to section list.  The chromosome name
         * is cloned because chrom->name belongs to chromHash, which is freed
         * below. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = cloneString(chrom->name);
        section->start = startItem->start;
        section->end = endItem->end;
        section->type = bwgTypeBedGraph;
        section->items.bedGraphList = startItem;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
        }
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in 
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
struct bwgSection *bwgParseWig(
	char *fileName,       /* Name of ascii wig file. */
	boolean clipDontDie,  /* Skip items outside chromosome rather than aborting. */
	struct hash *chromSizeHash,  /* If non-NULL items checked to be inside chromosome. */
	int maxSectionSize,   /* Biggest size of a section.  100 - 100,000 is usual range. */
	struct lm *lm)	      /* Memory pool to allocate from. */
/* Parse out ascii wig file - allocating memory in lm.
 *
 * A line containing "chrom=" starts a stepped section and is handed to
 * parseSteppedSection(); any other line must be a 4-word bedGraph line and
 * starts a bedGraph section.  The resulting sections are sorted with
 * bwgSectionCmp, and the program aborts if two neighbouring sections on the
 * same chromosome overlap.  Returns the sorted section list. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct bwgSection *sectionList = NULL;

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

while (lineFileNextReal(lf, &line))
    {
    verbose(2, "processing %s\n", line);
    if (stringIn("chrom=", line))
        parseSteppedSection(lf, clipDontDie, chromSizeHash, line, lm, maxSectionSize, &sectionList);
    else
        {
        /* Check for bed...  The line is chopped in a copy so that the
         * original can be pushed back intact for the bedGraph parser. */
        char *dupe = cloneString(line);
        char *words[5];
        int wordCount = chopLine(dupe, words);
        if (wordCount != 4)
            errAbort("Unrecognized line %d of %s:\n%s\n", lf->lineIx, lf->fileName, line);

        /* Parse out a bed graph line just to check numerical format. */
        char *chrom = words[0];
        int start = lineFileNeedNum(lf, words, 1);
        int end = lineFileNeedNum(lf, words, 2);
        double val = lineFileNeedDouble(lf, words, 3);
        verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val);

        /* The copy was only needed for the check above (chrom points into
         * it); release it so a file with many sections does not leak one
         * line per section. */
        freeMem(dupe);

        /* Push back line and call bed parser. */
        lineFileReuse(lf);
        parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm, maxSectionSize, &sectionList);
        }
    }

/* All input has been consumed; nothing below refers to lf. */
lineFileClose(&lf);

slSort(&sectionList, bwgSectionCmp);

/* Check for overlap. */
struct bwgSection *section, *nextSection;
for (section = sectionList; section != NULL; section = nextSection)
    {
    nextSection = section->next;
    if (nextSection != NULL)
        {
        if (sameString(section->chrom, nextSection->chrom))
            {
            if (section->end > nextSection->start)
                {
                errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n",
                    section->chrom, nextSection->start+1);
                }
            }
        }
    }

return sectionList;
}
Exemple #10
0
void doTransRegCodeProbe(struct trackDb *tdb, char *item,
	char *codeTable, char *motifTable,
	char *tfToConditionTable, char *conditionTable)
/* Show the details page for one ChIP-chip probe of the transRegCode
 * experiments: name and position, the transcription factors tested on the
 * probe with their growth conditions and any motif hits in the probe's range,
 * then the growth condition section and the track description. */
{
char query[256];
char **row;
int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
struct sqlConnection *conn = hAllocConn(database);
struct transRegCodeProbe *probe = NULL;
struct sqlResult *sr;

cartWebStart(cart, database, "ChIP-chip Probe Info");

/* Load the probe called item from the track table; only the first row is used. */
sqlSafef(query, sizeof(query), "select * from %s where name = '%s'",
	tdb->table, item);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
    probe = transRegCodeProbeLoad(row+rowOffset);
sqlFreeResult(&sr);

if (probe != NULL)
    {
    struct tfData *tfList = NULL;
    struct hash *tfHash = newHash(0);
    int tfIx;

    /* Basic info first. */
    printf("<B>Name:</B> %s<BR>\n", probe->name);
    printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd,
        NULL, TRUE, probe->name);

    /* Collect the transcription factors.  Every tfList entry of the probe
     * reads "factor_condition"; it is split in place at the first '_', and
     * an entry without one gets the condition "n/a". */
    for (tfIx = 0; tfIx < probe->tfCount; ++tfIx)
        {
        char *tfName = probe->tfList[tfIx];
        char *underscore = strchr(tfName, '_');
        char *condition = "n/a";
        struct tfData *tf;
        struct tfCond *cond;

        if (underscore != NULL)
            {
            *underscore = 0;
            condition = underscore + 1;
            }

        /* One tfData per factor, created on first sight. */
        tf = hashFindVal(tfHash, tfName);
        if (tf == NULL)
            {
            AllocVar(tf);
            hashAddSaveName(tfHash, tfName, tf, &tf->name);
            slAddHead(&tfList, tf);
            }

        /* Record this condition and its binding value on the factor. */
        AllocVar(cond);
        cond->name = cloneString(condition);
        cond->binding = probe->bindVals[tfIx];
        slAddHead(&tf->conditionList, cond);
        }
    slSort(&tfList, tfDataCmpName);

    /* Add the motif hits that fall inside the probe, for known factors only. */
    if (sqlTableExists(conn, codeTable))
        {
        sr = hRangeQuery(conn, codeTable,
                probe->chrom, probe->chromStart, probe->chromEnd,
                "chipEvidence != 'none'", &rowOffset);
        for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
            {
            struct transRegCode *trc = transRegCodeLoad(row+rowOffset);
            struct tfData *tf = hashFindVal(tfHash, trc->name);
            if (tf != NULL)
                slAddTail(&tf->trcList, trc);
            }
        sqlFreeResult(&sr);
        }

    if (tfList != NULL)
        tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable);
    else
        printf("No significant immunoprecipitation.");

    transRegCodeProbeFree(&probe);
    growthConditionSection(conn, conditionTable);
    }

printf("\n<HR>\n");
printTrackHtml(tdb);
hFreeConn(&conn);
}
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList)
/* Read the two column (position, value) data in section until get to end.
 *
 * Lines are read from lf until end of file or until steppedSectionEnd()
 * reports the start of something else, which is pushed back.  Positions in
 * the file are 1-based and are stored 0-based.  An item whose end
 * (start + span) lies past chromSize draws a warning and is dropped when
 * clipDontDie is set; otherwise the program aborts.  The items are sorted,
 * packed into arrays of at most itemsPerSlot entries allocated from lm, and
 * each array is added to the head of *pSectionList as a bwgTypeVariableStep
 * section.  The chrom pointer is stored in the sections as is. */
{
/* The linked-list items are only scratch space; they are released by
 * lmCleanup() at the end once packed into arrays in lm. */
struct lm *lmLocal = lmInit(0);

/* Stream through section until get to end of file or next section,
 * adding values from the two columns to list. */
char *words[2];
char *line;
struct bwgVariableStepItem *item, *itemList = NULL;
int originalSectionSize = 0;
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 2))
        {
        lineFileReuse(lf);
        break;
        }
    /* Both columns must be present; with a single word words[1] would be
     * left unset and then parsed as the value. */
    int wordCount = chopLine(line, words);
    if (wordCount != 2)
        errAbort("line %d of %s: expecting 2 words, got %d",
            lf->lineIx, lf->fileName, wordCount);
    lmAllocVar(lmLocal, item);
    int start = lineFileNeedNum(lf, words, 0);
    if (start <= 0)
        {
        errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
            lf->lineIx, lf->fileName);
        }
    item->start = start - 1;
    item->val = lineFileNeedDouble(lf, words, 1);
    if (item->start + span > chromSize)
        {
        warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
            lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
        if (!clipDontDie)
            noWarnAbort();
        }
    else
        {
        slAddHead(&itemList, item);
        ++originalSectionSize;
        }
    }
slSort(&itemList, bwgVariableStepItemCmp);

/* Break up into sections of no more than items-per-slot size. */
int sizeLeft = originalSectionSize;
for (item = itemList; item != NULL; )
    {
    /* Figure out size of this section  */
    int sectionSize = sizeLeft;
    if (sectionSize > itemsPerSlot)
        sectionSize = itemsPerSlot;
    sizeLeft -= sectionSize;

    /* Convert from list to array representation.  The inner loop is what
     * advances item through the list. */
    struct bwgVariableStepPacked *packed, *p;
    p = lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i=0; i<sectionSize; ++i)
        {
        p->start = item->start;
        p->val = item->val;
        item = item->next;
        ++p;
        }

    /* Fill in section and add it to list. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = packed[0].start;
    section->end = packed[sectionSize-1].start + span;
    section->type = bwgTypeVariableStep;
    section->items.variableStepPacked = packed;
    section->itemSpan = span;
    section->itemCount = sectionSize;
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
Exemple #12
0
static void tfBindLevelSection(struct tfData *tfList, struct sqlConnection *conn,
	char *motifTable, char *tfToConditionTable)
/* Open a web section and write an HTML table with one row per
 * transcription factor in tfList.  Each row holds the factor name,
 * growth condition columns, and motif hit columns.  The conditions
 * tested for a factor are read from tfToConditionTable using conn;
 * motif scores are computed against motifTable. */
{
struct tfData *factor = tfList;

webNewSection("Transcription Factors Showing IP Over this Probe ");
hTableStart();

/* First header row: column groups. */
printf("<TR>");
colLabel("Transcription", 1);
colLabel("Growth Condition", 3);
colLabel("Motif Information", 3);
printf("</TR>\n");

/* Second header row: the individual columns. */
printf("<TR>");
colLabel("Factor", 1);
colLabel("Good IP (P<0.001)", 1);
colLabel("Weak IP (P<0.005)", 1);
colLabel("No IP (P>0.005)", 1);
colLabel("Hits", 1);
colLabel("Scores", 1);
colLabel("Conservation (2 max)", 1);
printf("</TR>\n");

while (factor != NULL)
    {
    /* Hash handed to ipPrintInRange and then consulted below to decide
     * which conditions still need to be listed in the "No IP" column. */
    struct hash *shownConditions = newHash(8);
    struct transRegCode *firstHit, *hit;
    struct sqlResult *sr;
    char query[256];
    char **row;
    int restCount = 0;

    slSort(&factor->conditionList, tfCondCmpName);
    printf("<TR>");

    /* Factor name cell. */
    printf("<TD>");
    sacCerHgGeneLinkName(conn, factor->name);
    printf("</TD>");

    /* Strong, then weak, growth conditions. */
    ipPrintInRange(factor->conditionList, 0.0, 0.002, shownConditions);
    ipPrintInRange(factor->conditionList, 0.002, 0.006, shownConditions);

    /* Every condition in the database for this factor that is not
     * already in the hash goes into one comma separated cell. */
    sqlSafef(query, sizeof(query),
        "select growthCondition from %s where name='%s'",
        tfToConditionTable, factor->name);
    sr = sqlGetResult(conn, query);
    printf("<TD>");
    while ((row = sqlNextRow(sr)) != NULL)
        {
        if (hashLookup(shownConditions, row[0]))
            continue;
        if (restCount > 0)
            printf(", ");
        printf("%s", row[0]);
        ++restCount;
        }
    sqlFreeResult(&sr);
    if (restCount == 0)
        printf("&nbsp;");    /* keep the empty cell visible */
    printf("</TD>");

    /* Motif cells: hit count, per-hit scores, per-hit conservation. */
    firstHit = factor->trcList;
    if (firstHit != NULL)
        {
        printf("<TD>%d</TD>", slCount(firstHit));

        printf("<TD>");
        for (hit = firstHit; hit != NULL; hit = hit->next)
            {
            double score;
            if (hit != firstHit)
                printf(", ");
            score = motifScoreHere(
                hit->chrom, hit->chromStart, hit->chromEnd,
                hit->name, motifTable);
            transRegCodeAnchor(hit);
            printf("%3.1f</A>", score);
            }

        printf("</TD><TD>");
        for (hit = firstHit; hit != NULL; hit = hit->next)
            {
            if (hit != firstHit)
                printf(", ");
            printf("%d", hit->consSpecies);
            }
        printf("</TD>");
        }
    else
        printf("<TD>0</TD><TD>n/a</TD><TD>n/a</TD>\n");

    printf("</TR>\n");
    hashFree(&shownConditions);
    factor = factor->next;
    }
hTableEnd();
}
struct nameOff *scanIntronFile(char *preIntronQ, char *startIntronQ, 
    char *endIntronQ, char *postIntronQ, boolean invert)
/* Scan introns.txt in the worm cDNA directory and return a list of the
 * introns whose flanking/intron sequences match the four query patterns.
 *   preIntronQ   - pattern matched against the end of the sequence before the intron
 *   startIntronQ - pattern matched against the start of the intron
 *   endIntronQ   - pattern matched against the end of the intron
 *   postIntronQ  - pattern matched against the start of the sequence after the intron
 *   invert       - if TRUE keep the lines that do NOT match
 * An empty pattern always matches.  Aborts if a pattern is longer than 25
 * (flanks) or 40 (intron) characters, or if a line has fewer than 11 words.
 * Every kept intron is also passed to addIntronToHistogram.  The returned
 * list is sorted with cmpCounts. */
{
char intronFileName[600];
int nameLen;
FILE *f;
char lineBuf[4*1024];
char *words[4*128];
int wordCount;
int lineCount = 0;
int preLenQ = strlen(preIntronQ);
int startLenQ = strlen(startIntronQ);
int endLenQ = strlen(endIntronQ);
int postLenQ = strlen(postIntronQ);
char *preIntronF, *startIntronF, *endIntronF, *postIntronF;
int preLenF, startLenF, endLenF, postLenF;
/* Word indexes of the four sequence fields within a line. */
int preIx = 6, startIx = 7, endIx =8, postIx = 9;
struct nameOff *list = NULL, *el;
boolean addIt;
int i;

if (preLenQ > 25 || postLenQ > 25 || startLenQ > 40 || endLenQ > 40)
    {
    errAbort("Can only handle queries up to 25 bases on either side of the intron "
             "and 40 bases inside the intron.");
    }

/* Build the path with a bounded write; the directory prefix has no
 * length guarantee, so refuse a truncated name rather than overflow. */
nameLen = snprintf(intronFileName, sizeof(intronFileName), "%s%s", wormCdnaDir(), "introns.txt");
if (nameLen < 0 || nameLen >= (int)sizeof(intronFileName))
    errAbort("Path to introns.txt is too long (limit %d characters)",
        (int)sizeof(intronFileName) - 1);

f = mustOpen(intronFileName, "r");
while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopByWhite(lineBuf, words, ArraySize(words));
    if (wordCount == ArraySize(words))
        {
        warn("May have truncated end of line %d of %s",
            lineCount, intronFileName);
        }
    if (wordCount == 0)
        continue;
    if (wordCount < 11)
        errAbort("Unexpected short line %d of %s", lineCount, intronFileName);
    preIntronF = words[preIx];
    startIntronF = words[startIx];
    endIntronF = words[endIx];
    postIntronF = words[postIx];
    preLenF = strlen(preIntronF);
    startLenF = strlen(startIntronF);
    endLenF = strlen(endIntronF);
    postLenF = strlen(postIntronF);
    addIt = FALSE;
    /* The pre and end patterns are anchored at the END of their file
     * fields, the start and post patterns at the beginning.
     * NOTE(review): the end-anchored offsets assume the file field is at
     * least as long as the query -- confirm against the file format. */
    if (   (  preLenQ == 0 || patMatch(preIntronQ, preIntronF+preLenF-preLenQ+countSpecial(preIntronQ), preLenQ))
        && (startLenQ == 0 || patMatch(startIntronQ, startIntronF, startLenQ))
        && (  endLenQ == 0 || patMatch(endIntronQ, endIntronF+endLenF-endLenQ+countSpecial(endIntronQ), endLenQ))
        && ( postLenQ == 0 || patMatch(postIntronQ, postIntronF, postLenQ)) )
        {
        addIt = TRUE;
        }
    if (invert)
        addIt = !addIt;
    if (addIt)
        {
        addIntronToHistogram(preIntronF+preLenF, startIntronF, endIntronF+endLenF, postIntronF);
        AllocVar(el);
        el->chrom = cloneString(words[1]);
        el->name = cloneString(words[5]);
        el->start = atoi(words[2]);
        el->end = atoi(words[3]);        
        el->cdnaCount = atoi(words[0]);
        /* First two and last two bases of the intron. */
        memcpy(el->startI, startIntronF, 2);
        memcpy(el->endI, endIntronF + endLenF - 2, 2);
        /* Words from index 10 on are the names of the supporting cDNAs. */
        assert(wordCount == el->cdnaCount + 10);
        for (i=10; i<wordCount; ++i)
            {
            struct slName *name = newSlName(words[i]);
            slAddHead(&el->cdnaNames, name);
            }
        slReverse(&el->cdnaNames);
        assert(slCount(el->cdnaNames) == el->cdnaCount);
        slAddHead(&list, el);
        }
    }
fclose(f);
slSort(&list, cmpCounts);
return list;
}
Exemple #14
0
void doFrame(struct sqlConnection *conn, boolean forceImageToList)
/* Write the top-level HTML frameset page: a control bar frame on top,
 * and below it a thumbnail list frame next to an image frame.  The
 * search in the cart's list spec is run first, its weighted and sorted
 * matches are saved to a trash file, and that file name plus the current
 * image id are stored back in the cart.  If forceImageToList is set the
 * image id becomes that of the best match (or 0 when nothing matched). */
{
int imageId = cartUsualInt(cart, hgpId, 0);
char *sidUrl = cartSidUrlString(cart);
char *listSpec = cartUsualString(cart, hgpListSpec, "");
struct visiMatch *matches = visiSearch(conn, listSpec);
struct tempName matchTn;
char *matchPath = NULL;

#ifdef SOON
if (!cartUsualBoolean(cart, hgpIncludeMutants, FALSE))
    matches = removeMutants(conn, matches);
#endif /* SOON */
matches = onePerImageFile(conn, matches);
weighMatches(conn, matches);
slSort(&matches, visiMatchCmpWeight);
if (forceImageToList)
    imageId = (matches != NULL) ? matches->imageId : 0;

/* Save the match list where the other frames can find it. */
trashDirFile(&matchTn, "vg", "visiMatch", ".tab");
matchPath = matchTn.forCgi;
saveMatchFile(matchPath, matches);
cartSetString(cart, hgpMatchFile, matchPath);
cartSetInt(cart, hgpId, imageId);

/* Document type and head. */
puts("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">");
fputs("<HTML>\n", stdout);
fputs("<HEAD>\n", stdout);
fputs("<TITLE>\n", stdout);
printf("%s ", hgVisiGeneShortName());
printf("%s",titleMessage);
fputs("</TITLE>\n", stdout);
fputs("</HEAD>\n", stdout);

/* Outer frameset: control bar row above everything else. */
fputs("  <frameset rows=\"27,*\">\n", stdout);
printf("    <frame name=\"controls\" src=\"%s?%s=go&%s&%s=%d\" noresize marginwidth=\"0\" marginheight=\"0\" frameborder=\"0\">\n",
    hgVisiGeneCgiName(), hgpDoControls, sidUrl, hgpId, imageId);

/* Inner frameset: thumbnail list on the left, image on the right. */
fputs("  <frameset cols=\"230,*\"> \n", stdout);
printf("    <frame src=\"%s?%s=go&%s&%s=%d\" noresize frameborder=\"0\" name=\"list\">\n",
    hgVisiGeneCgiName(), hgpDoThumbnails, sidUrl, hgpId, imageId);
printf("    <frame src=\"%s?%s=go&%s&%s=%d\" name=\"image\" noresize frameborder=\"0\">\n",
    hgVisiGeneCgiName(), hgpDoImage, sidUrl, hgpId, imageId);
fputs("  </frameset>\n", stdout);

/* Fallback for browsers without frame support. */
fputs("  <noframes>\n", stdout);
fputs("  <body>\n", stdout);
fputs("  <p>This web page uses frames, but your browser doesn't support them.</p>\n", stdout);
fputs("  </body>\n", stdout);
fputs("  </noframes>\n", stdout);
fputs("</frameset>\n", stdout);

fputs("</HTML>\n", stdout);
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *   doLoad   - if TRUE load the table into the database, write the binary
 *              .cgb file and register the graph in metaChromGraph;
 *              if FALSE only the tab file is written.
 *   db       - database to load into
 *   track    - name of the table / graph
 *   fileName - input file; read through the -idTable option's table
 *              when that option is given
 * Aborts if the input holds no data.  With the -minusLog10 option every
 * positive value is replaced by its negative base-10 log before the
 * min/max are computed and before anything is written. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;
    boolean minusLog10 = optionExists("minusLog10");

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Convert values first, over the WHOLE list.  Doing this inside the
     * min/max loop (which starts at the second element) would leave the
     * first value unconverted and seed min/max with the raw number. */
    if (minusLog10)
    {
        for (el = list; el != NULL; el = el->next)
        {
            if (el->val == 1)
                el->val = 0;    /* avoid producing negative zero */
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
    }

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }


    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
                              track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file.  The binary file is written to the
         * current directory; the path stored in the meta table points
         * under the -pathPrefix option, defaulting to /gbdb/<db>/chromGraph. */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);
    }
}
Exemple #16
0
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
/* For every pseudoGeneLink row in bedFileName, decide for each mRNA that
 * overlaps the retrogene whether it looks more like the retrogene or
 * like its parent gene.
 *   bedFileName - file of pseudoGeneLink rows
 *   tNibDir     - directory used to load both query and target sequence
 *                 of the self chain
 *   nibList     - passed through to the pslPretty command line
 * Per row:
 *   1. the best scoring self chain (from chainHash) that links the
 *      retrogene to its parent is turned into axt blocks, and the
 *      mismatching positions are collected with addMisMatch;
 *   2. the overlapping mRNAs (from mrnaHash) are written to files under
 *      /tmp and re-aligned with an external pslPretty run;
 *   3. at each collected mismatch position the mRNA base is compared
 *      with the retro and the parent base, and one tab separated line
 *      per mRNA with the three tallies is written to outFile.
 * Aborts if a query sequence is missing, the temporary psl file cannot
 * be created, or pslPretty fails. */
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;

if (nibHash == NULL)
    nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
    {
    struct misMatch *misMatchList = NULL;
    struct binKeeper *bk = NULL;
    struct binElement *el, *elist = NULL;
    struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
    struct misMatch *mf = NULL;
    ps = pseudoGeneLinkLoad(row);
    tmpName[0] = cloneString(ps->name);
    /* The size argument is a count of array slots, not bytes. */
    chopByChar(tmpName[0], '.', tmpName, ArraySize(tmpName));
    verbose(2,"name %s %s:%d-%d\n",
            ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
    /* get expressed retro from hash */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        rPsl = el->val;
        verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
        }
    /* find mrnas that overlap parent gene */
    bk = hashFindVal(mrnaHash, ps->gChrom);
    elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        pPsl = el->val;
        verbose(2,"parent %s %s:%d %d,%d\n",
                pPsl->qName, pPsl->tName,pPsl->tStart,
                pPsl->match, pPsl->misMatch);
        }
    /* find self chain */
    bk = hashFindVal(chainHash, ps->chrom);
    elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
    slSort(&elist, chainCmpScoreDesc);
    for (el = elist; el != NULL ; el = el->next)
        {
        struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
        int qs = chain->qStart;
        int qe = chain->qEnd;
        int id = chain->id;
        /* Convert minus strand query coordinates to plus strand. */
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        /* Skip chains whose query side does not land on the parent gene. */
        if (!sameString(chain->qName , ps->gChrom) || 
                !positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
            {
            verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd,
                ps->gChrom,ps->gStart,ps->gEnd);
            continue;
            }
        verbose(2,"chain id %d %4.0f",chain->id, chain->score);
        /* Trim the chain to the retrogene (minus 7 bases at each end)
         * on the target side, then to the parent on the query side. */
        chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, 
            &subChain,  &retChainToFree);
        if (subChain != NULL)
            chain = subChain;
        chainSubsetOnQ(chain, ps->gStart, ps->gEnd, 
            &subChain,  &retChainToFree2);
        if (subChain != NULL)
            chain = subChain;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        verbose(2," %s:%d-%d %s:%d-%d ", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd);
        if (subChain != NULL)
            verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
                    subChain->qName, subChain->qStart, subChain->qEnd, 
                    subChain->tName,subChain->tStart,subChain->tEnd);

        qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
        tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
        tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
        qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
        axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
            maxGap, BIGNUM);
        verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
        for (axt = axtList; axt != NULL ; axt = axt->next)
            {
            addMisMatch(&misMatchList, axt, chain->qSize);
            }
        verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
        chainFree(&retChainToFree);
        chainFree(&retChainToFree2);
        /* Only the first (best scoring) acceptable chain is used. */
        break;
        }
    /* create axt of each expressed retroGene to parent gene */
        /* get alignment for each mrna overlapping retroGene */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
    {
    char queryName[512];
    char axtName[512];
    char pslName[512];
    safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
    safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
    safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
    op = fopen(pslName,"w");
    if (op == NULL)
        errAbort("Can't open %s to write\n", pslName);
    /* NOTE(review): seqList is never reset between rows, so the fasta
     * file also carries the sequences of earlier rows -- confirm this
     * is intended. */
    for (el = elist ; el != NULL ; el = el->next)
        {
        psl = el->val;
        pslOutput(psl, op, '\t','\n');
        qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);

        if (qSeq != NULL)
            slAddHead(&seqList, qSeq);
        else
            errAbort("seq %s not found \n", psl->qName);
        }
    fclose(op);
    faWriteAll(queryName, seqList);
    safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
    ret = system(cmd);
    if (ret != 0)
        errAbort("ret is %d %s\n",ret,cmd);
    verbose(2, "ret is %d %s\n",ret,cmd);
    af = lineFileOpen(axtName, TRUE);
    while ((axt = axtRead(af)) != NULL)
        slAddHead(&mAxt, axt);
    lineFileClose(&af);
    }
    slReverse(&mAxt);
    /* for each parent/retro pair, count bases matching retro and parent better */
    /* mAxt is walked in step with elist: one axt per mRNA, same order. */
    for (el = elist; el != NULL ; el = el->next)
        {
        int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
        struct dyString *parentMatch = newDyString(16*1024);
        struct dyString *retroMatch = newDyString(16*1024);
        mPsl = el->val;

        if (mAxt != NULL)
            {
            verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
                    mPsl->qName, mPsl->tName,mPsl->tStart,
                    mPsl->match, mPsl->misMatch, 
                    mAxt->qName);
            assert(sameString(mPsl->qName, mAxt->qName));
            for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
                {
                int j = mAxt->tStart - mPsl->tStart;
                verbose(5, "listLen = %d\n",slCount(misMatchList));
                if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
                    {
                    if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
                                i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], 
                                mf->parentBase, mf->parentLoc);
                        dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
                        scoreRetro++;
                        }
                    else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
                        scoreParent++;
                        }
                    else
                        {
                        verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        scoreNeither++;
                        }
                    }
                }
            verbose(2,"final score %s parent %d retro %d  neither %d\n",
                    mPsl->qName, scoreParent, scoreRetro, scoreNeither);
            fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
                    ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, 
                    mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, 
                    scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
            mAxt = mAxt->next;
            }
        dyStringFree(&parentMatch);
        dyStringFree(&retroMatch);
        }
    }
}
void writeMergers(struct cdnaAliList *calList, char *cdnaName, char *bacNames[])
/* Write out any mergers indicated by this cdna. This destroys calList.
 *   calList  - alignments of one cDNA; sorted here with cmpCal and
 *              freed piece by piece
 *   cdnaName - name of the cDNA, used in the output line
 *   bacNames - BAC names indexed by the bacIx field of an alignment
 * The sorted list is cut into runs that share a bacIx.  A run with more
 * than one alignment is repeatedly split into a "clump" of alignments
 * without major overlap and the leftovers; each clump of two or more
 * alignments that are not all on the same contig produces one line in
 * mergerOut. */
{
struct cdnaAliList *startBac, *endBac, *cal, *prevCal, *nextCal;
int bacCount;
int bacIx;

slSort(&calList, cmpCal);
for (startBac = calList; startBac != NULL; startBac = endBac)
    {
    /* Scan until find a cal that isn't pointing into the same BAC. */
    bacCount = 1;
    bacIx = startBac->bacIx;
    prevCal = startBac;
    for (cal =  startBac->next; cal != NULL; cal = cal->next)
        {
        if (cal->bacIx != bacIx)
            {
            /* Detach this BAC's run from the rest of the list. */
            prevCal->next = NULL;
            break;
            }
        ++bacCount;
        prevCal = cal;
        }
    endBac = cal;
    if (bacCount > 1)
        {
        /* Each pass peels one clump off the run; what did not fit
         * becomes the input of the next pass. */
        while (startBac != NULL)
            {
            struct cdnaAliList *clumpList = NULL, *leftoverList = NULL;
            for (cal = startBac; cal != NULL; cal = nextCal)
                {
                nextCal = cal->next;
                if (noMajorOverlap(cal, clumpList))
                    {
                    slAddHead(&clumpList, cal);
                    }
                else
                    {
                    slAddHead(&leftoverList, cal);
                    }
                }
            slReverse(&clumpList);
            slReverse(&leftoverList);
            if (slCount(clumpList) > 1)
                {
                if (!allSameContig(clumpList))
                    {
                    fprintf(mergerOut, "%s glues %s contigs", cdnaName, bacNames[bacIx]);
                    for (cal = clumpList; cal != NULL; cal = cal->next)
                        {
                        fprintf(mergerOut, " %d %c %c' (%d-%d) %3.1f%%", cal->seqIx, cal->strand, 
                            cal->dir,
                            cal->start, cal->end, 100.0*cal->cookedScore);
                        }
                    fprintf(mergerOut, "\n");
                    }
                }
            freeCalList(&clumpList);
            startBac = leftoverList;
            }        
        }
    else
        {
        freeCalList(&startBac);
        }
    }
}
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut)
/* axtChain - Chain together axt alignments.
 *   axtIn    - input alignments; read as psl when the -psl option is set,
 *              otherwise as axt
 *   tNibDir  - target sequence source; a fasta file when -faT is set
 *   qNibDir  - query sequence source; a fasta file when -faQ is set
 *   chainOut - output file; receives the score scheme header followed
 *              by all chains sorted with chainCmpScore
 * If the file-level detailsName is set, a details file of that name is
 * also written. */
{
struct hash *pairHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct seqPair *spList = NULL, *sp;
FILE *f = mustOpen(chainOut, "w");
char *qName = "",  *tName = "";
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char qStrand = 0, tStrand = 0;
struct chain *chainList = NULL, *chain;
FILE *details = NULL;
struct dnaSeq *seq = NULL;
struct hash *qFaHash = newHash(0);
struct hash *tFaHash = newHash(0);
FILE *faF;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);
/* Options do not change during the run, so look them up once instead
 * of once per sequence pair. */
boolean qIsFa = optionExists("faQ");
boolean tIsFa = optionExists("faT");

axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain");

if (detailsName != NULL)
    details = mustOpen(detailsName, "w");
/* Read input file and divide alignments into various parts. */
if (optionExists("psl"))
    spList = readPslBlocks(axtIn, pairHash, f);
else
    spList = readAxtBlocks(axtIn, pairHash, f);

/* With fasta input, read every sequence up front into a hash by name. */
if (qIsFa)
    {
    faF = mustOpen(qNibDir, "r");
    verbose(1, "reading query fasta sequence from '%s'\n", qNibDir);
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        hashAdd(qFaHash, seq->name, seq);
    fclose(faF);
    }
if (tIsFa)
    {
    faF = mustOpen(tNibDir, "r");
    verbose(1, "reading target fasta sequence from '%s'\n", tNibDir);
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        hashAdd(tFaHash, seq->name, seq);
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    slReverse(&sp->blockList);
    removeExactOverlaps(&sp->blockList);
    verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList));
    if (qIsFa)
        {
        assert (qFaHash != NULL);
        loadFaSeq(qFaHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand, qNibDir);
        }
    else
        {
        loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand,
                &qName, &qSeq, &qStrand);
        }
    if (tIsFa)
        {
        assert (tFaHash != NULL);
        loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand, tNibDir);
        }
    else
        {
        loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+',
                &tName, &tSeq, &tStrand);
        }
    chainPair(sp, qSeq, tSeq, &chainList, details);
    }
slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    /* A chain must start exactly where its first block starts. */
    assert(chain->qStart == chain->blockList->qStart
        && chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

/* Close the details file as well, so its buffered output is flushed
 * and the handle is released the same way as the main output. */
carefulClose(&details);
carefulClose(&f);
}
Exemple #19
0
static struct jsonWrite *rTdbToJw(struct trackDb *tdb, struct hash *fieldHash,
                                  struct hash *excludeTypesHash, int depth, int maxDepth)
/* Recursively build and return a new jsonWrite object with JSON for tdb and its children,
 * or NULL if tdb or all children have been filtered out by excludeTypesHash.
 * If excludeTypesHash is non-NULL, omit any tracks/views/subtracks with type in excludeTypesHash.
 * If fieldHash is non-NULL, include only the field names indexed in fieldHash. */
{
if (maxDepth >= 0 && depth > maxDepth)
    return NULL;
boolean doSubtracks = (tdb->subtracks && fieldOk("subtracks", fieldHash));
// If excludeTypesHash is given and tdb is a leaf track/subtrack, look up the first word
// of tdb->type in excludeTypesHash; if found, return NULL.
if (excludeTypesHash && !doSubtracks)
    {
    char typeCopy[PATH_LEN];
    safecpy(typeCopy, sizeof(typeCopy), tdb->type);
    if (hashLookup(excludeTypesHash, firstWordInLine(typeCopy)))
        return NULL;
    }
boolean gotSomething = !doSubtracks;
struct jsonWrite *jwNew = jsonWriteNew();
jsonWriteObjectStart(jwNew, NULL);
writeTdbSimple(jwNew, tdb, fieldHash);
if (tdb->parent && fieldOk("parent", fieldHash))
    {
    // We can't link to an object in JSON and better not recurse here or else infinite loop.
    if (tdbIsSuperTrackChild(tdb))
        {
        // Supertracks have been omitted from fullTrackList, so add the supertrack object's
        // non-parent/child info here.
        jsonWriteObjectStart(jwNew, "parent");
        writeTdbSimple(jwNew, tdb->parent, fieldHash);
        jsonWriteObjectEnd(jwNew);
        }
    else
        // Just the name so we don't have infinite loops.
        jsonWriteString(jwNew, "parent", tdb->parent->track);
    }
if (doSubtracks)
    {
    jsonWriteListStart(jwNew, "subtracks");
    slSort(&tdb->subtracks, trackDbViewCmp);
    struct trackDb *subTdb;
    for (subTdb = tdb->subtracks;  subTdb != NULL;  subTdb = subTdb->next)
        {
        struct jsonWrite *jwSub = rTdbToJw(subTdb, fieldHash, excludeTypesHash, depth+1, maxDepth);
        if (jwSub)
            {
            gotSomething = TRUE;
            jsonWriteAppend(jwNew, NULL, jwSub);
            jsonWriteFree(&jwSub);
            }
        }
    jsonWriteListEnd(jwNew);
    }
jsonWriteObjectEnd(jwNew);
if (! gotSomething)
    // All children were excluded; clean up and null out jwNew.
    jsonWriteFree(&jwNew);
return jwNew;
}
Exemple #20
0
void bamLoadItemsCore(struct track *tg, boolean isPaired)
/* Fetch the BAM alignments for the current window into tg->items.
 * With isPaired set, the fetch window is widened by the track's
 * pairSearchRange setting and the items are taken from the pair hash
 * after the fetch, keeping only those that overlap the window.  Unless
 * the track is in dense mode the items are then sorted according to the
 * colour settings.  All work runs inside an errCatch; on error the
 * message is stored in tg->networkErrMsg and the track is switched to
 * drawing a warning. */
{
/* Guard against temporary network errors. */
struct errCatch *catcher = errCatchNew();
if (errCatchStart(catcher))
    {
    struct hash *pairHash = NULL;
    if (isPaired)
        pairHash = hashNew(18);

    /* Display settings, from cart or trackDb. */
    int minAliQual = atoi(cartOrTdbString(cart, tg->tdb, BAM_MIN_ALI_QUAL, BAM_MIN_ALI_QUAL_DEFAULT));
    char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT);
    char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT);
    char *userTag = cartOrTdbString(cart, tg->tdb, BAM_COLOR_TAG, BAM_COLOR_TAG_DEFAULT);
    int aliQualShadeMin = 0, aliQualShadeMax = 99;
    int baseQualShadeMin = 0, baseQualShadeMax = 40;
    parseIntRangeSetting(tg->tdb, "aliQualRange", &aliQualShadeMin, &aliQualShadeMax);
    parseIntRangeSetting(tg->tdb, "baseQualRange", &baseQualShadeMin, &baseQualShadeMax);
    struct bamTrackData btd = {tg, pairHash, minAliQual, colorMode, grayMode, userTag,
                               aliQualShadeMin, aliQualShadeMax, baseQualShadeMin, baseQualShadeMax};

    /* Locate the BAM file: bigDataUrl setting first, then the track's table. */
    char *bamSource = trackDbSetting(tg->tdb, "bigDataUrl");
    if (bamSource == NULL)
        {
        if (!tg->customPt)
            {
            struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
            bamSource = bamFileNameFromTable(conn, tg->table, chromName);
            hFreeConn(&conn);
            }
        else
            {
            errAbort("bamLoadItemsCore: can't find bigDataUrl for custom track %s", tg->track);
            }
        }

    char *bamPath = hReplaceGbdb(bamSource);

    char region[512];
    safef(region, sizeof(region), "%s:%d-%d", chromName, winStart, winEnd);
    char *cacheDir =  cfgOption("cramRef");
    char *refUrl = trackDbSetting(tg->tdb, "refUrl");
    if (isPaired)
        {
        /* Widen the fetch region so mates just outside the window are seen. */
        char *rangeSetting = trackDbSettingClosestToHomeOrDefault(tg->tdb, "pairSearchRange", "20000");
        int pairSearchRange = atoi(rangeSetting);
        if (pairSearchRange > 0)
            safef(region, sizeof(region), "%s:%d-%d", chromName,
                  max(0, winStart-pairSearchRange), winEnd+pairSearchRange);
        bamFetchPlus(bamPath, region, addBamPaired, &btd, NULL, refUrl, cacheDir);

        /* Move whatever is in the pair hash and overlaps the window
         * onto the item list. */
        struct hashCookie cookie = hashFirst(btd.pairHash);
        struct hashEl *hel = hashNext(&cookie);
        while (hel != NULL)
            {
            struct linkedFeatures *lf = hel->val;
            if (lf->start < winEnd && lf->end > winStart)
                slAddHead(&(tg->items), lfsFromLf(lf));
            hel = hashNext(&cookie);
            }
        }
    else
        {
        bamFetchPlus(bamPath, region, addBam, &btd, NULL, refUrl, cacheDir);
        }
    freez(&bamPath);

    if (tg->visibility != tvDense)
        {
        slReverse(&(tg->items));
        /* Sort order follows the display mode. */
        if (isPaired)
            {
            slSort(&(tg->items), linkedFeaturesSeriesCmp);
            }
        else if (sameString(colorMode, BAM_COLOR_MODE_STRAND))
            {
            slSort(&(tg->items), linkedFeaturesCmpOri);
            }
        else if (sameString(colorMode, BAM_COLOR_MODE_GRAY) &&
                 sameString(grayMode, BAM_GRAY_MODE_ALI_QUAL))
            {
            slSort(&(tg->items), linkedFeaturesCmpScore);
            }
        else
            {
            slSort(&(tg->items), linkedFeaturesCmpStart);
            }

        if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
            {
            /* Tell drawItems to make a single mapBox for the whole track. */
            tg->customInt = 1;
            tg->mapItem = dontMapItem;
            }
        }
    }
errCatchEnd(catcher);
if (catcher->gotError)
    {
    tg->networkErrMsg = cloneString(catcher->message->string);
    tg->drawItems = bigDrawWarning;
    tg->totalHeight = bigWarnTotalHeight;
    }
errCatchFree(&catcher);
}
struct mafAli *hgMafFrag(
	char *database,     /* Database, must already have hSetDb to this */
	char *track, 	    /* Name of MAF track */
	char *chrom, 	    /* Chromosome (in database genome) */
	int start, int end, /* start/end in chromosome */
	char strand, 	    /* Chromosome strand. */
	char *outName, 	    /* Optional name to use in first component */
	struct slName *orderList /* Optional order of organisms. */
	)
/* mafFrag- Extract maf sequences for a region from database.
 * This creates a somewhat unusual MAF that extends from start
 * to end whether or not there are actually alignments.  Where
 * there are no alignments (or alignments missing a species)
 * a . character fills in.   The score is always zero, and
 * the sources just indicate the species.  You can mafFree this
 * as normal.
 *
 * Implementation outline: one growing text buffer (org->dy) is kept per
 * organism.  The sorted alignments are walked left to right; every buffer
 * receives the same number of columns at each step, so that all buffers have
 * symCount columns when the walk ends.  The buffers are then turned into
 * the components of a single new mafAli.
 *
 * NOTE(review): 'native' (the result of hChromSeq) is not released anywhere
 * in this function - looks like a leak, confirm ownership. */
{
int chromSize = hChromSize(database, chrom);
struct sqlConnection *conn = hAllocConn(database);
struct dnaSeq *native = hChromSeq(database, chrom, start, end);
struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
char masterSrc[128];
struct hash *orgHash = newHash(10);	/* organism name -> struct oneOrg */
struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
int curPos = start, symCount = 0;	/* next uncovered chrom position; columns emitted so far */
struct slName *name;
int order = 0;

/* Check that the mafs are really copacetic, the particular
 * subtype we think is in the database that this (relatively)
 * simple code can handle. */
safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
mafCheckFirstComponentSrc(mafList, masterSrc);
mafCheckFirstComponentStrand(mafList, '+');
slSort(&mafList, mafCmp);

/* Prebuild organisms if possible from input orderList. */
for (name = orderList; name != NULL; name = name->next)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, name->name, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    org->order = order++;
    /* The first name in orderList becomes the native organism. */
    if (nativeOrg == NULL)
        nativeOrg = org;
    }
/* Without an orderList, start with a single organism keyed by the database
 * name; it becomes the native organism.  Other organisms are added as they
 * are encountered in the alignments below. */
if (orderList == NULL)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, database, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    if (nativeOrg == NULL)
        nativeOrg = org;
    }

/* Go through all mafs in window, mostly building up
 * org->dy strings. */
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    struct mafComp *mc, *mcMaster = maf->components;
    struct mafAli *subMaf = NULL;
    order = 0;
    /* Gap between the covered region and this alignment: let fillInMissing
     * extend every organism over [curPos, mcMaster->start) and count
     * those columns. */
    if (curPos < mcMaster->start)
	{
	fillInMissing(nativeOrg, orgList, native, start,
		curPos, mcMaster->start);
	symCount += mcMaster->start - curPos;
	}
    /* Alignments that end at or before curPos contribute nothing. */
    if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
    						    * backtracking */
	{
	/* Trim the alignment to [curPos, end) when needed; subMaf is then
	 * a separate mafAli that is freed at the bottom of this block. */
	if (mafNeedSubset(maf, masterSrc, curPos, end))
	    {
	    subMaf = mafSubset(maf, masterSrc, curPos, end);
	    if (subMaf == NULL)
	        continue;
	    }
	else
	    subMaf = maf;
	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
	    {
	    /* Extract name up to dot into 'orgName' */
	    char buf[128], *e, *orgName;

	    if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */
		continue;

	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */

	    e = strchr(mc->src, '.');
	    if (e == NULL)
		orgName = mc->src;
	    else
		{
		int len = e - mc->src;
		if (len >= sizeof(buf))
		    errAbort("organism/database name %s too long", mc->src);
		memcpy(buf, mc->src, len);
		buf[len] = 0;
		orgName = buf;
		}

	    /* Look up dyString corresponding to  org, and create a
	     * new one if necessary. */
	    org = hashFindVal(orgHash, orgName);
	    if (org == NULL)
		{
		/* With an explicit orderList every organism must be listed. */
		if (orderList != NULL)
		   errAbort("%s is not in orderList", orgName);
		AllocVar(org);
		slAddHead(&orgList, org);
		hashAddSaveName(orgHash, orgName, org, &org->name);
		org->dy = dyStringNew(native->size*1.5);
		/* Pad the newcomer with '.' for the columns already emitted
		 * so its buffer lines up with the others. */
		dyStringAppendMultiC(org->dy, '.', symCount);
		if (nativeOrg == NULL)
		    nativeOrg = org;
		}
	    /* Without an orderList, an organism's order is the largest
	     * component index at which it has been seen. */
	    if (orderList == NULL && order > org->order)
		org->order = order;
	    org->hit = TRUE;

	    /* Fill it up with alignment. */
	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
	    }
	/* Organisms absent from this alignment get '.' for its columns;
	 * the hit flags are reset for the next alignment. */
	for (org = orgList; org != NULL; org = org->next)
	    {
	    if (!org->hit)
		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
	    org->hit = FALSE;
	    }
	symCount += subMaf->textSize;
	curPos = mcMaster->start + mcMaster->size;
	if (subMaf != maf)
	    mafAliFree(&subMaf);
	}
    }
/* Fill whatever remains between the last alignment and 'end'. */
if (curPos < end)
    {
    fillInMissing(nativeOrg, orgList, native, start, curPos, end);
    symCount += end - curPos;
    }
mafAliFreeList(&mafList);

slSort(&orgList, oneOrgCmp);
if (strand == '-')
    {
    for (org = orgList; org != NULL; org = org->next)
	reverseComplement(org->dy->string, org->dy->stringSize);
    }

/* Construct our maf */
AllocVar(maf);
maf->textSize = symCount;
for (org = orgList; org != NULL; org = org->next)
    {
    struct mafComp *mc;
    AllocVar(mc);
    /* The first organism after sorting becomes the first component and
     * carries real coordinates (or outName with 0..native->size). */
    if (org == orgList)
        {
	if (outName != NULL)
	    {
	    mc->src = cloneString(outName);
	    mc->srcSize = native->size;
	    mc->strand = '+';
	    mc->start = 0;
	    mc->size = native->size;
	    }
	else
	    {
	    mc->src = cloneString(masterSrc);
	    mc->srcSize = chromSize;
	    mc->strand = strand;
	    if (strand == '-')
	       reverseIntRange(&start, &end, chromSize);
	    mc->start = start;
	    mc->size = end-start;
	    }
	}
    else
        {
	/* Other organisms get synthetic coordinates: start 0, and both size
	 * and srcSize set to countAlpha() of their text. */
	int size = countAlpha(org->dy->string);
	mc->src = cloneString(org->name);
	mc->srcSize = size;
	mc->strand = '+';
	mc->start = 0;
	mc->size = size;
	}
    mc->text = cloneString(org->dy->string);
    dyStringFree(&org->dy);
    /* Components are pushed on the head and reversed once after the loop. */
    slAddHead(&maf->components, mc);
    }
slReverse(&maf->components);

slFreeList(&orgList);
freeHash(&orgHash);
hFreeConn(&conn);
return maf;
}
void hgFindSpec(char *org, char *database, char *hgFindSpecName, char *sqlFile,
                char *hgRoot, boolean strict)
/* hgFindSpec - Create hgFindSpec table from text files.
 *   org            - organism directory name under hgRoot
 *   database       - assembly directory name under hgRoot/org; also the
 *                    database that is connected to and loaded
 *   hgFindSpecName - name of the table to (re)create; "<name>.tab" is
 *                    written in the current directory as a side effect
 *   sqlFile        - file holding the table definition
 *   hgRoot         - root directory of the search spec files
 *   strict         - passed through to layerOn
 * Search specs are layered from hgRoot/org/database, then hgRoot/org, then
 * hgRoot, sorted with hgFindSpecCmp, written to the tab file and loaded. */
{
    struct hash *uniqHash = newHash(8);
    struct hash *htmlHash = newHash(8);
    struct hgFindSpec *hfsList = NULL, *hfs;
    char rootDir[512], orgDir[512], asmDir[512];
    char tab[512];
    snprintf(tab, sizeof(tab), "%s.tab", hgFindSpecName);

    /* Create track list from hgRoot and hgRoot/org and hgRoot/org/assembly
     * ra format database.  The paths come from the caller, so they are
     * formatted with a size bound rather than with unbounded sprintf. */
    snprintf(rootDir, sizeof(rootDir), "%s", hgRoot);
    snprintf(orgDir, sizeof(orgDir), "%s/%s", hgRoot, org);
    snprintf(asmDir, sizeof(asmDir), "%s/%s/%s", hgRoot, org, database);
    layerOn(strict, database, asmDir, uniqHash, htmlHash, FALSE, &hfsList);
    layerOn(strict, database, orgDir, uniqHash, htmlHash, FALSE, &hfsList);
    layerOn(strict, database, rootDir, uniqHash, htmlHash, TRUE, &hfsList);
    slSort(&hfsList, hgFindSpecCmp);
    if (verboseLevel() > 0)
        printf("Loaded %d search specs total\n", slCount(hfsList));

    /* Write to tab-separated file. */
    {
        FILE *f = mustOpen(tab, "w");
        for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
            hgFindSpecTabOut(hfs, f);
        carefulClose(&f);
    }

    /* Update database */
    {
        char *create;
        size_t createLen;
        char query[256];
        struct sqlConnection *conn = sqlConnect(database);

        /* Load in table definition. */
        readInGulp(sqlFile, &create, NULL);
        create = trimSpaces(create);
        create = subTrackName(create, hgFindSpecName);
        /* Drop one trailing ';'.  The length check keeps an empty
         * definition from being indexed at -1. */
        createLen = strlen(create);
        if (createLen > 0 && create[createLen-1] == ';')
            create[createLen-1] = 0;
        sqlRemakeTable(conn, hgFindSpecName, create);

        /* Load in regular fields. */
        sqlSafef(query, sizeof query, "load data local infile '%s' into table %s", tab,
                 hgFindSpecName);
        sqlUpdate(conn, query);

        /* Load in settings fields.  These are written row by row, keyed on
         * searchName, after the bulk load above. */
        for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
        {
            if (hfs->settingsHash != NULL)
            {
                char *settings = settingsFromHash(hfs->settingsHash);
                updateBigTextField(conn, hgFindSpecName, "searchName",
                                   hfs->searchName,
                                   "searchSettings", settings);
                freeMem(settings);
            }
        }

        sqlDisconnect(&conn);
        if (verboseLevel() > 0)
            printf("Loaded database %s\n", database);
    }
}
Exemple #23
0
/* Set the ordering for a block where the tree must still be constructed.
 * The root genome is parked in the file-level sortTreelessRootGenome for
 * the duration of the sort (presumably read by orderTreelessCmp) and is
 * cleared again afterwards. */
static void orderTreeless(struct malnBlk *blk, struct Genome *treelessRootGenome)
{
    sortTreelessRootGenome = treelessRootGenome;
    slSort(&blk->comps, orderTreelessCmp);
    sortTreelessRootGenome = NULL;
}
struct g2cFile *loadG2cFile(char *fileName)
/* Read a gene-to-cDNA file into memory.  Each non-blank line holds a gene
 * name followed by whitespace-separated cDNA names.  Returns a newly
 * allocated g2cFile whose geneList and cdnaList are sorted with cmpName and
 * whose cdnaHash maps cDNA names to their cdnaVal.  Aborts if a line fills
 * the line buffer.
 * Note: gf->name points at the caller's fileName; it is not copied. */
{
char lineBuf[1024*8];
int lineLen;
char *words[256*8];
int wordCount;
FILE *f;
int lineCount = 0;
struct g2cFile *gf = alloc(sizeof(*gf));
int hitCount = 0;
int cdnaCount = 0;
int geneCount = 0;

gf->name = fileName;
f = mustOpen(fileName, "r");
gf->cdnaHash = newHash(14);
while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    lineLen = strlen(lineBuf);
    /* A completely full buffer is treated as an over-long line. */
    if (lineLen >= (int)(sizeof(lineBuf) - 1))
        {
        /* sizeof yields size_t; cast so the argument matches %d. */
        errAbort("%s\nLine %d of %s too long, can only handle %d chars\n",
            lineBuf, lineCount, fileName, (int)(sizeof(lineBuf)-1));
        }
    wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words));
    if (wordCount > 0)
        {
        struct gene *gene = alloc(sizeof(*gene));
        char *geneName = words[0];
        int i;
        
        /* Create new gene struct and put it on list. */
        gene->name = cloneString(geneName);
        slAddHead(&gf->geneList, gene);
        ++geneCount;

        /* Put all cdna hits on gene. */
        for (i=1; i<wordCount; ++i)
            {
            struct cdnaHit *hit;
            struct cdnaVal *cdnaVal;
            struct hashEl *hel;
            char *cdnaName = words[i];

            /* Get cdna, or if it's the first time we've seen it
             * make up a data structure for it and hang it on
             * hash list and cdna list. */
            if ((hel = hashLookup(gf->cdnaHash, cdnaName)) == NULL)
                {
                cdnaVal = alloc(sizeof(*cdnaVal));
                hel = hashAdd(gf->cdnaHash, cdnaName, cdnaVal);
                /* Use the hash element's name; cdnaName points into lineBuf,
                 * which is overwritten by the next fgets. */
                cdnaVal->name = hel->name;
                slAddHead(&gf->cdnaList, cdnaVal);
                ++cdnaCount;
                }
            else
                {
                cdnaVal = hel->val;
                }
            ++cdnaVal->useCount;

            /* Make up new cdna hit and hang it on the gene. */
            hit = alloc(sizeof(*hit));
            hit->hel = hel;
            hit->name = hel->name;
            slAddHead(&gene->hitList, hit);
            ++hitCount;
            }
        /* Hits were pushed on the head; restore file order. */
        slReverse(&gene->hitList);
        }    
    }
slReverse(&gf->geneList);
slSort(&gf->geneList, cmpName);
slSort(&gf->cdnaList, cmpName);
fclose(f);
reportHashStats(gf->cdnaHash);
printf("Loaded %s.  %d genes %d cdnas %d hits\n", fileName,
    geneCount, cdnaCount, hitCount);
return gf;
}
Exemple #25
0
void metaSortTags(struct meta *meta)
/* Do canonical sort so that the first tag stays first but the
 * rest are alphabetical.  Only the tags after the head are handed to
 * slSort (ordered by metaTagValCmp).  A meta with an empty tag list is
 * left untouched instead of dereferencing a NULL head. */
{
    if (!meta->tagList)
        return;
    slSort(&meta->tagList->next, metaTagValCmp);
}
void update(struct g2cFile *old, struct g2cFile *up)
/* Merge the gene/cDNA hits of 'up' into 'old'.  For every gene in up, each
 * hit that findHitInFamily does not find in the matching family of old
 * genes is added to the old gene of the same name; that gene is created
 * first if it does not exist.  old->geneList is re-sorted with cmpName at
 * the end and a summary line is printed.
 * New genes and hits share their name strings with 'up' (pointers are
 * copied, not cloned). */
{
struct gene *oldGene, *upGene;
struct cdnaHit *oldHit, *upHit;
struct hash *geneHash;
/* Initialised because it is stored into new hits below, and the
 * alt-spliced branch never assigns it; it used to be read uninitialised
 * when the first gene in 'up' was alt-spliced.
 * NOTE(review): this is a gene-hash element (possibly left over from an
 * earlier gene), not a cdna-hash element - confirm what readers of
 * cdnaHit.hel expect. */
struct hashEl *hel = NULL;
int sameHitCount = 0;
int newHitCount = 0;
int newGeneCount = 0;
int updatedGeneCount = 0;
int altCount = 0;
struct geneFamily smallFamily;
struct geneFamily *family;

printf("Updating %s with %s\n", old->name, up->name);

/* Hash the existing gene names for faster lookup. */
geneHash = newHash(12);
for (oldGene = old->geneList; oldGene != NULL; oldGene = oldGene->next)
    hashAdd(geneHash, oldGene->name, oldGene);

for (upGene = up->geneList; upGene != NULL; upGene = upGene->next)
    {
    boolean changedGene = FALSE;
    /* Pick the family of old genes to search: the alt-splice family from
     * getAltFamily, or a one-member family built on the stack around the
     * gene of the same name, or nothing. */
    if (isAltSplicedName(upGene->name))
        {
        family = getAltFamily(geneHash, upGene->name);
        ++altCount;
        }
    else
        {
        hel = hashLookup(geneHash, upGene->name);
        if (hel != NULL)
            {
            smallFamily.gene = hel->val;
            smallFamily.next = NULL;
            family = &smallFamily;
            }
        else
            family = NULL;
        }

    /* Set corresponding gene in old file to NULL until we
     * need to find it. */
    oldGene = NULL;
    for (upHit = upGene->hitList; upHit != NULL; upHit = upHit->next)
        {
        if ((oldHit = findHitInFamily(family, upHit->name)) != NULL)
            ++sameHitCount;
        else
            {
            if (oldGene == NULL)
                {
                /* We haven't found corresponding gene yet.  First
                 * look for it in the family. */
                struct geneFamily *member;
                for (member = family; member != NULL; member = member->next)
                    {
                    if (strcmp(member->gene->name, upGene->name) == 0)
                        {
                        oldGene = member->gene;
                        break;
                        }
                    }
                /* The corresponding gene doesn't exist yet. We
                 * have to make it up and hang it on the genelist
                 * for the file, the hash list, and the family list. */
                if (oldGene == NULL)
                    {
                    oldGene = alloc(sizeof(*oldGene));
                    oldGene->name = upGene->name;
                    slAddHead(&old->geneList, oldGene);
                    hashAdd(geneHash, oldGene->name, oldGene);
                    member = alloc(sizeof(*member));
                    member->gene = oldGene;
                    slAddHead(&family, member);
                    ++newGeneCount;
                    }
                }
            /* Record the hit on the old gene. */
            oldHit = alloc(sizeof(*oldHit));
            oldHit->name = upHit->name;
            oldHit->hel = hel;
            slAddHead(&oldGene->hitList, oldHit);
            ++newHitCount;
            changedGene = TRUE;
            }
        }
    if (changedGene)
        ++updatedGeneCount;
    }
slSort(&old->geneList, cmpName);
printf("Updated %d genes (including %d alt spliced ones) with %d cdna hits (%d hits unchanged) %d new genes\n",
    updatedGeneCount, altCount, newHitCount, sameHitCount, newGeneCount);
}
void sortFineAlis(struct fineAli **pAli)
/* Sort the list at *pAli in place, using cmpFineAli as the comparison. */
{
slSort(pAli, cmpFineAli);
}
void doMiddle()
{
struct hash *cvHash = raReadAll((char *)cvFile(), CV_TERM);
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct slList *termList = NULL;
struct hash *ra;
int totalPrinted = 0;
boolean excludeDeprecated = (cgiOptionalString("deprecated") == NULL);

// Prepare an array of selected terms (if any)
int requestCount = 0;
char **requested = NULL;
char *requestVal = termOpt;
char *queryBy = CV_TERM;
if (tagOpt)
    {
    requestVal = tagOpt;
    queryBy = CV_TAG;
    }
else if (targetOpt)
    {
    requestVal = targetOpt;
    queryBy = CV_TERM;  // request target is special: lookup term, convert to target, display target
    }
else if (labelOpt)
    {
    requestVal = labelOpt;
    queryBy = CV_LABEL;
    }
if (requestVal)
    {
    (void)stripChar(requestVal,'\"');
    requestCount = chopCommas(requestVal,NULL);
    requested = needMem(requestCount * sizeof(char *));
    chopByChar(requestVal,',',requested,requestCount);
    }

char *org = NULL;
// if the org is specified in the type (eg. cell line)
// then use that for the org, otherwise use the command line option,
// otherwise use human.
char *type = findType(cvHash,requested,requestCount,&queryBy, &org, FALSE);
if (org == NULL)
    org = organismOptLower;
if (org == NULL)
    org = ORG_HUMAN;

// Special logic for requesting antibody by target
if (targetOpt && requestCount > 0 && sameWord(queryBy,CV_TERM) && sameWord(type,CV_TERM_ANTIBODY))
    {
    // Several antibodies may have same target.
    // requested target={antibody} and found antibody
    // Must now convert each of the requested terms to its target before displaying all targets
    char **targets = convertAntibodiesToTargets(cvHash,requested,requestCount);
    if (targets != NULL)
        {
        freeMem(requested);
        requested = targets;
        queryBy = CV_TARGET;
        }
    }
//warn("Query by: %s = '%s' type:%s",queryBy,requestVal?requestVal:"all",type);

// Get just the terms that match type and requested, then sort them
if (differentWord(type,CV_TOT) || typeOpt != NULL ) // If type resolves to typeOfTerm and
    {                                               // typeOfTerm was not requested,
    while ((hEl = hashNext(&hc)) != NULL)           // then just show definition
        {
        ra = (struct hash *)hEl->val;
        char *thisType = (char *)cvTermNormalized(hashMustFindVal(ra,CV_TYPE));
        if (differentWord(thisType,type) && (requested == NULL
        ||  differentWord(thisType,CV_TERM_CONTROL)))
            continue;
        // Skip all rows that do not match queryBy param if specified
        if (requested)
            {
            char *val = hashFindVal(ra, queryBy);
            if (val == NULL)
                {
                // Special case for input that has no target
                if (sameString(queryBy, CV_TARGET))
                    val = hashMustFindVal(ra, CV_TERM);
                else
                    continue;
                }
            if (-1 == stringArrayIx(val,requested,requestCount))
                continue;
            }
        else if (excludeDeprecated)
            {
            if (hashFindVal(ra, "deprecated") != NULL)
                continue;
            }
        slAddTail(&termList, ra);
        }
    }
slSort(&termList, termCmp);

boolean described = doTypeDefinition(type,FALSE,(slCount(termList) == 0));
boolean sortable = (slCount(termList) > 5);
if (sortable)
    {
    webIncludeResourceFile("HGStyle.css");
    jsIncludeFile("jquery.js",NULL);
    jsIncludeFile("utils.js",NULL);
    printf("<TABLE class='sortable' border=1 CELLSPACING=0 style='border: 2px outset #006600; "
           "background-color:%s;'>\n",COLOR_BG_DEFAULT);
    }
else
    printf("<TABLE BORDER=1 BGCOLOR=%s CELLSPACING=0 CELLPADDING=2>\n",COLOR_BG_DEFAULT);
if (slCount(termList) > 0)
    {
    doTypeHeader(type, org,sortable);

    // Print out the terms
    while ((ra = slPopHead(&termList)) != NULL)
        {
        if (doTypeRow( ra, org ))
            totalPrinted++;
        }
    }
puts("</TBODY></TABLE><BR>");
if (sortable)
    jsInline("{$(document).ready(function() "
         "{sortTable.initialize($('table.sortable')[0],true,true);});}\n");
if (totalPrinted == 0)
    {
    if (!described)
        warn("Error: Unrecognised type (%s)\n", type);
    }
else if (totalPrinted > 1)
    printf("Total = %d\n", totalPrinted);
}
Exemple #29
0
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot,
               boolean strict)
/* hgTrackDb - Create trackDb table from text files.
 *   org, database, hgRoot, strict - passed to buildTrackDb to build the list
 *   trackDbName - table to (re)create; "<trackDbName>.tab" is written in
 *                 the current directory as a side effect
 *   sqlFile     - file holding the table definition
 * The flattened, sorted track list is written to the tab file without html,
 * bulk-loaded, and then the html and settings fields are filled in row by
 * row. */
{
struct trackDb *td;
char tab[PATH_LEN];
safef(tab, sizeof(tab), "%s.tab", trackDbName);

struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict);
tdbList = flatten(tdbList);
slSort(&tdbList, trackDbCmp);
verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList));

/* Write to tab-separated file; hold off on html, since it must be encoded */
    {
    verbose(2, "Starting write of tabs to %s\n", tab);
    FILE *f = mustOpen(tab, "w");
    for (td = tdbList; td != NULL; td = td->next)
        {
        hVarSubstTrackDb(td, database);
        /* Blank the html just for the tab output, then put it back. */
        char *hold = td->html;
        td->html = "";
	subChar(td->type, '\t', ' ');	/* Tabs confuse things. */
	subChar(td->shortLabel, '\t', ' ');	/* Tabs confuse things. */
	subChar(td->longLabel, '\t', ' ');	/* Tabs confuse things. */
	trackDbTabOut(td, f);
        td->html = hold;
        }
    carefulClose(&f);
    verbose(2, "Wrote tab representation to %s\n", tab);
    }

/* Update database */
    {
    char *create;
    char query[256];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = substituteTrackName(create, trackDbName);
    /* Drop one trailing ';'.  The length check keeps an empty definition
     * from being indexed at -1. */
    size_t createLen = strlen(create);
    if (createLen > 0 && create[createLen-1] == ';')
        create[createLen-1] = 0;
    sqlRemakeTable(conn, trackDbName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s", tab, trackDbName);
    verbose(2, "sending mysql \"%s\"\n", query);
    sqlUpdate(conn, query);
    /* Newline added so the next verbose message starts on its own line. */
    verbose(2, "done tab file load\n");

    /* Load in html and settings fields. */
    for (td = tdbList; td != NULL; td = td->next)
	{
        if (isEmpty(td->html))
	    {
	    /* In strict mode warn about missing html, except for tracks with
	     * a parent or superTrack setting and for cytoBandIdeo. */
	    if (strict && !trackDbLocalSetting(td, "parent") && !trackDbLocalSetting(td, "superTrack") &&
	        !sameString(td->track,"cytoBandIdeo"))
		{
		fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database, td->track, td->shortLabel);
		}
	    }
	else
	    {
	    updateBigTextField(conn,  trackDbName, "tableName", td->track, "html", td->html);
	    }
	if (td->settingsHash != NULL)
	    {
	    char *settings = settingsFromHash(td->settingsHash);
	    updateBigTextField(conn, trackDbName, "tableName", td->track,
	        "settings", settings);
	    if (showSettings)
		{
		/* Echo the settings on one line, newlines turned into "; ". */
		verbose(1, "%s: type='%s';", td->track, td->type);
		if (isNotEmpty(settings))
		    {
		    char *oneLine = replaceChars(settings, "\n", "; ");
		    eraseTrailingSpaces(oneLine);
		    verbose(1, " %s", oneLine);
		    freeMem(oneLine);
		    }
		verbose(1, "\n");
		}
	    freeMem(settings);
	    }
	}

    sqlDisconnect(&conn);
    verbose(1, "Loaded database %s\n", database);
    }
}
Exemple #30
0
int main(int argc, char *argv[])
/* Write an HTML listing of the C. briggsae / C. elegans alignments.
 *   argv[1] - sort key: "score", "briggsae" or "elegans"
 *   argv[2] - name of the HTML file to write
 * Any other argument count or sort key calls usage().  Returns 0. */
{
char *outName;
char xaFileName[512];
char region[64];
FILE *xaFile, *out;
struct xaAli *xaList = NULL, *xa;
char *sortBy;
/* Initialised so nothing indeterminate is used should usage() ever return. */
char *subtitle = NULL;
int (*cmp)(const void *va, const void *vb) = NULL;

if (argc != 3)
    {
    usage();
    }
sortBy = argv[1];
outName = argv[2];

if (sameWord(sortBy, "score"))
    {
    cmp = cmpXaScore;
    subtitle = "(sorted by alignment score)";
    }
else if (sameWord(sortBy, "briggsae"))
    {
    cmp = cmpXaQuery;
    subtitle = "(sorted by <I>C. briggsae</I> region)";
    }
else if (sameWord(sortBy, "elegans"))
    {
    cmp = cmpXaTarget;
    subtitle = "(sorted by <I>C. elegans</I> region)";
    }
else
    usage();

/* Read in alignment file.  The name is formatted with a size bound. */
snprintf(xaFileName, sizeof(xaFileName), "%s%s/all%s", wormXenoDir(), "cbriggsae", 
    xaAlignSuffix());
printf("Scanning %s\n", xaFileName);
xaFile = xaOpenVerify(xaFileName);
while ((xa = xaReadNext(xaFile, FALSE)) != NULL)
    {
    /* Rescale the score by 0.001 times the length of the target range. */
    xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart));
    /* The symbol strings are not used below; release them right away. */
    freeMem(xa->qSym);
    freeMem(xa->tSym);
    freeMem(xa->hSym);
    slAddHead(&xaList, xa);
    }

/* Sort with the comparison chosen above. */
printf("Sorting...");
slSort(&xaList, cmp);
/* The list is empty when the file held no alignments; do not dereference. */
if (xaList != NULL)
    printf(" best score %d\n", xaList->milliScore);
else
    printf(" no alignments found\n");

/* Write out .html */
printf("Writing %s\n", outName);
out = mustOpen(outName, "w");
htmStart(out, "C. briggsae/C. elegans Homologies");
fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n");
fprintf(out, "<H3>%s</H3>\n", subtitle);
fprintf(out, "<TT><PRE><B>");
fprintf(out, "Score  <I>C. elegans Region</I>     <I>C. briggsae</I> Region </B>\n");
fprintf(out, "--------------------------------------------------------\n");
for (xa = xaList; xa != NULL; xa = xa->next)
    {
    fprintf(out, "%6d ", xa->milliScore);
    /* The target name comes from the alignment file; bound the write to the
     * fixed-size region buffer. */
    snprintf(region, sizeof(region), "%s:%d-%d", xa->target, xa->tStart, xa->tEnd);
    fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s\">%21s</A> %s:%d-%d %c", 
        region, region, xa->query, xa->qStart, xa->qEnd, xa->qStrand);
    fprintf(out, "\n");
    }
htmEnd(out);
return 0;
}