struct chromGraph *chromGraphListWithTable(char *fileName, char *db, char *table)
/* Read rows of (ID, value) from fileName and build a chromGraph list from them.
 * Each ID is looked up in the hash that posHashFromTable builds from the given
 * table of db; the slPair found there supplies chrom (its name) and chromStart
 * (its val, converted with ptToInt).  An ID that is not in the hash produces a
 * warning and that row is skipped.  The list is returned in file order. */
{
    struct chromGraph *cgList = NULL;
    struct hash *idToPos;
    struct lineFile *in;
    struct sqlConnection *dbConn;
    char *row[2];

    /* The database connection is only needed while the lookup hash is built. */
    dbConn = sqlConnect(db);
    checkTableForFields(dbConn, table);
    idToPos = posHashFromTable(dbConn, table);
    sqlDisconnect(&dbConn);

    in = lineFileOpen(fileName, TRUE);
    while (lineFileRow(in, row))
    {
        struct slPair *pos = hashFindVal(idToPos, row[0]);
        struct chromGraph *item;

        if (pos == NULL)
        {
            warn("%s line %d: %s not found in %s table", fileName, in->lineIx, row[0], table);
            continue;
        }
        AllocVar(item);
        item->chrom = cloneString(pos->name);
        item->chromStart = ptToInt(pos->val);
        item->val = sqlDouble(row[1]);
        /* Items are pushed on the front; the list is reversed once after the loop. */
        slAddHead(&cgList, item);
    }
    slReverse(&cgList);

    /* Release the reader and the lookup hash together with its slPair values. */
    lineFileClose(&in);
    hashFreeWithVals(&idToPos, slPairFree);
    return cgList;
}
Пример #2
0
void bwtool_lift(struct hash *options, char *favorites, char *regions, unsigned decimals,
                 enum wigOutType wot, char *bigfile, char *chainfile, char *outputfile)
/* bwtool_lift - main for lifting program.
 * Reads the chain map from chainfile, takes the size hash from the "sizes"
 * option when it is present (otherwise from qSizeHash on the chain file), runs
 * the three passes over the opened input, writes every per-base wig in sorted
 * order to "<outputfile>.tmp.wig", hands that file to writeBw to produce
 * outputfile and finally removes the temporary wig.  The "unlifted" option is
 * passed to the final pass.  The favorites parameter is not used here. */
{
    struct hash *liftChains = readLiftOverMapChainHash(chainfile);
    char *sizesPath = hashFindVal(options, "sizes");
    char *unliftedPath = hashFindVal(options, "unlifted");
    struct hash *chromSizes = (sizesPath != NULL) ? readCsizeHash(sizesPath)
                                                  : qSizeHash(chainfile);
    struct hash *targetWigs = genomePbw(chromSizes);
    struct metaBig *input = metaBigOpen_check(bigfile, regions);
    char tmpWig[512];
    FILE *wigOut;
    struct hashEl *sorted;
    struct hashEl *cur;

    safef(tmpWig, sizeof(tmpWig), "%s.tmp.wig", outputfile);
    wigOut = mustOpen(tmpWig, "w");
    /* The element list is taken before the passes run and sorted after them. */
    sorted = hashElListHash(targetWigs);

    verbose(2,"starting first pass\n");
    do_pass1(input, liftChains, targetWigs);
    verbose(2, "starting second pass\n");
    do_pass2(input, liftChains, targetWigs);
    verbose(2,"starting final pass\n");
    do_final_pass(input, liftChains, targetWigs, unliftedPath);

    slSort(&sorted, pbwHashElCmp);
    cur = sorted;
    while (cur != NULL)
    {
        struct perBaseWig *wig = (struct perBaseWig *)cur->val;
        perBaseWigOutputNASkip(wig, wigOut, wot, decimals, NULL, FALSE, FALSE);
        cur = cur->next;
    }

    /* The temporary wig must be closed before writeBw reads it back. */
    hashElFreeList(&sorted);
    carefulClose(&wigOut);
    hashFreeWithVals(&liftChains, freeChainHashMap);
    hashFreeWithVals(&targetWigs, perBaseWigFree);
    writeBw(tmpWig, outputfile, chromSizes);
    hashFree(&chromSizes);
    remove(tmpWig);
    metaBigClose(&input);
}
Пример #3
0
static void allMakeDirFasta(char *regionsFile, char *hg18FastaFile, char *dir) {
	FILE *fp, *sq;
	char buf[500], dirName[500], seqName[500], chr1[500], chr2[500];
	int b1, e1, b2, e2, i, len, num;
	char ori1, ori2;
	struct hash *seqHash = NULL;
	struct dnaSeq *seq1, *seq2;
	struct stat st;
	DNA *s1, *s2;

	seqHash = faReadAllIntoHash(hg18FastaFile, dnaMixed);
	if (stat(dir, &st) != 0)
		do_cmd("mkdir %s", dir);

	fp = mustOpen(regionsFile, "r");
	i = 0;
	while (fgets(buf, 500, fp)) {
		if (sscanf(buf, "%[^:]:%d-%d %[^:]:%d-%d [%c %c] (%d)", chr1, &b1, &e1, chr2, &b2, &e2, &ori1, &ori2, &num) != 9)
			errAbort("error: %s", buf);
		++i;
		sprintf(dirName, "%s/R%d", dir, i);
		do_cmd("mkdir %s", dirName);
		sprintf(seqName, "%s/ref.fa", dirName);
		sq = mustOpen(seqName, "w");
		fprintf(sq, ">%s:%d-%d+%s:%d-%d[%c%c](%d)\n", chr1, b1, e1, chr2, b2, e2, ori1, ori2, num);
		seq1 = (struct dnaSeq *)hashFindVal(seqHash, chr1);
		assert(e1 <= seq1->size);
		len = e1 - b1 + 1;
		if (ori1 == '-') {
			s1 = cloneStringZExt(seq1->dna + b1 - 1, len, len+1);
			reverseComplement(s1, len);
			writeSeqWithBreaks(sq, s1, len, 80);
			freeMem(s1);
		}
		else
			writeSeqWithBreaks(sq, seq1->dna + b1 - 1, e1 - b1 + 1, 80);
		seq2 = (struct dnaSeq *)hashFindVal(seqHash, chr2);
		assert(e2 <= seq2->size);
		len = e2 - b2 + 1;
		if (ori2 == '-') {
			s2 = cloneStringZExt(seq2->dna + b2 - 1, len, len+1);
			reverseComplement(s2, len);
			writeSeqWithBreaks(sq, s2, len, 80);
			freeMem(s2);
		}
		else
			writeSeqWithBreaks(sq, seq2->dna + b2 - 1, e2 - b2 + 1, 80);
		fclose(sq);
	}
	fclose(fp);
	hashFreeWithVals(&seqHash, freeDnaSeq);
} 
void liftUpBedFromTable(char *db, char *table, char *inFile, char *outFile)
/* liftUpBedFromTable - Convert coordinate systems using a positional table in the database.
 * Builds the position hash for table in db, loads the bedPlus records from inFile,
 * turns on usingCds / usingEnd when the "cds" / "end" options are present, and
 * passes everything to liftAndOutput, which receives outFile. */
{
struct hash *lookup;
struct bedPlus *records;

lookup = positionHash(db, table);
records = bedPlusFileLoad(inFile);

/* The flags are only ever switched on here; an absent option leaves them as they were. */
if (optionExists("cds"))
    {
    usingCds = TRUE;
    }
if (optionExists("end"))
    {
    usingEnd = TRUE;
    }

liftAndOutput(lookup, records, outFile);

bedPlusFreeList(&records);
hashFreeWithVals(&lookup, slRefListAndBedFree);
}
Пример #5
0
void allWriteReadsToDir(char *regionFile, char *dir) {
	FILE *fp, *rd;
	char buf[500], readName[500], fileName[500], chr[50], fub[500];
	char str[2][500];
	char *readStr, *ch;
	int i, b, e, j, k;
	struct slName *ali;
	struct hashEl *el;
	struct rbTree *tr;
	struct range *rg;
	struct hash *localHash = NULL;

	fp = mustOpen(regionFile, "r");
	j = 0;
	while (fgets(buf, 500, fp)) {
		if (sscanf(buf, "%[^\t]\t%[^\t]\t%*s", str[0], str[1]) != 2)
			errAbort("error: %s", buf);
		++j;
		sprintf(fileName, "%s/R%d/reads.fq", dir, j);
		rd = mustOpen(fileName, "w");
		localHash = hashNew(8);
		for (i = 0; i < 2; i++) {
			if (sscanf(str[i], "%[^:]:%d-%d", chr, &b, &e) != 3)
				errAbort("error: %s", str[i]);
			el = hashLookup(aliHash, chr);
			tr = (struct rbTree *)(el->val);
			for (rg = rangeTreeAllOverlapping(tr, b, e); rg; rg = rg->next) {
				for (ali = (struct slName *)(rg->val); ali; ali = ali->next) {
					if (hashLookup(localHash, ali->name))
						continue;
					hashStoreName(localHash, ali->name);
					readStr = (char *)hashFindVal(readsHash, ali->name);
					if(readStr == NULL)
						continue;
					//assert(readStr);
					strcpy(fub, readStr);
					ch = strchr(fub, ' ');
					*ch = '\0';
					fprintf(rd, "@%s\n", ali->name);
					fprintf(rd, "%s\n", fub);
					++ch;
					fprintf(rd, "+%s\n", ali->name);
					fprintf(rd, "%s\n", ch);
         				strcpy(readName, ali->name);
					k = strlen(readName);
					/*
					if (readName[k-1] == '1')
						readName[k-1] = '2';
					else if (readName[k-1] == '2')
						readName[k-1] = '1';
					else
						errAbort("read identifier error: %s", readName);
						
					if (hashLookup(localHash, readName))
						continue;
					hashStoreName(localHash, readName);
					readStr = (char *)hashFindVal(readsHash, readName);

					assert(readStr);
					strcpy(fub, readStr);
					ch = strchr(fub, ' ');
					*ch = '\0';
					fprintf(rd, "@%s\n", readName);
					fprintf(rd, "%s\n", fub);
					++ch;
					fprintf(rd, "+%s\n", readName);
					fprintf(rd, "%s\n", ch); 
					*/
				}
			}
		}
		hashFree(&localHash);
		fclose(rd);
	}
	fclose(fp);
	hashFreeWithVals(&readsHash, freez);
	hashFreeWithVals(&aliHash, rbTreeFree);
}