void doTransRegCodeProbe(struct trackDb *tdb, char *item,
	char *codeTable, char *motifTable,
	char *tfToConditionTable, char *conditionTable)
/* Display detailed info on a ChIP-chip probe from transRegCode experiments. */
{
char query[256];
struct sqlResult *sr;
char **row;
int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
struct sqlConnection *conn = hAllocConn(database);
struct transRegCodeProbe *probe = NULL;

cartWebStart(cart, database, "ChIP-chip Probe Info");

/* Fetch the single probe row whose name matches the clicked item. */
sqlSafef(query, sizeof(query), "select * from %s where name = '%s'",
	tdb->table, item);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    probe = transRegCodeProbeLoad(row+rowOffset);
sqlFreeResult(&sr);
if (probe != NULL)
    {
    struct tfData *tfList = NULL, *tf;
    struct hash *tfHash = newHash(0);
    struct transRegCode *trc;
    int i;

    /* Print basic info. */
    printf("<B>Name:</B> %s<BR>\n", probe->name);
    printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd,
	NULL, TRUE, probe->name);

    /* Make up list of all transcriptionFactors. */
    for (i=0; i<probe->tfCount; ++i)
	{
	/* Parse out factor and condition.  Entries look like
	 * "factor_condition"; if there is no '_' the condition is "n/a". */
	char *tfName = probe->tfList[i];
	char *condition = strchr(tfName, '_');
	struct tfCond *cond;
	if (condition != NULL)
	    *condition++ = 0;	/* Split in place; tfName keeps factor part. */
	else
	    condition = "n/a";
	tf = hashFindVal(tfHash, tfName);
	if (tf == NULL)
	    {
	    /* First time we see this factor: make a tfData for it. */
	    AllocVar(tf);
	    hashAddSaveName(tfHash, tfName, tf, &tf->name);
	    slAddHead(&tfList, tf);
	    }
	AllocVar(cond);
	cond->name = cloneString(condition);
	cond->binding = probe->bindVals[i];
	slAddHead(&tf->conditionList, cond);
	}
    slSort(&tfList, tfDataCmpName);

    /* Fold in motif hits in region. */
    if (sqlTableExists(conn, codeTable))
	{
	sr = hRangeQuery(conn, codeTable, probe->chrom,
		probe->chromStart, probe->chromEnd,
		"chipEvidence != 'none'", &rowOffset);
	while ((row = sqlNextRow(sr)) != NULL)
	    {
	    trc = transRegCodeLoad(row+rowOffset);
	    tf = hashFindVal(tfHash, trc->name);
	    if (tf != NULL)
		slAddTail(&tf->trcList, trc);
	    }
	sqlFreeResult(&sr);
	}
    if (tfList == NULL)
	printf("No significant immunoprecipitation.");
    else
	{
	tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable);
	}
    transRegCodeProbeFree(&probe);
    growthConditionSection(conn, conditionTable);
    }
printf("\n<HR>\n");
printTrackHtml(tdb);
hFreeConn(&conn);
}
void bedItemOverlapCount(struct hash *chromHash, char *infile, char *outfile)
/* Count per-base overlap depth of the (chrom-sorted) bed items in infile and
 * write the resulting coverage to outfile.  chromHash maps chrom name to
 * chrom size.  Relies on file-scope state defined elsewhere in this file:
 * doBed12, doOutBounds, overMin/overMax, the unitSize type, and the
 * INCWOVERFLOW() increment-with-overflow-check macro. */
{
unsigned maxChromSize = 0;
unitSize *counts = (unitSize *)NULL;
FILE *f = mustOpen(outfile, "w");
struct hashCookie hc = hashFirst(chromHash);
struct hashEl *hel;

/* Find the largest chromosome so one array can serve every chrom. */
while( (hel = hashNext(&hc)) != NULL)
    {
    unsigned num = (unsigned) ptToInt(hel->val);
    maxChromSize = max(num, maxChromSize);
    }
verbose(2,"#\tmaxChromSize: %u\n", maxChromSize);
if (maxChromSize < 1)
    errAbort("maxChromSize is zero ?");

/* Allocate just once for the largest chrom and reuse this array */
counts = needHugeMem(sizeof(unitSize) * maxChromSize);

/* Reset the array to be zero to be reused */
memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize);

unsigned chromSize = 0;
char *prevChrom = (char *)NULL;
boolean outputToDo = FALSE;
struct hash *seenHash = newHash(5);	/* chroms already finished, to enforce sorting */
struct lineFile *bf = lineFileOpen(infile , TRUE);
struct bed *bed = (struct bed *)NULL;
char *row[12];
int numFields = doBed12 ? 12 : 3;

while (lineFileNextRow(bf,row, numFields))
    {
    int i;
    bed = bedLoadN(row, numFields);

    verbose(3,"#\t%s\t%d\t%d\n",bed->chrom,bed->chromStart, bed->chromEnd);

    if (prevChrom && differentWord(bed->chrom,prevChrom)) // End a chr
	{
	verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize);
	if (outputToDo)
	    outputCounts(counts, prevChrom, chromSize, f);
	outputToDo = FALSE;
	memset((void *)counts, 0,
		sizeof(unitSize)*(size_t)maxChromSize); /* zero counts */
	freez(&prevChrom); // prevChrom is now NULL so it will be caught by next if!
	}
    if ((char *)NULL == prevChrom)  // begin a chr
	{
	if (hashLookup(seenHash, bed->chrom))
	    errAbort("ERROR:input file not sorted. %s seen before on line %d\n",
		bed->chrom, bf->lineIx);
	hashAdd(seenHash, bed->chrom, NULL);
	prevChrom = cloneString(bed->chrom);
	chromSize = hashIntVal(chromHash, prevChrom);
	verbose(2,"#\tchrom %s starting, size %d\n", prevChrom,chromSize);
	}
    if (bed->chromEnd > chromSize)
	{
	// check for circular chrM
	if (doBed12 || bed->chromStart>=chromSize
	    || differentWord(bed->chrom,"chrM"))
	    {
	    warn("ERROR: %s\t%d\t%d", bed->chrom, bed->chromStart,
		bed->chromEnd);
	    errAbort("chromEnd > chromSize ? %d > %d", bed->chromEnd,chromSize);
	    }
	/* Item wraps past the origin of circular chrM: count up to the
	 * end of the chrom, then the wrapped remainder from base 0. */
	for (i = bed->chromStart; i < chromSize; ++i)
	    INCWOVERFLOW(counts,i);
	for (i = 0; i < (bed->chromEnd - chromSize); ++i)
	    INCWOVERFLOW(counts,i);
	}
    else if (doBed12)
	{
	/* Count only exon blocks, not the whole chromStart-chromEnd span. */
	int *starts = bed->chromStarts;
	int *sizes = bed->blockSizes;
	int *endStarts = &bed->chromStarts[bed->blockCount];

	for(; starts < endStarts; starts++, sizes++)
	    {
	    unsigned int end = *starts + *sizes + bed->chromStart;
	    for (i = *starts + bed->chromStart; i < end; ++i)
		INCWOVERFLOW(counts,i);
	    }
	}
    else
	{
	for (i = bed->chromStart; i < bed->chromEnd; ++i)
	    INCWOVERFLOW(counts, i);
	}
    outputToDo = TRUE;
    bedFree(&bed); // plug the memory leak
    }

lineFileClose(&bf);
// Note, next file could be on same chr!

/* Flush the final chromosome, which has no "next chrom" to trigger output. */
if (outputToDo)
    outputCounts(counts, prevChrom, chromSize, f);

if (doOutBounds)
    fprintf(stderr, "min %lu max %lu\n", (unsigned long)overMin,
	(unsigned long)overMax);

/* NOTE(review): if the input had zero data rows, prevChrom is still NULL
 * here and is passed to %s below -- confirm inputs are never empty. */
verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize);
carefulClose(&f);
freeMem(counts);
freez(&prevChrom);
// hashFreeWithVals(&chromHash, freez);
freeHash(&seenHash);
}
void analyse(int start, int stop)
/* Parse the alignment log on the file-scope inFile and analyse cDNAs number
 * start through stop (1-based).  Builds a cdnaInfo list keyed by name,
 * attaches rough hits, refines alignments against the cDNA sequence, then
 * reports good/bad and unusual cases.  Relies on file-scope helpers
 * (lookupInfo, hitLine, anyCdnaSeq, refineAlis, ...) and whiteSpaceChopper. */
{
struct hash *hash;
char line[512];
int lineCount = 0;
char *words[32];
int wordCount;
struct cdnaInfo *cdnaList = NULL;
struct cdnaInfo *ci = NULL;
int cdnaCount;
int maxCdnaCount = stop - start;

/* Skip forward to cDNA number 'start' by counting "alignments" lines. */
cdnaCount = 1;
if (start > 1)
    {
    for (;;)
	{
	if (!fgets(line, sizeof(line), inFile))
	    errAbort("Not %d cDNAs in file, only %d\n", start, cdnaCount);
	++lineCount;
	if (line[0] == '#')	/* Skip comments. */
	    continue;
	wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words));
	if (wordCount <= 0)	/* Skip empty lines. */
	    continue;
	if (!differentWord(words[1], "alignments"))
	    {
	    ++cdnaCount;
	    if (cdnaCount >= start)
		break;
	    }
	}
    }
cdnaCount = 0;
hash = newHash(14);	/* Hash table with 16k entries. */
for (;;)
    {
    if (!fgets(line, sizeof(line), inFile))
	break;
    ++lineCount;
    if (line[0] == '#')	/* Skip comments. */
	continue;
    wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words));
    if (wordCount <= 0)	/* Skip empty lines. */
	continue;
    if (wordCount < 4)	/* Everyone else has at least four words. */
	{
	errAbort("Short line %d:\n", lineCount);
	}
    if (sameWord(words[1], "Blasting"))
	{
	/* Start of a new cDNA: create its info record if not seen before. */
	char *cdnaName = words[2];
	if ((ci = lookupInfo(hash, cdnaName)) == NULL)
	    {
	    struct hashEl *hel;
	    ci = needMem(sizeof(*ci));
	    hel = hashAdd(hash, cdnaName, ci);
	    ci->next = cdnaList;
	    cdnaList = ci;
	    ci->ix = atoi(words[0]);
	    ci->name = hel->name;	/* share the hash's copy of the name */
	    }
	}
    else if (sameWord(words[2], "hits"))
	{
	/* Newer style - includes cDNA matching range. */
	if (ci == NULL)
	    continue;
	hitLine(ci, lineCount, words[0], words[1], words[3], words[4],
		words[5], words[9]);
	}
    else if (sameWord(words[1], "hits"))
	{
	/* Older style - no cDNA matching range. */
	if (ci == NULL)
	    continue;
	hitLine(ci, lineCount, words[0], NULL, words[2], words[3],
		words[4], words[8]);
	}
    else if (sameWord(words[1], "alignments"))
	{
	/* End of one cDNA's hits: fetch its sequence and refine alignments. */
	struct dnaSeq *cdnaSeq;
	struct wormCdnaInfo info;
	if (ci == NULL)
	    continue;
	if (differentWord(ci->name, words[3]))
	    errAbort("Line %d - %s is not %s", lineCount, words[3], ci->name);
	if (!ci->finished)
	    {
	    if (!anyCdnaSeq(ci->name, &cdnaSeq, &info))
		{
		warn("Can't find cDNA %s", ci->name);
		ci->isDupe = TRUE;
		}
	    else
		{
		ci->baseCount = cdnaSeq->size;
		ci->baseCrc = dnaCrc(cdnaSeq->dna, cdnaSeq->size);
		slReverse(&ci->roughAli);	/* restore input order */
		ci->roughScore = bestRoughScore(ci->roughAli);
		filterDupeCdna(ci, cdnaSeq);
		ci->isBackwards = (info.orientation == '-');
		refineAlis(ci, cdnaSeq);
		ci->fineScore = bestFineScore(ci->fineAli);
		ci->isEmbryonic = info.isEmbryonic;
		ci->finished = TRUE;
		freeDnaSeq(&cdnaSeq);
		++cdnaCount;
		if (cdnaCount >= maxCdnaCount)
		    break;
		}
	    }
	}
    else
	{
	errAbort("Can't deal with line %d\n", lineCount);
	}
    }
slReverse(&cdnaList);
doGoodBad(cdnaList);
doUnusual(cdnaList);
//makeCdnaToGene(cdnaList);

/* Clean up. */
/* These two are slow and not really necessary. */
#ifdef FASTIDIOUS
slFreeList(&cdnaList);
freeHash(&hash);
#endif

uglyf("Done analyse\n");
}
struct mafAli *hgMafFrag(
	char *database,     /* Database, must already have hSetDb to this */
	char *track, 	    /* Name of MAF track */
	char *chrom, 	    /* Chromosome (in database genome) */
	int start, int end, /* start/end in chromosome */
	char strand, 	    /* Chromosome strand. */
	char *outName, 	    /* Optional name to use in first component */
	struct slName *orderList /* Optional order of organisms. */
	)
/* mafFrag- Extract maf sequences for a region from database.
 * This creates a somewhat unusual MAF that extends from start
 * to end whether or not there are actually alignments.  Where
 * there are no alignments (or alignments missing a species)
 * a . character fills in.   The score is always zero, and
 * the sources just indicate the species.  You can mafFree this
 * as normal. */
{
int chromSize = hChromSize(database, chrom);
struct sqlConnection *conn = hAllocConn(database);
struct dnaSeq *native = hChromSeq(database, chrom, start, end);
struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
char masterSrc[128];	/* "database.chrom" -- the reference component name */
struct hash *orgHash = newHash(10);	/* organism name -> struct oneOrg */
struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
int curPos = start, symCount = 0;	/* progress along chrom / columns emitted */
struct slName *name;
int order = 0;

/* Check that the mafs are really copacetic, the particular
 * subtype we think is in the database that this (relatively)
 * simple code can handle. */
safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
mafCheckFirstComponentSrc(mafList, masterSrc);
mafCheckFirstComponentStrand(mafList, '+');
slSort(&mafList, mafCmp);

/* Prebuild organisms if possible from input orderList. */
for (name = orderList; name != NULL; name = name->next)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, name->name, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    org->order = order++;
    if (nativeOrg == NULL)
	nativeOrg = org;	/* first entry is the reference organism */
    }
if (orderList == NULL)
    {
    /* No explicit order: seed with the reference database itself. */
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, database, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    if (nativeOrg == NULL)
	nativeOrg = org;
    }

/* Go through all mafs in window, mostly building up
 * org->dy strings. */
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    struct mafComp *mc, *mcMaster = maf->components;
    struct mafAli *subMaf = NULL;
    order = 0;
    if (curPos < mcMaster->start)
	{
	/* Gap before this alignment: pad all organisms with dots
	 * (native gets real sequence inside fillInMissing). */
	fillInMissing(nativeOrg, orgList, native, start, curPos,
		mcMaster->start);
	symCount += mcMaster->start - curPos;
	}
    if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
						    * backtracking */
	{
	/* Trim the maf to [curPos,end) if it extends outside. */
	if (mafNeedSubset(maf, masterSrc, curPos, end))
	    {
	    subMaf = mafSubset(maf, masterSrc, curPos, end);
	    if (subMaf == NULL)
		continue;
	    }
	else
	    subMaf = maf;
	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
	    {
	    /* Extract name up to dot into 'orgName' */
	    char buf[128], *e, *orgName;
	    if ((mc->size == 0) || (mc->srcSize == 0))
		/* skip over components without sequence */
		continue;
	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */
	    e = strchr(mc->src, '.');
	    if (e == NULL)
		orgName = mc->src;
	    else
		{
		int len = e - mc->src;
		if (len >= sizeof(buf))
		    errAbort("organism/database name %s too long", mc->src);
		memcpy(buf, mc->src, len);
		buf[len] = 0;
		orgName = buf;
		}
	    /* Look up dyString corresponding to org, and create a
	     * new one if necessary. */
	    org = hashFindVal(orgHash, orgName);
	    if (org == NULL)
		{
		if (orderList != NULL)
		    errAbort("%s is not in orderList", orgName);
		AllocVar(org);
		slAddHead(&orgList, org);
		hashAddSaveName(orgHash, orgName, org, &org->name);
		org->dy = dyStringNew(native->size*1.5);
		/* Back-fill dots for all columns emitted before this
		 * organism first appeared. */
		dyStringAppendMultiC(org->dy, '.', symCount);
		if (nativeOrg == NULL)
		    nativeOrg = org;
		}
	    if (orderList == NULL && order > org->order)
		org->order = order;	/* remember deepest position seen */
	    org->hit = TRUE;

	    /* Fill it up with alignment. */
	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
	    }
	/* Organisms absent from this maf get dot-padding to stay in sync. */
	for (org = orgList; org != NULL; org = org->next)
	    {
	    if (!org->hit)
		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
	    org->hit = FALSE;
	    }
	symCount += subMaf->textSize;
	curPos = mcMaster->start + mcMaster->size;
	if (subMaf != maf)
	    mafAliFree(&subMaf);
	}
    }
if (curPos < end)
    {
    /* Trailing gap after the last alignment. */
    fillInMissing(nativeOrg, orgList, native, start, curPos, end);
    symCount += end - curPos;
    }
mafAliFreeList(&mafList);
slSort(&orgList, oneOrgCmp);
if (strand == '-')
    {
    for (org = orgList; org != NULL; org = org->next)
	reverseComplement(org->dy->string, org->dy->stringSize);
    }

/* Construct our maf */
AllocVar(maf);
maf->textSize = symCount;
for (org = orgList; org != NULL; org = org->next)
    {
    struct mafComp *mc;
    AllocVar(mc);
    if (org == orgList)
	{
	/* First component is the reference: either the caller-supplied
	 * outName or the database.chrom source with real coordinates. */
	if (outName != NULL)
	    {
	    mc->src = cloneString(outName);
	    mc->srcSize = native->size;
	    mc->strand = '+';
	    mc->start = 0;
	    mc->size = native->size;
	    }
	else
	    {
	    mc->src = cloneString(masterSrc);
	    mc->srcSize = chromSize;
	    mc->strand = strand;
	    if (strand == '-')
		reverseIntRange(&start, &end, chromSize);
	    mc->start = start;
	    mc->size = end-start;
	    }
	}
    else
	{
	/* Other species: size is just the number of real (non-dot,
	 * non-dash) characters in the text. */
	int size = countAlpha(org->dy->string);
	mc->src = cloneString(org->name);
	mc->srcSize = size;
	mc->strand = '+';
	mc->start = 0;
	mc->size = size;
	}
    mc->text = cloneString(org->dy->string);
    dyStringFree(&org->dy);
    slAddHead(&maf->components, mc);
    }
slReverse(&maf->components);
slFreeList(&orgList);
freeHash(&orgHash);
hFreeConn(&conn);
return maf;
}
/* main function */ int main_bismark (int argc, char *argv[]) { char *output, *outReportfile, *outCpGfile, *outbedGraphfile, *row[100], *samfilecopy; char *forwardcg, *forwardchg, *forwardchh, *forwardread, *forwardread1; char *reversecg, *reversechg, *reversechh, *reverseread, *reverseread1; unsigned long long int *cnt; unsigned long long int *cnt2 = NULL; int optSam = 0, c, optaddChr = 0, optStats = 0, optBis = 0, optFull = 0, optKeep = 0; unsigned int optisize = 500; int optcov = 5; char *optoutput = NULL; struct hash *cpgHash = newHash(0); struct hash *chgHash = newHash(0); struct hash *chhHash = newHash(0); time_t start_time, end_time; start_time = time(NULL); while ((c = getopt(argc, argv, "SCsbFBo:c:I:h?")) >= 0) { switch (c) { case 'S': optSam = 1; break; case 'C': optaddChr = 1; break; case 's': optStats = 1; break; case 'b': optBis = 1; break; case 'F': optFull = 1; break; case 'B': optKeep = 1; break; case 'c': optcov = (int)strtol(optarg, 0, 0); break; case 'I': optisize = (unsigned int)strtol(optarg, 0, 0); break; case 'o': optoutput = strdup(optarg); break; case 'h': case '?': return bismark_usage(); break; default: return 1; } } if (optind + 3 > argc) return bismark_usage(); char *chr_size_file = argv[optind]; char *cpg_bed_file = argv[optind+1]; char *sam_file = argv[optind+2]; fprintf(stderr, "* CpG file provided: %s\n", cpg_bed_file); fprintf(stderr, "* Insert size cutoff: %u\n", optisize); fprintf(stderr, "* Read coverage threshold: %i\n", optcov); struct hash *chrHash = hashNameIntFile(chr_size_file); samfilecopy = cloneString(sam_file); int numFields = chopByChar(samfilecopy, ',', row, ArraySize(row)); fprintf(stderr, "* Provided %i BAM/SAM file(s)\n", numFields); if(optFull) { fprintf(stderr, "* Warning: will run in Full mode, 8 track files and 1 report file will be generated\n"); fprintf(stderr, "* Warning: will output stats over each C (in CHG)\n"); fprintf(stderr, "* Warning: will output stats over each C (in CHH)\n"); optStats = 0; optBis 
= 1; } if(optStats) { fprintf(stderr, "* Warning: will report stats only as -s specified\n"); } // if use select bismark like output, read cpgHash at each C stats if(optBis) { fprintf(stderr, "* Warning: will output stats over each C (in CpG)\n"); cpgHash = cpgBed2BinKeeperHashBismark(chrHash, cpg_bed_file); }else{ fprintf(stderr, "* Warning: will output stats over each CpG\n"); cpgHash = cpgBed2BinKeeperHash(chrHash, cpg_bed_file); } if(optoutput) { output = optoutput; } else { output = cloneString(get_filename_without_ext(basename(row[0]))); } if(asprintf(&outCpGfile, "%s.CpG.bedGraph", output) < 0) errAbort("Mem Error.\n"); if(asprintf(&outbedGraphfile, "%s.density.bedGraph", output) < 0) errAbort("Mem Error.\n"); if (asprintf(&outReportfile, "%s.report", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&forwardcg, "%s.forward.CG.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&forwardchg, "%s.forward.CHG.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&forwardchh, "%s.forward.CHH.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&forwardread, "%s.forward.Density.bed", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&forwardread1, "%s.forward.Density.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&reversecg, "%s.reverse.CG.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&reversechg, "%s.reverse.CHG.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&reversechh, "%s.reverse.CHH.bedGraph", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&reverseread, "%s.reverse.Density.bed", output) < 0) errAbort("Preparing output wrong"); if (asprintf(&reverseread1, "%s.reverse.Density.bedGraph", output) < 0) errAbort("Preparing output wrong"); //sam file to bed file //fprintf(stderr, "* Parsing the SAM/BAM file\n"); cnt = bismarkBamParse(sam_file, chrHash, cpgHash, chgHash, chhHash, 
forwardread, reverseread, optSam, optaddChr, optFull, optisize); //write to file if (optFull){ fprintf(stderr, "* Output CpG methylation calls\n"); writecpgBismarkLite(cpgHash, forwardcg, reversecg, optcov); fprintf(stderr, "* Output CHG methylation calls\n"); writecpgBismarkLiteHash(chgHash, forwardchg, reversechg, optcov); fprintf(stderr, "* Output CHH methylation calls\n"); writecpgBismarkLiteHash(chhHash, forwardchh, reversechh, optcov); fprintf(stderr, "* Sorting methylation calls\n"); sortBedfile(forwardcg); sortBedfile(reversecg); sortBedfile(forwardchg); sortBedfile(reversechg); sortBedfile(forwardchh); sortBedfile(reversechh); fprintf(stderr, "* Sorting density bed\n"); sortBedfile(forwardread); sortBedfile(reverseread); fprintf(stderr, "* Generating density bedGraph\n"); bedItemOverlapCount(chrHash, forwardread, forwardread1); bedItemOverlapCount(chrHash, reverseread, reverseread1); }else{ cnt2 = writecpgBismark(cpgHash, outbedGraphfile, outCpGfile, optStats, optcov); //sort output if(!optStats) { fprintf(stderr, "* Sorting output density\n"); sortBedfile(outbedGraphfile); } //sort output if(!optStats) { fprintf(stderr, "* Sorting output CpG methylation call\n"); sortBedfile(outCpGfile); } } //generate bigWig //fprintf(stderr, "* Generating bigWig\n"); //bigWigFileCreate(outbedGraphfile, chr_size_file, 256, 1024, 0, 1, outbigWigfile); //bedGraphToBigWig(outbedGraphfile, chr_size_file, outbigWigfile); //write report file fprintf(stderr, "* Preparing report file\n"); writeReportBismark(outReportfile, cnt, cnt2, numFields, row, optBis, hashIntSum(chrHash)); if(!optKeep){ fprintf(stderr, "* Deleting (huge) density bed files\n"); unlink(forwardread); unlink(reverseread); } //cleaning hashFree(&chrHash); hashFree(&cpgHash); hashFree(&chgHash); hashFree(&chhHash); free(outCpGfile); free(outbedGraphfile); //free(outbigWigfile); free(outReportfile); free(samfilecopy); free(forwardcg); free(forwardchg); free(forwardchh); free(forwardread); free(forwardread1); 
free(reversecg); free(reversechg); free(reversechh); free(reverseread); free(reverseread1); end_time = time(NULL); fprintf(stderr, "* Done, time used %.0f seconds.\n", difftime(end_time, start_time)); return 0; }
void readCloneNames(struct lineFile *clf) /* read internal BAC clone names and Sanger sts names */ { struct alias *a = NULL; struct sanger *s = NULL; char *words[4], *name = NULL, *sanger = NULL, *extName = NULL; int i, rel; char sep = '|'; boolean found = FALSE, posFound = FALSE; /* alias hash is keyed by Sanger sts name */ aliasHash = newHash(16); /* hash of Sanger names keyed by external name */ sangerByExtNameHash = newHash(16); /* Read in all rows */ while (lineFileChopCharNext(clf, sep, words, 5)) { name = cloneString(words[0]); sanger = cloneString(words[1]); if (!sameString(words[2], "")) rel = sqlUnsigned(words[2]); else rel = 3; /* find external name for this internal name from the extNameHash */ if ((extName = hashFindVal(extNameHash, name)) == NULL) { /* if not found in BAC hash, then need to use internal name to make extName */ extName = translateName(name, FALSE); } if ((a = hashFindVal(aliasHash, sanger)) == NULL) { /* allocate memory for alias struct */ AllocVar(a); /* allocate memory for UniSTS IDs, aliases, internal and external names and relations */ /* and initialize the arrays */ AllocArray(a->uniStsId, (sizeof(char *) * NUMSANGER)); AllocArray(a->aliases, (sizeof(char *) * NUMALIASES)); AllocArray(a->extName, (sizeof(char *) * MAXSANGER)); AllocArray(a->intName, (sizeof(char *) * MAXSANGER)); AllocArray(a->relation, (sizeof(int) * MAXSANGER)); for (i = 0; i < NUMSANGER; i++) { a->uniStsId[i] = NULL; } for (i = 0; i < MAXSANGER; i++) { a->extName[i] = NULL; a->intName[i] = NULL; a->relation[i] = -1; } for (i = 0; i < NUMALIASES; i++) { a->aliases[i] = NULL; } } /* find empty slot in arrays to add external and internal names */ posFound = FALSE; for (i = 0; i < NUMALIASES && (!posFound); i++) { if (a->extName[i] == NULL) { posFound = TRUE; a->extName[i] = cloneString(extName); if (a->intName[i] == NULL) a->intName[i] = cloneString(name); else errAbort("For marker %s, the empty slot in the intName array is not the same as that for the extName 
array in the alias struct\n", extName); if (a->relation[i] == -1) a->relation[i] = rel; else errAbort("For marker %s, the empty slot in the relation array is not the same as that for the extName array in the alias struct\n", extName); } } a->sangerName = cloneString(sanger); a->primer1 = NULL; a->primer2 = NULL; /* add this alias struct to the hash keyed by sanger name */ hashAdd(aliasHash, sanger, a); /* add sanger name to hash keyed by external name */ if ((s = hashFindVal(sangerByExtNameHash, extName)) == NULL) { /* allocate memory for struct with array of Sanger names */ AllocVar(s); /* initialize the array */ for (i = 0; i < MAXSANGER; i++) { s->sangerName[i] = NULL; } } found = FALSE; for (i = 0; i < MAXSANGER && (!found); i++) { if (s->sangerName[i] == NULL) { found = TRUE; s->sangerName[i] = cloneString(sanger); } } /* add this list of sanger names to a hash keyed by external name, extName */ hashAdd(sangerByExtNameHash, extName, s); } }
static void clusterClone(int argc, char *argv[])
/* For each psl file argument, cluster the alignments of clone parts
 * (query names of the form accession_part) onto chromosomes.  Per
 * accession, accumulate coordinate lists per target chrom in coordHash
 * and a per-chrom count of well-covered parts in chrHash, then hand off
 * to processResult().  minCover is a file-scope coverage threshold. */
{
int i;

for (i=1; i < argc; ++i)
    {
    struct lineFile *lf;
    struct psl *psl;
    unsigned tSize;
    char *prevAccPart = (char *)NULL;	/* last full query name (acc_part) */
    char *prevAccName = (char *)NULL;	/* last accession (part suffix chopped) */
    char *prevTargetName = (char *)NULL;
    struct hashEl *el;
    struct hash *chrHash = newHash(0);	/* chrom -> count of covered parts */
    struct hash *coordHash = newHash(0);/* chrom -> struct coordEl ** list head */
    struct coordEl *coord;
    struct coordEl **coordListPt = (struct coordEl **) NULL;
    unsigned querySize = 0;
    int partCount = 0;
    int partsConsidered = 0;

    verbose(2,"#\tprocess: %s\n", argv[i]);
    lf=pslFileOpen(argv[i]);
    while ((struct psl *)NULL != (psl = pslNext(lf)) )
	{
	char *accName = (char *)NULL;
	char *targetName = (char *)NULL;
	int chrCount = 0;
	double percentCoverage;

	accName = cloneString(psl->qName);
	if ((char *)NULL == prevAccPart)
	    {
	    prevAccPart = cloneString(psl->qName);  /* first time */
	    querySize = psl->qSize;
	    ++partsConsidered;
	    }
	chopSuffixAt(accName,'_');	/* acc_part -> acc */

	if ((char *)NULL == prevAccName)
	    prevAccName = cloneString(accName);  /* first time */

	if ((char *)NULL == prevTargetName)
	    prevTargetName = cloneString(psl->tName);  /* first time */

	/* encountered a new accession name, process the one we
	 * were working on */
	if (differentWord(accName, prevAccName))
	    {
	    if (partCount > 0)
		processResult(chrHash, coordHash, prevAccName, querySize,
		    partsConsidered);
	    else
		verbose(1,"# ERROR %s %s - no coordinates found in %d parts considered\n",
		    prevTargetName, prevAccName, partsConsidered);
	    freeMem(prevAccName);
	    prevAccName = cloneString(accName);
	    /* Reset all per-accession state. */
	    freeHash(&chrHash);
	    freeHash(&coordHash);
	    chrHash = newHash(0);
	    coordHash = newHash(0);
	    querySize = 0;
	    partCount = 0;
	    partsConsidered = 0;
	    }
	tSize = psl->tEnd - psl->tStart;
	percentCoverage = 100.0*((double)(tSize+1)/(psl->qSize + 1));
	if (differentWord(psl->qName, prevAccPart))
	    {
	    /* New part of the same accession: count it toward total size. */
	    ++partsConsidered;
	    querySize += psl->qSize;
	    freeMem(prevAccPart);
	    prevAccPart = cloneString(psl->qName);
	    }
	targetName = cloneString(psl->tName);
	if (differentWord(targetName, prevTargetName))
	    {
	    freeMem(prevTargetName);
	    prevTargetName = cloneString(targetName);
	    }
	/* keep a hash of chrom names encountered */
	el = hashLookup(chrHash, targetName);
	if (el == NULL)
	    {
	    if (percentCoverage > minCover)
		{
		hashAddInt(chrHash, targetName, 1);
		chrCount = 1;
		}
	    else
		{
		hashAddInt(chrHash, targetName, 0);
		chrCount = 0;
		}
	    }
	else
	    {
	    if (percentCoverage > minCover)
		{
		chrCount = ptToInt(el->val) + 1;
		el->val=intToPt(chrCount);
		}
	    }
	AllocVar(coord);
	coord->start = psl->tStart;
	coord->end = psl->tEnd;
	coord->qSize = psl->qSize;
	coord->strand = sameWord(psl->strand,"+") ? 1 : 0;
	/* when coverage is sufficient */
	if (percentCoverage > minCover)
	    {
	    ++partCount;
	    coord->name = cloneString(psl->qName);
	    /* for each chrom name, accumulate a list of coordinates */
	    el = hashLookup(coordHash, targetName);
	    if (el == NULL)
		{
		AllocVar(coordListPt);
		hashAdd(coordHash, targetName, coordListPt);
		}
	    else
		{
		coordListPt = el->val;
		}
	    slAddHead(coordListPt,coord);
	    verbose(2,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n",
		psl->qName, psl->qSize, tSize, tSize - psl->qSize,
		percentCoverage, chrCount, psl->tName, psl->tStart,
		psl->tEnd, psl->strand);
	    }
	else
	    {
	    /* NOTE(review): 'coord' allocated above is not freed or kept
	     * on this path -- looks like a small leak; confirm. */
	    verbose(3,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n",
		psl->qName, psl->qSize, tSize, tSize - psl->qSize,
		percentCoverage, chrCount, psl->tName, psl->tStart,
		psl->tEnd, psl->strand);
	    }
	freeMem(accName);
	freeMem(targetName);
	pslFree(&psl);
	}
    /* Flush the final accession of this file. */
    if (partCount > 0)
	processResult(chrHash, coordHash, prevAccName, querySize,
	    partsConsidered);
    else
	verbose(1,"# ERROR %s %s - no coordinates found\n",
	    prevTargetName, prevAccName);
    freeMem(prevAccName);
    freeHash(&chrHash);
    freeHash(&coordHash);
    lineFileClose(&lf);
    }
}	/*	static void clusterClone()	*/
void hgFindSpec(char *org, char *database, char *hgFindSpecName, char *sqlFile,
		char *hgRoot, boolean strict)
/* hgFindSpec - Create hgFindSpec table from text files.
 * Gathers .ra search specs from hgRoot, hgRoot/org and hgRoot/org/database
 * (most-specific level wins), dumps them to a tab file, then (re)creates
 * and loads the hgFindSpecName table in the given database. */
{
struct hash *uniqHash = newHash(8);
struct hash *htmlHash = newHash(8);
struct hgFindSpec *hfsList = NULL, *hfs;
char rootDir[512], orgDir[512], asmDir[512];
char tab[512];

snprintf(tab, sizeof(tab), "%s.tab", hgFindSpecName);

/* Create track list from hgRoot and hgRoot/org and hgRoot/org/assembly
 * ra format database.  Use bounded snprintf (not sprintf) so long
 * hgRoot/org/database values cannot overflow the 512-byte path buffers. */
snprintf(rootDir, sizeof(rootDir), "%s", hgRoot);
snprintf(orgDir, sizeof(orgDir), "%s/%s", hgRoot, org);
snprintf(asmDir, sizeof(asmDir), "%s/%s/%s", hgRoot, org, database);
/* Layer from most specific (assembly) to least (root); later layers only
 * fill in specs not already present (uniqHash tracks what's taken). */
layerOn(strict, database, asmDir, uniqHash, htmlHash, FALSE, &hfsList);
layerOn(strict, database, orgDir, uniqHash, htmlHash, FALSE, &hfsList);
layerOn(strict, database, rootDir, uniqHash, htmlHash, TRUE, &hfsList);
slSort(&hfsList, hgFindSpecCmp);
if (verboseLevel() > 0)
    printf("Loaded %d search specs total\n", slCount(hfsList));

/* Write to tab-separated file. */
    {
    FILE *f = mustOpen(tab, "w");
    for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
	hgFindSpecTabOut(hfs, f);
    carefulClose(&f);
    }

/* Update database */
    {
    char *create, *end;
    char query[256];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = subTrackName(create, hgFindSpecName);
    end = create + strlen(create)-1;
    if (*end == ';')
	*end = 0;	/* strip trailing semicolon for sqlRemakeTable */
    sqlRemakeTable(conn, hgFindSpecName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof query, "load data local infile '%s' into table %s",
	  tab, hgFindSpecName);
    sqlUpdate(conn, query);

    /* Load in settings fields (too big for the tab file; updated per row). */
    for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
	{
	if (hfs->settingsHash != NULL)
	    {
	    char *settings = settingsFromHash(hfs->settingsHash);
	    updateBigTextField(conn, hgFindSpecName, "searchName",
			       hfs->searchName, "searchSettings", settings);
	    freeMem(settings);
	    }
	}
    sqlDisconnect(&conn);
    if (verboseLevel() > 0)
	printf("Loaded database %s\n", database);
    }
}
struct g2cFile *loadG2cFile(char *fileName) { char lineBuf[1024*8]; int lineLen; char *words[256*8]; int wordCount; FILE *f; int lineCount = 0; struct g2cFile *gf = alloc(sizeof(*gf)); int hitCount = 0; int cdnaCount = 0; int geneCount = 0; gf->name = fileName; f = mustOpen(fileName, "r"); gf->cdnaHash = newHash(14); while (fgets(lineBuf, sizeof(lineBuf), f) != NULL) { ++lineCount; lineLen = strlen(lineBuf); if (lineLen >= sizeof(lineBuf) - 1) { errAbort("%s\nLine %d of %s too long, can only handle %d chars\n", lineBuf, lineCount, fileName, sizeof(lineBuf)-1); } wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words)); if (wordCount > 0) { struct gene *gene = alloc(sizeof(*gene)); char *geneName = words[0]; int i; /* Create new gene struct and put it on list. */ gene->name = cloneString(geneName); slAddHead(&gf->geneList, gene); ++geneCount; /* Put all cdna hits on gene. */ for (i=1; i<wordCount; ++i) { struct cdnaHit *hit; struct cdnaVal *cdnaVal; struct hashEl *hel; char *cdnaName = words[i]; /* Get cdna, or if it's the first time we've seen it * make up a data structure for it and hang it on * hash list and cdna list. */ if ((hel = hashLookup(gf->cdnaHash, cdnaName)) == NULL) { cdnaVal = alloc(sizeof(*cdnaVal)); hel = hashAdd(gf->cdnaHash, cdnaName, cdnaVal); cdnaVal->name = hel->name; slAddHead(&gf->cdnaList, cdnaVal); ++cdnaCount; } else { cdnaVal = hel->val; } ++cdnaVal->useCount; /* Make up new cdna hit and hang it on the gene. */ hit = alloc(sizeof(*hit)); hit->hel = hel; hit->name = hel->name; slAddHead(&gene->hitList, hit); ++hitCount; } slReverse(&gene->hitList); } } slReverse(&gf->geneList); slSort(&gf->geneList, cmpName); slSort(&gf->cdnaList, cmpName); fclose(f); reportHashStats(gf->cdnaHash); printf("Loaded %s. %d genes %d cdnas %d hits\n", fileName, geneCount, cdnaCount, hitCount); return gf; }
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut)
/* axtChain - Chain together axt alignments.
 * Reads alignments from axtIn (axt or psl format depending on the -psl
 * option), groups them per query/target/strand pair, chains each group,
 * and writes the resulting chains sorted by score to chainOut.
 * tNibDir/qNibDir name nib dirs, 2bit files, or (with -faT/-faQ) fasta
 * files holding target/query sequence. */
{
struct hash *pairHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct seqPair *spList = NULL, *sp;
FILE *f = mustOpen(chainOut, "w");
char *qName = "", *tName = "";       /* Names of currently-loaded seqs. */
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char qStrand = 0, tStrand = 0;
struct chain *chainList = NULL, *chain;
FILE *details = NULL;                /* Optional per-chain detail output. */
struct dnaSeq *seq = NULL;
struct hash *qFaHash = newHash(0);   /* Fasta seqs by name (faQ mode). */
struct hash *tFaHash = newHash(0);   /* Fasta seqs by name (faT mode). */
FILE *faF;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

/* Record the scoring scheme at the top of the chain file. */
axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain");

if (detailsName != NULL)
    details = mustOpen(detailsName, "w");

/* Read input file and divide alignments into various parts. */
if (optionExists("psl"))
    spList = readPslBlocks(axtIn, pairHash, f);
else
    spList = readAxtBlocks(axtIn, pairHash, f);

/* In fasta mode, preload all query sequence into a hash. */
if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    verbose(1, "reading query fasta sequence from '%s'\n", qNibDir);
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        hashAdd(qFaHash, seq->name, seq);
    fclose(faF);
    }

/* Likewise for target sequence. */
if (optionExists("faT"))
    {
    faF = mustOpen(tNibDir, "r");
    verbose(1, "reading target fasta sequence from '%s'\n", tNibDir);
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        hashAdd(tFaHash, seq->name, seq);
    fclose(faF);
    }

/* Chain each query/target/strand group in turn. */
for (sp = spList; sp != NULL; sp = sp->next)
    {
    slReverse(&sp->blockList);       /* blocks were prepended during read */
    removeExactOverlaps(&sp->blockList);
    verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList));

    /* Load query sequence (strand-specific) from fasta hash or nib/2bit. */
    if (optionExists("faQ"))
        {
        assert (qFaHash != NULL);
        loadFaSeq(qFaHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand, qNibDir);
        }
    else
        {
        loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand,
                     &qName, &qSeq, &qStrand);
        }

    /* Load target sequence; target is always taken on the '+' strand. */
    if (optionExists("faT"))
        {
        assert (tFaHash != NULL);
        loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand, tNibDir);
        }
    else
        {
        loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+',
                     &tName, &tSeq, &tStrand);
        }
    chainPair(sp, qSeq, tSeq, &chainList, details);
    }

/* Output chains, best scoring first. */
slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    /* Sanity check: chain bounds must agree with its first block. */
    assert(chain->qStart == chain->blockList->qStart
        && chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }
carefulClose(&f);
}
void bioImageLoad(char *setRaFile, char *itemTabFile)
/* bioImageLoad - Load data into bioImage database.
 * setRaFile holds submission-set-wide fields (ra format); itemTabFile is a
 * tab-separated table of images whose first line is a '#'-prefixed header
 * of field names.  Per-row values override set-wide defaults via getVal.
 * Aborts on any missing required field or (without -replace) on duplicate
 * images. */
{
struct hash *raHash = raReadSingle(setRaFile);
struct hash *rowHash;                        /* field name -> column index */
struct lineFile *lf = lineFileOpen(itemTabFile, TRUE);
char *line, *words[256];
struct sqlConnection *conn = sqlConnect(database);
int rowSize;
int submissionSetId;
/* Caches mapping string values to their ids in lookup tables, so we only
 * hit the database once per distinct value. */
struct hash *fullDirHash = newHash(0);
struct hash *screenDirHash = newHash(0);
struct hash *thumbDirHash = newHash(0);
struct hash *treatmentHash = newHash(0);
struct hash *bodyPartHash = newHash(0);
struct hash *sliceTypeHash = newHash(0);
struct hash *imageTypeHash = newHash(0);
struct hash *sectionSetHash = newHash(0);    /* submitted set name -> id */
struct dyString *dy = dyStringNew(0);

/* Read first line of tab file, and from it get all the field names. */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s appears to be empty", lf->fileName);
if (line[0] != '#')
    errAbort("First line of %s needs to start with #, and then contain field names",
             lf->fileName);
rowHash = hashRowOffsets(line+1);
rowSize = rowHash->elCount;
if (rowSize >= ArraySize(words))
    errAbort("Too many fields in %s", lf->fileName);

/* Check that have all required fields */
    {
    char *fieldName;
    int i;
    for (i=0; i<ArraySize(requiredSetFields); ++i)
        {
        fieldName = requiredSetFields[i];
        if (!hashLookup(raHash, fieldName))
            errAbort("Field %s is not in %s", fieldName, setRaFile);
        }
    for (i=0; i<ArraySize(requiredItemFields); ++i)
        {
        fieldName = requiredItemFields[i];
        if (!hashLookup(rowHash, fieldName))
            errAbort("Field %s is not in %s", fieldName, itemTabFile);
        }
    for (i=0; i<ArraySize(requiredFields); ++i)
        {
        fieldName = requiredFields[i];
        if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName))
            errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile);
        }
    }

/* Create/find submission record. */
submissionSetId = saveSubmissionSet(conn, raHash);

/* Process rest of tab file. */
while (lineFileNextRowTab(lf, words, rowSize))
    {
    /* Resolve foreign-key ids (creating lookup-table rows as needed). */
    int fullDir = cachedId(conn, "location", "name",
        fullDirHash, "fullDir", raHash, rowHash, words);
    int screenDir = cachedId(conn, "location", "name",
        screenDirHash, "screenDir", raHash, rowHash, words);
    int thumbDir = cachedId(conn, "location", "name",
        thumbDirHash, "thumbDir", raHash, rowHash, words);
    int bodyPart = cachedId(conn, "bodyPart", "name",
        bodyPartHash, "bodyPart", raHash, rowHash, words);
    int sliceType = cachedId(conn, "sliceType", "name",
        sliceTypeHash, "sliceType", raHash, rowHash, words);
    int imageType = cachedId(conn, "imageType", "name",
        imageTypeHash, "imageType", raHash, rowHash, words);
    int treatment = cachedId(conn, "treatment", "conditions",
        treatmentHash, "treatment", raHash, rowHash, words);
    /* NULL default means the field is required; "" / "0" / "200" are
     * the documented fallbacks for optional fields. */
    char *fileName = getVal("fileName", raHash, rowHash, words, NULL);
    char *submitId = getVal("submitId", raHash, rowHash, words, NULL);
    char *taxon = getVal("taxon", raHash, rowHash, words, NULL);
    char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL);
    char *age = getVal("age", raHash, rowHash, words, NULL);
    char *sectionSet = getVal("sectionSet", raHash, rowHash, words, "");
    char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0");
    char *gene = getVal("gene", raHash, rowHash, words, "");
    char *locusLink = getVal("locusLink", raHash, rowHash, words, "");
    char *refSeq = getVal("refSeq", raHash, rowHash, words, "");
    char *genbank = getVal("genbank", raHash, rowHash, words, "");
    char *priority = getVal("priority", raHash, rowHash, words, "200");
    int sectionId = 0;
    int oldId;
    // char *xzy = getVal("xzy", raHash, rowHash, words, xzy);

    /* Map the submitter's section-set name to a database id, creating
     * a sectionSet row the first time we see each name. */
    if (sectionSet[0] != 0 && !sameString(sectionSet, "0"))
        {
        struct hashEl *hel = hashLookup(sectionSetHash, sectionSet);
        if (hel != NULL)
            sectionId = ptToInt(hel->val);
        else
            {
            sqlUpdate(conn, "insert into sectionSet values(default)");
            sectionId = sqlLastAutoId(conn);
            hashAdd(sectionSetHash, sectionSet, intToPt(sectionId));
            }
        }

    /* See if this image is already loaded (same file in same location).
     * NOTE(review): string values are interpolated into SQL unescaped;
     * assumes input files are trusted, curated data — confirm, or route
     * through an escaping/parameterized query helper. */
    dyStringClear(dy);
    dyStringAppend(dy, "select id from image ");
    dyStringPrintf(dy, "where fileName = '%s' ", fileName);
    dyStringPrintf(dy, "and fullLocation = %d", fullDir);
    oldId = sqlQuickNum(conn, dy->string);
    if (oldId != 0)
        {
        if (replace)
            {
            dyStringClear(dy);
            dyStringPrintf(dy, "delete from image where id = %d", oldId);
            sqlUpdate(conn, dy->string);
            }
        else
            errAbort("%s is already in database line %d of %s",
                     fileName, lf->lineIx, lf->fileName);
        }

    /* Insert the image row itself. */
    dyStringClear(dy);
    dyStringAppend(dy, "insert into image set\n");
    dyStringPrintf(dy, " id = default,\n");
    dyStringPrintf(dy, " fileName = '%s',\n", fileName);
    dyStringPrintf(dy, " fullLocation = %d,\n", fullDir);
    dyStringPrintf(dy, " screenLocation = %d,\n", screenDir);
    dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir);
    dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId);
    dyStringPrintf(dy, " sectionSet = %d,\n", sectionId);
    dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx);
    dyStringPrintf(dy, " submitId = '%s',\n", submitId);
    dyStringPrintf(dy, " gene = '%s',\n", gene);
    dyStringPrintf(dy, " locusLink = '%s',\n", locusLink);
    dyStringPrintf(dy, " refSeq = '%s',\n", refSeq);
    dyStringPrintf(dy, " genbank = '%s',\n", genbank);
    dyStringPrintf(dy, " priority = %s,\n", priority);
    dyStringPrintf(dy, " taxon = %s,\n", taxon);
    dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo);
    dyStringPrintf(dy, " age = %s,\n", age);
    dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart);
    dyStringPrintf(dy, " sliceType = %d,\n", sliceType);
    dyStringPrintf(dy, " imageType = %d,\n", imageType);
    dyStringPrintf(dy, " treatment = %d\n", treatment);
    sqlUpdate(conn, dy->string);
    }
}
void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { /* struct liftOverChain *chainList = NULL, *chain; */ char *userData; /* char *dataFile; */ char *dataFormat; char *organism; char *db; float minBlocks, minMatch; boolean multiple, fudgeThick; int minSizeQ, minSizeT; boolean refreshOnly = FALSE; /* char *err = NULL; */ struct liftOverChain *chainList = NULL, *choice; cart = theCart; if (cgiOptionalString(HGLFT_ERRORHELP_VAR)) { puts("<PRE>"); puts(liftOverErrHelp()); //system("/usr/bin/cal"); puts("</PRE>"); return; } /* Get data to convert - from userData variable, or if * that is empty from a file. */ if (cartOptionalString(cart, "SubmitFile")) userData = cartOptionalString(cart, HGLFT_DATAFILE_VAR); else userData = cartOptionalString(cart, HGLFT_USERDATA_VAR); dataFormat = cartCgiUsualString(cart, HGLFT_DATAFORMAT_VAR, DEFAULT_FORMAT); cartWebStart(cart, NULL, "Lift Genome Annotations"); getDbAndGenome(cart, &db, &organism, oldVars); chainList = liftOverChainListFiltered(); choice = defaultChoices(chainList, db); if (choice == NULL) errAbort("Sorry, no conversions available from this assembly\n"); minSizeQ = cartCgiUsualInt(cart, HGLFT_MINSIZEQ, choice->minSizeQ); minSizeT = cartCgiUsualInt(cart, HGLFT_MINSIZET, choice->minSizeT); minBlocks = cartCgiUsualDouble(cart, HGLFT_MINBLOCKS, choice->minBlocks); minMatch = cartCgiUsualDouble(cart, HGLFT_MINMATCH, choice->minMatch); fudgeThick = cartCgiUsualBoolean(cart, HGLFT_FUDGETHICK, (choice->fudgeThick[0]=='Y') ? TRUE : FALSE); multiple = cartCgiUsualBoolean(cart, HGLFT_MULTIPLE, (choice->multiple[0]=='Y') ? 
TRUE : FALSE); refreshOnly = cartCgiUsualInt(cart, HGLFT_REFRESHONLY_VAR, 0); webMain(choice, dataFormat, multiple); liftOverChainFreeList(&chainList); if (!refreshOnly && userData != NULL && userData[0] != '\0') { struct hash *chainHash = newHash(0); char *chainFile; struct tempName oldTn, mappedTn, unmappedTn; FILE *old, *mapped, *unmapped; char *line; int lineSize; char *fromDb, *toDb; int ct = 0, errCt = 0; /* read in user data and save to file */ makeTempName(&oldTn, HGLFT, ".user"); old = mustOpen(oldTn.forCgi, "w"); fputs(userData, old); fputs("\n", old); /* in case user doesn't end last line */ carefulClose(&old); chmod(oldTn.forCgi, 0666); /* setup output files -- one for converted lines, the other * for lines that could not be mapped */ makeTempName(&mappedTn, HGLFT, ".bed"); makeTempName(&unmappedTn, HGLFT, ".err"); mapped = mustOpen(mappedTn.forCgi, "w"); chmod(mappedTn.forCgi, 0666); unmapped = mustOpen(unmappedTn.forCgi, "w"); chmod(unmappedTn.forCgi, 0666); fromDb = cgiString(HGLFT_FROMDB_VAR); toDb = cgiString(HGLFT_TODB_VAR); chainFile = liftOverChainFile(fromDb, toDb); if (chainFile == NULL) errAbort("ERROR: Can't convert from %s to %s: no chain file loaded", fromDb, toDb); readLiftOverMap(chainFile, chainHash); if (sameString(dataFormat, WIGGLE_FORMAT)) /* TODO: implement Wiggle */ {} else if (sameString(dataFormat, POSITION_FORMAT)) { /* minSizeT here and in liftOverChain.c/h has been renamed minChainT in liftOver.c */ /* ignore multiple, it must be false when position is used */ ct = liftOverPositions(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minSizeT, 0, fudgeThick, mapped, unmapped, FALSE, NULL, &errCt); } else if (sameString(dataFormat, BED_FORMAT)) { /* minSizeT here and in liftOverChain.c/h has been renamed minChainT in liftOver.c */ ct = liftOverBed(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minSizeT, 0, fudgeThick, mapped, unmapped, multiple, NULL, &errCt); } else /* programming error */ errAbort("ERROR: 
Unsupported data format: %s\n", dataFormat); webNewSection("Results"); if (ct) { /* some records succesfully converted */ cgiParagraph(""); printf("Successfully converted %d record", ct); printf("%s: ", ct > 1 ? "s" : ""); printf("<A HREF=%s TARGET=_blank>View Conversions</A>\n", mappedTn.forCgi); } if (errCt) { /* some records not converted */ cgiParagraph(""); printf("Conversion failed on %d record", errCt); printf("%s. ", errCt > 1 ? "s" : ""); printf("<A HREF=%s TARGET=_blank>Display failure file</A> \n", unmappedTn.forCgi); printf("<A HREF=\"../cgi-bin/hgLiftOver?%s=1\" TARGET=_blank>Explain failure messages</A>\n", HGLFT_ERRORHELP_VAR); puts("<P>Failed input regions:\n"); struct lineFile *errFile = lineFileOpen(unmappedTn.forCgi, TRUE); puts("<BLOCKQUOTE><PRE>\n"); while (lineFileNext(errFile, &line, &lineSize)) puts(line); lineFileClose(&errFile); puts("</PRE></BLOCKQUOTE>\n"); } if (sameString(dataFormat, POSITION_FORMAT) && multiple) { puts("<BLOCKQUOTE><PRE>\n"); puts("Note: multiple checkbox ignored since it is not supported for position format."); puts("</PRE></BLOCKQUOTE>\n"); } carefulClose(&unmapped); } webDataFormats(); webDownloads(); cartWebEnd(); }
void faToTwoBit(char **RinFiles, char **RoutFile) /* Convert inFiles in fasta format to outfile in 2 bit * format. */ { struct twoBit *twoBitList = NULL, *twoBit; int i; struct hash *uniqHash = newHash(18); FILE *f; //int inFileCount = RinFileCount[0]; char *inFiles=RinFiles[0]; char *outFile = RoutFile[0]; char *delim = "@"; char *ptr= NULL; //for (i=0; i<inFileCount; ++i) // { i = 0; if ((ptr = strtok(inFiles, delim)) != NULL) { do { i++; char *fileName=ptr; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dnaSeq seq; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { if (seq.size == 0) { warn("Skipping item %s which has no sequence.\n",seq.name); continue; } /* strip off version number */ if (stripVersion) { char *sp = NULL; sp = strchr(seq.name,'.'); if (sp != NULL) *sp = '\0'; } if (hashLookup(uniqHash, seq.name)) { if (!ignoreDups) errAbort("Duplicate sequence name %s", seq.name); else continue; } hashAdd(uniqHash, seq.name, NULL); if (noMaskFT) faToDna(seq.dna, seq.size); else unknownToN(seq.dna, seq.size); twoBit = twoBitFromDnaSeq(&seq, !noMaskFT); slAddHead(&twoBitList, twoBit); } lineFileClose(&lf); } while ((ptr = strtok(NULL, delim)) != NULL); } slReverse(&twoBitList); f = mustOpen(outFile, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); }
void testOneTable(struct htmlPage *trackPage, char *org, char *db,
        char *group, char *track, char *table)
/* Test stuff on one table if we haven't already tested this table.
 * Submits the table-selection form and, depending on which output types
 * the resulting page offers, runs the schema/stats/field/output tests. */
{
/* Static so the already-tested set persists across calls; keyed by
 * "db.table".  (Kept function-local since only this function uses it.) */
static struct hash *uniqHash = NULL;
char fullName[256];
if (uniqHash == NULL)
    uniqHash = newHash(0);
safef(fullName, sizeof(fullName), "%s.%s", db, table);
if (!hashLookup(uniqHash, fullName))
    {
    struct htmlPage *tablePage;
    struct htmlForm *mainForm;

    hashAdd(uniqHash, fullName, NULL);
    verbose(1, "Testing %s %s %s %s %s\n", naForNull(org), db, group, track, table);
    tablePage = quickSubmit(trackPage, org, db, group, track, table,
                            "selectTable", hgtaTable, table);
    if (!isObsolete(table) && tablePage != NULL)
        {
        if ((mainForm = htmlFormGet(tablePage, "mainForm")) == NULL)
            {
            /* Page came back without the expected form: soft error so the
             * test run continues with other tables. */
            qaStatusSoftError(tablesTestList->status,
                "Couldn't get main form on tablePage for %s %s %s %s",
                db, group, track, table);
            }
        else
            {
            testSchema(tablePage, mainForm, org, db, group, track, table);
            testSummaryStats(tablePage, mainForm, org, db, group, track, table);
            if (outTypeAvailable(mainForm, "bed"))
                {
                /* Positional table: run the full battery of output tests. */
                if (outTypeAvailable(mainForm, "primaryTable"))
                    {
                    int rowCount;
                    rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
                    testOneField(tablePage, mainForm, org, db, group, track, table, rowCount);
                    testOutSequence(tablePage, mainForm, org, db, group, track, table, rowCount);
                    testOutBed(tablePage, mainForm, org, db, group, track, table, rowCount);
                    testOutHyperlink(tablePage, mainForm, org, db, group, track, table, rowCount);
                    testOutGff(tablePage, mainForm, org, db, group, track, table);
                    if (rowCount > 0)
                        testOutCustomTrack(tablePage, mainForm, org, db, group, track, table);
                    }
                }
            else if (outTypeAvailable(mainForm, "primaryTable"))
                {
                /* If BED type is not available then the region will be ignored, and
                 * we'll end up scanning whole table.  Make sure table is not huge
                 * before proceeding. */
                if (tableSize(db, table) < 500000)
                    {
                    int rowCount;
                    rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table);
                    testOneField(tablePage, mainForm, org, db, group, track, table, rowCount);
                    }
                }
            }
        htmlPageFree(&tablePage);
        }
    carefulCheckHeap();
    }
}
static int bedToGffLines(struct bed *bedList, struct slName *exonFramesList,
                         struct hTableInfo *hti, int fieldCount, char *source,
                         boolean gtf2StopCodons)
/* Translate a (list of) bed into gff and print out.
 * Note that field count (perhaps reduced by bitwise intersection)
 * can in effect override hti.
 * exonFramesList, when non-NULL, supplies one frames entry per bed
 * (walked in parallel with bedList).  Returns the number of beds
 * processed. */
{
if (! bedList)
    return 0;
struct hash *nameHash = newHash(20);   /* tracks duplicate transcript names */
struct bed *bed;
struct slName *exonFrames = exonFramesList;
int i, exonStart, exonEnd;
char txName[256];
int itemCount = 0;
static int namelessIx = 0;             /* serial for beds with no name */
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    /* Enforce unique transcript_ids. */
    if (bed->name != NULL)
        {
        struct hashEl *hel = hashLookup(nameHash, bed->name);
        int dupCount = (hel != NULL ? ptToInt(hel->val) : 0);
        if (dupCount > 0)
            {
            safef(txName, sizeof(txName), "%s_dup%d", bed->name, dupCount);
            hel->val = intToPt(dupCount + 1);
            }
        else
            {
            safef(txName, sizeof(txName), "%s", bed->name);
            hashAddInt(nameHash, bed->name, 1);
            }
        }
    else
        safef(txName, sizeof(txName), "tx%d", ++namelessIx);
    if (hti->hasBlocks && hti->hasCDS && fieldCount > 4)
        {
        /* first pass: compute frames, in order dictated by strand. */
        int startIndx = 0, stopIndx = 0;
        char *frames = NULL;
        char *ef = NULL;
        /* FIX: guard on exonFrames (the current cursor), not exonFramesList:
         * if the frames list is shorter than bedList the cursor goes NULL
         * and the old test dereferenced it. */
        if (exonFrames != NULL)
            ef = exonFrames->name;
        frames = computeFrames(bed, ef, &startIndx, &stopIndx);
        /* second pass: one exon (possibly CDS, start/stop_codon) per block. */
        for (i=0; i < bed->blockCount; i++)
            {
            exonStart = bed->chromStart + bed->chromStarts[i];
            exonEnd = exonStart + bed->blockSizes[i];
            if ((exonStart < bed->thickEnd) && (exonEnd > bed->thickStart))
                {
                /* Block overlaps the coding region: clip to CDS bounds. */
                int exonCdsStart = max(exonStart, bed->thickStart);
                int exonCdsEnd = min(exonEnd, bed->thickEnd);
                addCdsStartStop(bed, source, exonCdsStart, exonCdsEnd,
                                frames, i, startIndx, stopIndx,
                                gtf2StopCodons, txName);
                }
            addGffLineFromBed(bed, source, "exon", exonStart, exonEnd, '.', txName);
            }
        freeMem(frames);
        }
    else if (hti->hasBlocks && fieldCount > 4)
        {
        /* Blocks but no CDS: exons only. */
        for (i=0; i < bed->blockCount; i++)
            {
            exonStart = bed->chromStart + bed->chromStarts[i];
            exonEnd = exonStart + bed->blockSizes[i];
            addGffLineFromBed(bed, source, "exon", exonStart, exonEnd, '.', txName);
            }
        }
    else if (hti->hasCDS && fieldCount > 4)
        {
        /* CDS but no blocks: UTR exon / CDS / UTR exon. */
        if (bed->thickStart == 0 && bed->thickEnd == 0)
            bed->thickStart = bed->thickEnd = bed->chromStart;
        if (bed->thickStart > bed->chromStart)
            {
            addGffLineFromBed(bed, source, "exon", bed->chromStart,
                              bed->thickStart, '.', txName);
            }
        if (bed->thickEnd > bed->thickStart)
            addGffLineFromBed(bed, source, "CDS", bed->thickStart,
                              bed->thickEnd, '0', txName);
        if (bed->thickEnd < bed->chromEnd)
            {
            addGffLineFromBed(bed, source, "exon", bed->thickEnd,
                              bed->chromEnd, '.', txName);
            }
        }
    else
        {
        /* Simple bed: the whole range is one exon. */
        addGffLineFromBed(bed, source, "exon", bed->chromStart,
                          bed->chromEnd, '.', txName);
        }
    itemCount++;
    if (exonFrames)
        exonFrames = exonFrames->next;
    }
hashFree(&nameHash);
return itemCount;
}
void update(struct g2cFile *old, struct g2cFile *up)
/* Merge the gene/cdna hits of file 'up' into file 'old', creating genes
 * that don't yet exist in 'old' and adding any cdna hits not already
 * present in the gene's family.  Prints summary counts when done. */
{
struct gene *oldGene, *upGene;
struct cdnaHit *oldHit, *upHit;
struct hash *geneHash;
struct hashEl *hel;
int sameHitCount = 0;     /* hits already present in old */
int newHitCount = 0;      /* hits added to old */
int newGeneCount = 0;     /* genes created in old */
int updatedGeneCount = 0; /* genes that gained at least one hit */
int altCount = 0;         /* alt-spliced genes encountered */
struct geneFamily smallFamily;  /* stack-allocated one-member family */
struct geneFamily *family;

printf("Updating %s with %s\n", old->name, up->name);

/* Hash the existing gene names for faster lookup. */
geneHash = newHash(12);
for (oldGene = old->geneList; oldGene != NULL; oldGene = oldGene->next)
    hashAdd(geneHash, oldGene->name, oldGene);

for (upGene = up->geneList; upGene != NULL; upGene = upGene->next)
    {
    boolean changedGene = FALSE;
    /* Build the "family" of related old genes to search for hits:
     * all alt-spliced variants, or a one-member family for a plain gene. */
    if (isAltSplicedName(upGene->name))
        {
        family = getAltFamily(geneHash, upGene->name);
        ++altCount;
        }
    else
        {
        hel = hashLookup(geneHash, upGene->name);
        if (hel != NULL)
            {
            smallFamily.gene = hel->val;
            smallFamily.next = NULL;
            family = &smallFamily;
            }
        else
            family = NULL;
        }
    /* Set corresponding gene in old file to NULL until we
     * need to find it. */
    oldGene = NULL;
    for (upHit = upGene->hitList; upHit != NULL; upHit = upHit->next)
        {
        if ((oldHit = findHitInFamily(family, upHit->name)) != NULL)
            ++sameHitCount;
        else
            {
            if (oldGene == NULL)
                {
                /* We haven't found corresponding gene yet.  First
                 * look for it in the family. */
                struct geneFamily *member;
                for (member = family; member != NULL; member = member->next)
                    {
                    if (strcmp(member->gene->name, upGene->name) == 0)
                        {
                        oldGene = member->gene;
                        break;
                        }
                    }
                /* The corresponding gene doesn't exist yet.  We
                 * have to make it up and hang it on the genelist
                 * for the file, the hash list, and the family list. */
                if (oldGene == NULL)
                    {
                    oldGene = alloc(sizeof(*oldGene));
                    oldGene->name = upGene->name;
                    slAddHead(&old->geneList, oldGene);
                    hashAdd(geneHash, oldGene->name, oldGene);
                    member = alloc(sizeof(*member));
                    member->gene = oldGene;
                    slAddHead(&family, member);
                    ++newGeneCount;
                    }
                }
            oldHit = alloc(sizeof(*oldHit));
            oldHit->name = upHit->name;
            /* NOTE(review): 'hel' here is only assigned in the
             * non-alt-spliced branch above, so for alt-spliced genes this
             * stores a stale (or, on the first iteration, uninitialized)
             * value.  Also, where hits are built at load time, hit->hel
             * refers to a cdna hash element, while this 'hel' is a GENE
             * hash element.  Looks like it should be upHit->hel — confirm
             * against the hit consumers before changing. */
            oldHit->hel = hel;
            slAddHead(&oldGene->hitList, oldHit);
            ++newHitCount;
            changedGene = TRUE;
            }
        }
    if (changedGene)
        ++updatedGeneCount;
    }
slSort(&old->geneList, cmpName);
printf("Updated %d genes (including %d alt spliced ones) with %d cdna hits (%d hits unchanged) %d new genes\n",
       updatedGeneCount, altCount, newHitCount, sameHitCount, newGeneCount);
}
void txGeneXref(char *genomeDb, char *uniProtDb, char *genePredFile, char *infoFile, char *pickFile, char *evFile, char *outFile) /* txGeneXref - Make kgXref type table for genes.. */ { /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct hash *geneToProtHash = makeGeneToProtHash(genePredFile); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(pickFile, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); removePickVersions(pick); hashAdd(pickHash, pick->name, pick); } /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); /* Read in info file, and loop through it to make out file. */ struct txInfo *info, *infoList = txInfoLoadAll(infoFile); FILE *f = mustOpen(outFile, "w"); for (info = infoList; info != NULL; info = info->next) { char *kgID = info->name; char *mRNA = ""; char *spID = ""; char *spDisplayID = ""; char *geneSymbol = NULL; char *refseq = ""; char *protAcc = ""; char *description = NULL; char query[256]; char *proteinId = hashMustFindVal(geneToProtHash, info->name); boolean isAb = sameString(info->category, "antibodyParts"); pick = hashFindVal(pickHash, info->name); ev = hashFindVal(evHash, info->name); if (pick != NULL) { /* Fill in the relatively straightforward fields. */ refseq = pick->refSeq; if (info->orfSize > 0) { protAcc = pick->refProt; spID = proteinId; if (sameString(protAcc, spID)) spID = pick->uniProt; if (spID[0] != 0) spDisplayID = spAnyAccToId(uConn, spID); } /* Fill in gene symbol and description from refseq if possible. 
*/ if (refseq[0] != 0) { struct sqlResult *sr; safef(query, sizeof(query), "select name,product from refLink where mrnaAcc='%s'", refseq); sr = sqlGetResult(gConn, query); char **row = sqlNextRow(sr); if (row != NULL) { geneSymbol = cloneString(row[0]); if (!sameWord("unknown protein", row[1])) description = cloneString(row[1]); } sqlFreeResult(&sr); } /* If need be try uniProt for gene symbol and description. */ if (spID[0] != 0 && (geneSymbol == NULL || description == NULL)) { char *acc = spLookupPrimaryAcc(uConn, spID); if (description == NULL) description = spDescription(uConn, acc); if (geneSymbol == NULL) { struct slName *nameList = spGenes(uConn, acc); if (nameList != NULL) geneSymbol = cloneString(nameList->name); slFreeList(&nameList); } } } /* If it's an antibody fragment use that as name. */ if (isAb) { geneSymbol = cloneString("abParts"); description = cloneString("Parts of antibodies, mostly variable regions."); isAb = TRUE; } if (ev == NULL) { mRNA = cloneString(""); if (!isAb) { errAbort("%s is %s but not %s\n", info->name, infoFile, evFile); } } else { mRNA = cloneString(ev->primary); chopSuffix(mRNA); } /* Still no joy? Try genbank RNA records. 
*/ if (geneSymbol == NULL || description == NULL) { if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { char *acc = ev->accs[i]; chopSuffix(acc); if (geneSymbol == NULL) { safef(query, sizeof(query), "select geneName.name from gbCdnaInfo,geneName " "where geneName.id=gbCdnaInfo.geneName and gbCdnaInfo.acc = '%s'", acc); geneSymbol = sqlQuickString(gConn, query); if (geneSymbol != NULL) { if (sameString(geneSymbol, "n/a")) geneSymbol = NULL; } } if (description == NULL) { safef(query, sizeof(query), "select description.name from gbCdnaInfo,description " "where description.id=gbCdnaInfo.description " "and gbCdnaInfo.acc = '%s'", acc); description = sqlQuickString(gConn, query); if (description != NULL) { if (sameString(description, "n/a")) description = NULL; } } } } } if (geneSymbol == NULL) geneSymbol = mRNA; if (description == NULL) description = mRNA; /* Get rid of some characters that will cause havoc downstream. */ stripChar(geneSymbol, '\''); subChar(geneSymbol, '<', '['); subChar(geneSymbol, '>', ']'); /* Abbreviate geneSymbol if too long */ if (strlen(geneSymbol) > 40) strcpy(geneSymbol+37, "..."); fprintf(f, "%s\t", kgID); fprintf(f, "%s\t", mRNA); fprintf(f, "%s\t", spID); fprintf(f, "%s\t", spDisplayID); fprintf(f, "%s\t", geneSymbol); fprintf(f, "%s\t", refseq); fprintf(f, "%s\t", protAcc); fprintf(f, "%s\n", description); } carefulClose(&f); }
struct hash *agpLoadAll(char *agpFile) /* load AGP entries into a hash of AGP lists, one per chromosome */ { struct hash *agpHash = newHash(0); struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *words[9]; int lastPos = 0; int wordCount; struct agpFrag *agpFrag; struct agpGap *agpGap; char *chrom; struct agp *agp; struct hashEl *hel; while ((wordCount = lineFileChopNext(lf, words, ArraySize(words))) != 0) { lineFileExpectAtLeast(lf, 8, wordCount); chrom = words[0]; if (!hashFindVal(agpHash, chrom)) lastPos = 1; AllocVar(agp); if (words[4][0] != 'N' && words[4][0] != 'U') { /* not a gap */ lineFileExpectWords(lf, 9, wordCount); agpFrag = agpFragLoad(words); if (agpFrag->chromStart != lastPos) errAbort( "Frag start (%d, %d) doesn't match previous end line %d of %s\n", agpFrag->chromStart, lastPos, lf->lineIx, lf->fileName); if (agpFrag->chromEnd - agpFrag->chromStart != agpFrag->fragEnd - agpFrag->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agpFrag->chrom, agpFrag->frag, lf->lineIx, lf->fileName); lastPos = agpFrag->chromEnd + 1; agp->entry = agpFrag; agp->isFrag = TRUE; } else { /* gap */ lineFileExpectWords(lf, 8, wordCount); agpGap = agpGapLoad(words); if (agpGap->chromStart != lastPos) errAbort("Gap start (%d, %d) doesn't match previous end line %d of %s\n", agpGap->chromStart, lastPos, lf->lineIx, lf->fileName); lastPos = agpGap->chromEnd + 1; agp->entry = agpGap; agp->isFrag = FALSE; } if ((hel = hashLookup(agpHash, chrom)) == NULL) hashAdd(agpHash, chrom, agp); else slAddHead(&(hel->val), agp); } #ifndef DEBUG { struct hashCookie cookie; struct hashEl *hel; cookie = hashFirst(agpHash); while ((hel = hashNext(&cookie)) != NULL) { struct agp *agpList; agpList = (struct agp *)hel->val; /* for (agp = agpList; agp != NULL; agp = agp->next) printf("isFrag: %d\n", agp->isFrag); */ } } #endif /* reverse AGP lists */ //hashTraverseVals(agpHash, slReverse); #ifndef DEBUG { struct hashCookie cookie; struct hashEl *hel; cookie = 
hashFirst(agpHash); while ((hel = hashNext(&cookie)) != NULL) { struct agp *agpList; slReverse(&hel->val); agpList = hel->val; /* agpList = (struct agp *)hel->val; slReverse(&agpList); hashRemove(agpHash, hel->name); hashAdd(agpHash, hel->name, agpList); */ /* for (agp = agpList; agp != NULL; agp = agp->next) printf("isFrag: %d\n", agp->isFrag); */ } } #endif return agpHash; }
int main(int argc, char *argv[]) { struct hash *bacHash; char line[1024]; int lineCount; char *words[256]; int wordCount; int fileIx; char *fileName; FILE *f; if (argc < 2) usage(); bacHash = newHash(16); for (fileIx = 1; fileIx < argc; ++fileIx) { fileName = argv[fileIx]; uglyf("Processing %s\n", fileName); f = mustOpen(fileName, "r"); lineCount = 0; while (fgets(line, sizeof(line), f)) { ++lineCount; wordCount = chopLine(line, words); if (wordCount == ArraySize(words)) errAbort("Too many words line %d of %s\n", lineCount, fileName); if (wordCount != 0) { char *bacName; int cIx; struct contigTrack *ctList = NULL, *ct; struct bacTrack *bt; struct hashEl *hel; /* Check line syntax and parse it. */ if (!sameString(words[1], "glues")) errAbort("Bad format line %d of %s\n", lineCount, fileName); bacName = words[2]; for (cIx = 4; cIx < wordCount; cIx += 5) { char *parts[3]; int partCount; AllocVar(ct); ct->ix = atoi(words[cIx]); ct->strand = words[cIx+1][0]; ct->dir = words[cIx+2][0]; partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts)); if (partCount != 2) errAbort("Bad format line %d of %s\n", lineCount, fileName); ct->start = atoi(parts[0]); ct->end = atoi(parts[1]); ct->cookedScore = atof(words[cIx+4]); slAddHead(&ctList, ct); } slSort(&ctList, cmpContigTrack); /* Lookup bacTrack and make it if new. */ hel = hashLookup(bacHash, bacName); if (hel == NULL) { AllocVar(bt); hel = hashAdd(bacHash, bacName, bt); bt->name = hel->name; slAddHead(&bacList, bt); } else { bt = hel->val; } /* Process pairs into bacTrack. */ addPairs(bt, ctList); slFreeList(&ctList); } } fclose(f); } slSort(&bacList, cmpBacTrack); printStats(); return 0; }
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog)
/* gensatImageDownload - Download images from gensat guided by xml file.
 * For each GensatImage record in the xml, fetch the full-size image into
 * outDir (directly over ftp when the source is already a .jpg, otherwise
 * via the gensat cgi which serves jpeg), creating subdirectories as
 * needed and appending progress notes to outLog via the file-scope fLog.
 * Images whose target .jpg already exists on disk are skipped. */
{
struct xap *xap;			/* streaming xml parser over gensatXml */
struct gsGensatImage *image;		/* current image record */
char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat";
char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id=";
char finalJpg[PATH_LEN];		/* full path of jpg we will write */
char finalDir[PATH_LEN];		/* directory part of finalJpg */
char wgetSource[PATH_LEN];		/* uri we fetch from */
struct hash *dirHash = newHash(16);	/* dirs already made, avoids repeat mkdirs */
struct dyString *mkdir = dyStringNew(0);
int imageIx = 0;			/* running count for progress messages */

fLog = mustOpen(outLog, "a");
fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir);
xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler);
while ((image = xapListNext(xap, "GensatImage")) != NULL)
    {
    /* NOTE(review): ->text assigned straight to int - presumably the
     * parsed id element is numeric; confirm against gsGensatImage schema. */
    int id = image->gsGensatImageId->text;
    char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg
	->gsGensatImageInfo->gsGensatImageInfoFilename->text;

    /* Mangle file name a little */
    subChar(imageFile, '(', '_');
    stripChar(imageFile, ')');

    /* Figure out name of jpeg file in outDir. */
    verbose(1, "image %d, id %d\n", ++imageIx, id);
    safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile);
    stripString(finalJpg, ".full");	/* Image magick can't handle two suffixes */
    chopSuffix(finalJpg);
    /* NOTE(review): unbounded strcat after the bounded safef - could
     * overflow finalJpg when the path is within 4 bytes of PATH_LEN. */
    strcat(finalJpg, ".jpg");

    /* Create directory that it goes in if necessary */
    splitPath(finalJpg, finalDir, NULL, NULL);
    if (!hashLookup(dirHash, finalDir))
	{
	hashAdd(dirHash, finalDir, NULL);
	dyStringClear(mkdir);
	dyStringPrintf(mkdir, "mkdir -p %s", finalDir);
	/* NOTE(review): shell command built from xml-derived names; fine
	 * for trusted NCBI data, unsafe if input were untrusted. */
	if (system(mkdir->string) != 0)
	    errAbort("Couldn't %s", mkdir->string);
	}

    /* Download it - either directly via ftp, or indirectly via cgi. */
    if (fileExists(finalJpg))
	{
	verbose(1, "already have %s\n", imageFile);
	fprintf(fLog, "%s already downloaded\n", finalJpg);
	}
    else
	{
	if (endsWith(imageFile, ".jpg"))
	    {
	    safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile);
	    if (safeGetOne(wgetSource, finalJpg))
		fprintf(fLog, "Got via ftp %s\n", finalJpg);
	    }
	else
	    {
	    safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id);
	    if (safeGetOne(wgetSource, finalJpg))
		fprintf(fLog, "Got via cgi %s\n", finalJpg);
	    }
	}
    }
carefulClose(&fLog);
}
/* convolve() - perform the task on the input data * I would like to rearrange this business here, and instead of * reading in the data and leaving it in the hash for all other * routines to work with, it would be best to get it immediately * into an array. That makes the work of the other routines much * easier. */ static void convolve(int argc, char *argv[]) { int i; struct lineFile *lf; /* for line file utilities */ for (i = 1; i < argc; ++i) { int lineCount = 0; /* counting input lines */ char *line = (char *)NULL; /* to receive data input line */ char *words[128]; /* to split data input line */ int wordCount = 0; /* result of split */ struct hash *histo0; /* first histogram */ struct hash *histo1; /* second histogram */ int medianBin0 = 0; /* bin at median for histo0 */ double medianLog_2 = -500.0; /* log at median */ int bin = 0; /* 0 to N-1 for N bins */ int convolutions = 0; /* loop counter for # of convolutions */ histo0 = newHash(0); lf = lineFileOpen(argv[i], TRUE); /* input file */ verbose(1, "Processing %s\n", argv[1]); while (lineFileNext(lf, &line, NULL)) { int j; /* loop counter over words */ int inputValuesCount = 0; struct histoGram *hg; /* an allocated hash element */ ++lineCount; chopPrefixAt(line, '#'); /* ignore any comments starting with # */ if (strlen(line) < 3) /* anything left on this line ? 
*/ continue; /* no, go to next line */ wordCount = chopByWhite(line, words, 128); if (wordCount < 1) warn("Expecting at least a word at line %d, file: %s, found %d words", lineCount, argv[i], wordCount); if (wordCount == 128) warn("May have more than 128 values at line %d, file: %s", lineCount, argv[i]); verbose(2, "Input data read from file: %s\n", argv[i]); for (j = 0; j < wordCount; ++j) { char binName[128]; double dataValue; double probInput; double log_2; dataValue = strtod(words[j], NULL); ++inputValuesCount; if (logs) { log_2 = dataValue; probInput = pow(2.0,log_2); } else { if (dataValue > 0.0) { log_2 = log2(dataValue); probInput = dataValue; } else { log_2 = -500.0; /* arbitrary limit */ probInput = pow(2.0,log_2); } } if (log_2 > medianLog_2) { medianLog_2 = log_2; medianBin0 = bin; } verbose(2, "bin %d: %g %0.5g\n", inputValuesCount-1, probInput, log_2); AllocVar(hg); /* the histogram element */ hg->bin = bin; hg->prob = probInput; hg->log_2 = log_2; snprintf(binName, sizeof(binName), "%d", hg->bin); hashAdd(histo0, binName, hg); ++bin; } /* for each word on an input line */ } /* for each line in a file */ /* file read complete, echo input */ if (verboseLevel() >= 2) printHistogram(histo0, medianBin0); /* perform convolutions to specified count * the iteration does histo0 with itself to produce histo1 * Then histo0 is freed and histo1 copied to it for the * next loop. */ for (convolutions = 0; convolutions < convolve_count; ++convolutions) { int medianBin; histo1 = newHash(0); medianBin = iteration(histo0, histo1); if (verboseLevel() >= 2) printHistogram(histo1, medianBin); freeHashAndVals(&histo0); histo0 = histo1; } } /* for each input file */ } /* convolve() */
void txGeneAlias(char *genomeDb, char *uniProtDb, char *xrefFile, char *evFile, char *oldToNew, char *aliasFile, char *protAliasFile) /* txGeneAlias - Make kgAlias and kgProtAlias tables.. */ { /* Read and hash oldToNew */ struct hash *newToOldHash = loadNewToOldHash(oldToNew); /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); struct sqlResult *sr; char **row; char query[256]; /* Open files. */ struct lineFile *lf = lineFileOpen(xrefFile, TRUE); FILE *fAlias = mustOpen(aliasFile, "w"); FILE *fProt = mustOpen(protAliasFile, "w"); /* Stream through xref file, which has much of the info we need, * and which contains a line for each gene. */ char *words[KGXREF_NUM_COLS]; while (lineFileRowTab(lf, words)) { /* Load the xref, and output most of it's fields as aliases. */ struct kgXref *x = kgXrefLoad(words); char *id = x->kgID; outAlias(fAlias, id, x->kgID); outAlias(fAlias, id, x->mRNA); outAlias(fAlias, id, x->spID); outAlias(fAlias, id, x->spDisplayID); outAlias(fAlias, id, x->geneSymbol); outAlias(fAlias, id, x->refseq); outAlias(fAlias, id, x->protAcc); char *old = hashFindVal(newToOldHash, id); if (old != NULL) outAlias(fAlias, id, old); /* If we've got a uniProt ID, use that to get more info from uniProt. */ char *acc = x->spID; if (acc[0] != 0) { /* Get current accession and output a bunch of easy protein aliases. */ acc = spLookupPrimaryAcc(uConn, acc); outProt(fProt, id, acc, acc); outProt(fProt, id, acc, x->spDisplayID); outProt(fProt, id, acc, x->geneSymbol); outProt(fProt, id, acc, x->protAcc); if (old != NULL) outProt(fProt, id, acc, old); /* Throw in old swissProt accessions. 
*/ sqlSafef(query, sizeof(query), "select val from otherAcc where acc = '%s'", acc); sr = sqlGetResult(uConn, query); while ((row = sqlNextRow(sr)) != NULL) { outAlias(fAlias, id, row[0]); outProt(fProt, id, acc, row[0]); } /* Throw in gene names that SwissProt knows about */ struct slName *gene, *geneList = spGenes(uConn, acc); for (gene = geneList; gene != NULL; gene = gene->next) { outAlias(fAlias, id, gene->name); outProt(fProt, id, acc, gene->name); } slFreeList(&geneList); } /* Throw in gene names from genbank. */ /* At some point we may want to restrict this to the primary transcript in a cluster. */ ev = hashFindVal(evHash, id); if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { sqlSafef(query, sizeof(query), "select geneName from gbCdnaInfo where acc='%s'", acc); int nameId = sqlQuickNum(gConn, query); if (nameId != 0) { char name[64]; sqlSafef(query, sizeof(query), "select name from geneName where id=%d", nameId); if (sqlQuickQuery(gConn, query, name, sizeof(name))) outAlias(fAlias, id, name); } } } kgXrefFree(&x); } carefulClose(&fAlias); carefulClose(&fProt); }
void readPatch(char *fileName, struct hash *cloneHash, struct ntContig **retNtList,
	struct hash **retNtHash)
/* Read nt.agp file into clone/hash.
 * Each row places one clone fragment inside an NT contig.  Builds a list
 * of ntContigs (returned via retNtList, in file order) and a hash of them
 * by name (retNtHash), annotating each clone in cloneHash with its NT
 * position/orientation and running consistency checks; problems are
 * warned about and flagged on the contig rather than aborting. */
{
struct ntContig *ntList = NULL, *nt = NULL;	/* nt == current contig */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[9];
struct agpFrag frag;
struct clone *clone, *ntClone, *lastClone = NULL;	/* lastClone: previous row's clone, for adjacency check */
struct cloneRef *ref;
struct hash *ntHash = newHash(0);
char cloneName[128];
char fragName[128];
char c;
int ntOrder = 0;	/* position of clone within current contig */

while (lineFileRow(lf, row))
    {
    agpFragStaticLoad(row, &frag);
    // file is 1-based but agpFragLoad() now assumes 0-based:
    frag.chromStart -= 1;
    frag.fragStart -= 1;

    /* Rows for the same contig are contiguous; a new chrom name starts
     * a new ntContig.  A repeated name out of order is an error. */
    if (nt == NULL || !sameString(frag.chrom, nt->name))
	{
	AllocVar(nt);
	slAddHead(&ntList, nt);
	if (hashLookup(ntHash, frag.chrom) != NULL)
	    errAbort("NT contig %s repeated line %d of %s",
		row[0], lf->lineIx, lf->fileName);
	hashAddSaveName(ntHash, frag.chrom, nt, &nt->name);
	lastClone = NULL;
	ntOrder = 0;
	}

    /* Clone name is the frag name minus its version/suffix.
     * NOTE(review): unbounded strcpy into cloneName[128]. */
    strcpy(cloneName, frag.frag);
    chopSuffix(cloneName);
    clone = hashMustFindVal(cloneHash, cloneName);
    clone->ntStart = frag.chromStart;
    clone->ntEnd = frag.chromEnd;
    if (clone->nt != NULL)
	{
	warn("Clone %s trying to be in two NT contigs (%s and %s) line %d of %s",
	    clone->name, clone->nt->name, nt->name, lf->lineIx, lf->fileName);
	nt->problem = TRUE;
	}
    clone->nt = nt;

    /* Orientation comes from the strand column, +1 or -1 only. */
    c = frag.strand[0];
    if (c == '-')
	clone->ntOrientation = -1;
    else if (c == '+')
	clone->ntOrientation = +1;
    else
	errAbort("Expecting +1 or -1 field 5, line %d, file %s",
	    lf->lineIx, lf->fileName);

    /* Sequence type must be Finished, Draft, or Predraft. */
    c = frag.type[0];
    if (c == 'F' || c == 'D' || c == 'P')
	clone->seqType = c;
    else
	errAbort("Expecting F, D, or P field 6, line %d, file %s",
	    lf->lineIx, lf->fileName);

    /* NOTE(review): unbounded sprintf into fragName[128]. */
    sprintf(fragName, "%s_1", frag.frag);
    clone->fragName = cloneString(fragName);
    clone->goldStart = frag.fragStart;
    clone->goldEnd = frag.fragEnd;
    clone->ntOrder = ntOrder++;

    /* Add ref to NT. */
    AllocVar(ref);
    ref->ref = clone;
    slAddTail(&nt->cloneList, ref);

    /* Do a few tests. */
    if (clone->goldStart >= clone->goldEnd)
	{
	warn("Clone %s end before start (%d before %d) line %d of %s",
	    clone->name, clone->goldStart, clone->goldEnd, lf->lineIx, lf->fileName);
	nt->problem = TRUE;
	}
    if (clone->ntStart >= clone->ntEnd)
	{
	warn("Clone %s NT end before NT start line %d of %s",
	    clone->name, lf->lineIx, lf->fileName);
	nt->problem = TRUE;
	}
    if (clone->goldEnd > clone->size)
	{
	/* Only an error when the clone is a single contiguous fragment. */
	if (sameString(clone->startFrag, clone->endFrag))
	    {
	    warn("Clone %s end position %d, clone size %d, line %d of %s",
		clone->name, clone->goldEnd, clone->size, lf->lineIx, lf->fileName);
	    nt->problem = TRUE;
	    }
	}
    /* Span in NT coordinates must match span in clone coordinates. */
    if (clone->ntEnd - clone->ntStart != clone->goldEnd - clone->goldStart)
	{
	warn("Size not the same in NT contig as in clone %s (%d vs %d) line %d of %s",
	    clone->name, clone->ntEnd - clone->ntStart,
	    clone->goldEnd-clone->goldStart, lf->lineIx, lf->fileName);
	nt->problem = TRUE;
	}
    nt->sumSize += clone->goldEnd - clone->goldStart;

    /* The NT contig itself may appear as a clone; use its size if known. */
    ntClone = hashFindVal(cloneHash, nt->name);
    if (ntClone != NULL && clone->ntEnd > ntClone->size)
	{
	warn("Clone %s NT end position %d, NT size %d, line %d of %s",
	    clone->name, clone->ntEnd, ntClone->size, lf->lineIx, lf->fileName);
	nt->problem = TRUE;
	}
    if (ntClone != NULL)
	nt->size = ntClone->size;
    else
	nt->size = clone->size;	/* This happens for single-clone NT contigs only. */

    /* Consecutive clones in a contig should abut exactly. */
    if (lastClone != NULL)
	{
	if (lastClone->ntEnd != clone->ntStart)
	    {
	    warn("last clone (%s)'s end doesn't match with current clone (%s)'s start line %d of %s",
		lastClone->name, clone->name, lf->lineIx, lf->fileName);
	    }
	}
    lastClone = clone;
    }
lineFileClose(&lf);
slReverse(&ntList);	/* slAddHead built the list backwards */

/* Final per-contig check: fragments should exactly cover the contig. */
for (nt = ntList; nt != NULL; nt = nt->next)
    {
    if (nt->sumSize != nt->size)
	{
	warn("Sum of fragments of %s is %d, but size is supposed to be %d",
	    nt->name, nt->sumSize, nt->size);
	nt->problem = TRUE;
	}
    }
*retNtList = ntList;
*retNtHash = ntHash;
}
static struct joinedTables *joinedTablesCreate( struct joiner *joiner,
	char *primaryDb, char *primaryTable,
	struct joinerDtf *fieldList, struct joinerDtf *filterTables,
	int maxRowCount, struct region *regionList)
/* Create joinedTables structure from fields.
 * Bundles the requested fields (plus filter tables) into per-table
 * units, finds a join route through all of them via all.joiner starting
 * at primaryDb.primaryTable, loads the primary table's rows for the
 * given regions (capped at maxRowCount), then follows the route joining
 * in each remaining table keyed on the shared identifier fields.
 * Returns the populated joinedTables; caller owns it. */
{
struct tableJoiner *tj, *tjList = bundleFieldsIntoTables(fieldList, filterTables);
struct joinerPair *routeList = NULL, *route;
struct joinedTables *joined = NULL;
struct hash *tableHash = newHash(8);	/* "db.table" -> tableJoiner */
int totalKeyCount = 0, totalFieldCount = 0;
int curKeyCount = 0, curFieldCount = 0;	/* column offsets as tables load */
struct joinerDtf *tableDtfs;

/* Index the bundled tables by "db.table" for route lookup. */
for (tj = tjList; tj != NULL; tj = tj->next)
    {
    char buf[256];
    safef(buf, sizeof(buf), "%s.%s", tj->database, tj->table);
    hashAdd(tableHash, buf, tj);
    }
orderTables(&tjList, primaryDb, primaryTable);	/* primary table first */
tableDtfs = tableToDtfs(tjList);
routeList = joinerFindRouteThroughAll(joiner, tableDtfs);
if (routeList == NULL)
    errAbort("Can't find route from %s to %s via all.joiner",
	primaryTable, tjList->next->table);
addOutKeys(tableHash, routeList, &tjList);

/* If first table is non-positional then it will lead to a lot
 * of n/a's in later fields unless we treat the genome-wide. */
if (!isPositional(tjList->database, tjList->table))
    regionList = getRegionsFullGenome();

/* Count up total fields and keys. */
for (tj = tjList; tj != NULL; tj = tj->next)
    {
    totalKeyCount += slCount(tj->keysOut);
    totalFieldCount += slCount(tj->fieldList);
    }

/* Do first table.  This one uses identifier hash if any. */
    {
    joined = tjLoadFirst(regionList, tjList, totalFieldCount, totalKeyCount,
	maxRowCount);
    curKeyCount = slCount(tjList->keysOut);
    curFieldCount = slCount(tjList->fieldList);
    }

/* Follow routing list for rest. */
/* Sanity check: the route must start at the (ordered) first table. */
if (!sameString(tjList->database, routeList->a->database))
    internalErr();
if (!sameString(tjList->table, routeList->a->table))
    internalErr();
for (route = routeList; route != NULL; route = route->next)
    {
    struct tableJoiner *tj = findTableJoiner(tjList,
	route->b->database, route->b->table);
    struct joinerField *jfA = NULL, *jfB = NULL;
    if (tj == NULL)
	internalErr();
    jfA = findJoinerField(route->identifier, route->a);
    if (jfA == NULL)
	{
	internalErr();
	}
    jfB = findJoinerField(route->identifier, route->b);
    if (jfB == NULL)
	internalErr();
    /* A table may appear in several route pairs; load it only once. */
    if (!tj->loaded)
	{
	int keyIx;
	struct hash *keyHash = NULL;

	/* Hash the already-loaded key column, then join the new table
	 * against it.  Column offsets advance by what this table added. */
	keyIx = findDtfIndex(joined->keyList, route->a);
	if (keyIx < 0)
	    internalErr();
	keyHash = hashKeyField(joined, keyIx, jfA);
	tjLoadSome(regionList, joined, curFieldCount, curKeyCount,
	    route->b->field, keyHash, jfB->chopBefore, jfB->chopAfter,
	    tj, isPositional(tj->database, tj->table), FALSE);
	curKeyCount += slCount(tj->keysOut);
	curFieldCount += slCount(tj->fieldList);
	hashFree(&keyHash);
	}
    }
joinerDtfFreeList(&tableDtfs);
hashFree(&tableHash);
tableJoinerFreeList(&tjList);
return joined;
}
void doExpRatio(struct trackDb *tdb, char *item, struct customTrack *ct)
/* Generic expression ratio deatils using microarrayGroups.ra file */
/* and not the expRecord tables.
 * Emits the details page for an expression-ratio track item: loads the
 * bed records in the current window (from the db table, a db-backed
 * custom track, or an in-memory custom track), clumps them by the
 * user-selected grouping, and prints the expression table.  Uses the
 * cart/seqName/winStart/winEnd globals. */
{
char *expScale = trackDbRequiredSetting(tdb, "expScale");
char *expStep = trackDbRequiredSetting(tdb, "expStep");
double maxScore = atof(expScale);	/* color scale saturates at +/- maxScore */
double stepSize = atof(expStep);
struct bed *bedList;
char *itemName = cgiUsualString("i2","none");
char *expName = (item == NULL) ? itemName : item;
char *tdbSetting = trackDbSettingOrDefault(tdb, "expColor", "redGreen");
char *colorVal = NULL;
enum expColorType colorScheme;
char colorVarName[256];

/* Color scheme: cart setting wins over trackDb default. */
safef(colorVarName, sizeof(colorVarName), "%s.color", tdb->track);
colorVal = cartUsualString(cart, colorVarName, tdbSetting);
colorScheme = getExpColorType(colorVal);

if (sameWord(tdb->grp, "cancerGenomics"))
    {
    /* set global flag */
    isCancerGenomicsTrack = TRUE;
    }

/* Load beds in range from the appropriate source. */
if (!ct)
    {
    genericHeader(tdb, itemName);
    bedList = loadMsBed(tdb, tdb->table, seqName, winStart, winEnd);
    }
else if (ct->dbTrack)
    {
    genericHeader(tdb, itemName);
    printCustomUrl(tdb, itemName, TRUE);
    bedList = ctLoadMultScoresBedDb(ct, seqName, winStart, winEnd);
    }
else
    bedList = bedFilterListInRange(ct->bedList, NULL, seqName, winStart, winEnd);

if (bedList == NULL)
    printf("<b>No Expression Data in this Range.</b>\n");
else if (expName && sameString(expName, "zoomInMore"))
    printf("<b>Too much data to display in detail in this range.</b>\n");
else
    {
    struct microarrayGroups *groupings = NULL;
    struct maGrouping *combineGroup;
    struct hash *erHash = newHash(6);	/* group index (as string) -> expRecord */
    int i;
    if (!ct)
	{
	groupings = maGetTrackGroupings(database, tdb);
	combineGroup = maCombineGroupingFromCart(groupings, cart, tdb->track);
	}
    else
	combineGroup = maGetGroupingFromCt(ct);
    maBedClumpGivenGrouping(bedList, combineGroup);
    for (i = 0; i < combineGroup->numGroups; i++)
	{
	/* make stupid exprecord hash.perhaps eventually this won't be needed */
	char id[16];
	struct expRecord *er = basicExpRecord(combineGroup->names[i], i, 2);
	safef(id, sizeof(id), "%d", i);
	hashAdd(erHash, id, er);
	}
    puts("<h2></h2><p>\n");
    msBedPrintTable(bedList, erHash, itemName, expName, -1*maxScore, maxScore,
	stepSize, 2, msBedDefaultPrintHeader, msBedExpressionPrintRow,
	printExprssnColorKey, getColorForExprBed, colorScheme);
    hashTraverseEls(erHash, erHashElFree);
    hashFree(&erHash);
    microarrayGroupsFree(&groupings);
    }
puts("<h2></h2><p>\n");
bedFreeList(&bedList);
}
void dupeFoo(char *pslName, char *faName, char *regionFile) /* dupeFoo - Do some duplication analysis. */ { struct lineFile *lf; struct frag *fragList = NULL, *frag; struct hash *fragHash = newHash(16); struct psl *psl; int fragCount=0,missCount=0,dupeCount=0,kSub=0, k1=0, k10=0,k100=0,k1000=0,k10000=0,diffChrom=0,distance; /* Read in fragment list and put it in hash. */ fragList = readFragList(faName); for (frag = fragList; frag != NULL; frag = frag->next) hashAdd(fragHash, frag->name, frag); /* Read psl's and store under the fragment the belong to. */ lf = pslFileOpen(pslName); while ((psl = pslNext(lf)) != NULL) { if ((frag = hashFindVal(fragHash, psl->qName)) == NULL) errAbort("Couldn't find %s in %s line %d of %s", psl->qName, faName, lf->lineIx, lf->fileName); slAddHead(&frag->pslList, psl); } lineFileClose(&lf); /* Look through fragments and report missing and dupes. */ for (frag = fragList; frag != NULL; frag = frag->next) { ++fragCount; if ((psl = frag->pslList) == NULL) { ++missCount; printf("missing %s\n", frag->name); } else { for (psl = frag->pslList; psl != NULL; psl = psl->next) { if (sameString(psl->tName, frag->chrom)) { distance = frag->start - psl->tStart; if (distance != 0) { if (distance < 0) distance = -distance; if (distance >= 10000000) ++k10000; else if (distance >= 1000000) ++k1000; else if (distance >= 100000) ++k100; else if (distance >= 10000) ++k10; else if (distance >= 1000) ++k1; else ++kSub; } } else { ++diffChrom; } } } } printPercent("Total", fragCount, fragCount); printPercent("Unaligned", missCount, fragCount); printPercent("Other Chrom", diffChrom, fragCount); printPercent("Same Chrom >10M", k10000, fragCount); printPercent("Same Chrom >1M", k1000, fragCount); printPercent("Same Chrom >10Ok", k100, fragCount); printPercent("Same Chrom >1Ok", k10, fragCount); printPercent("Same Chrom >1k", k1, fragCount); printPercent("Self-overlap", kSub, fragCount); writeRegions(fragList, regionFile); }
static void showLinkedTables(struct joiner *joiner, struct dbTable *inList, char *varPrefix, char *buttonName, char *buttonText) /* Print section with list of linked tables and check boxes to turn them * on. */ { struct dbTable *outList = NULL, *out, *in; char dtName[256]; struct hash *uniqHash = newHash(0); struct hash *inHash = newHash(8); /* Build up list of tables we link to in outList. */ for (in = inList; in != NULL; in = in->next) { struct sqlConnection *conn = NULL; if (!trackHubDatabase(database)) conn = hAllocConn(in->db); struct joinerPair *jpList, *jp; /* Keep track of tables in inList. */ safef(dtName, sizeof(dtName), "%s.%s", inList->db, inList->table); hashAdd(inHash, dtName, NULL); /* First table in input is not allowed in output. */ if (in == inList) hashAdd(uniqHash, dtName, NULL); /* Scan through joining information and add tables, * avoiding duplicate additions. */ jpList = joinerRelate(joiner, in->db, in->table); for (jp = jpList; jp != NULL; jp = jp->next) { safef(dtName, sizeof(dtName), "%s.%s", jp->b->database, jp->b->table); if (!hashLookup(uniqHash, dtName) && !cartTrackDbIsAccessDenied(jp->b->database, jp->b->table)) { hashAdd(uniqHash, dtName, NULL); out = dbTableNew(jp->b->database, jp->b->table); slAddHead(&outList, out); } } joinerPairFreeList(&jpList); hFreeConn(&conn); } slSort(&outList, dbTableCmp); /* Print html. 
*/ if (outList != NULL) { webNewSection("Linked Tables"); hTableStart(); for (out = outList; out != NULL; out = out->next) { struct sqlConnection *conn = hAllocConn(out->db); struct asObject *asObj = asForTable(conn, out->table); char *var = dbTableVar(varPrefix, out->db, out->table); hPrintf("<TR>"); hPrintf("<TD>"); cgiMakeCheckBox(var, varOn(var)); hPrintf("</TD>"); hPrintf("<TD>%s</TD>", out->db); hPrintf("<TD>%s</TD>", out->table); hPrintf("<TD>"); if (asObj != NULL) hPrintf("%s", asObj->comment); else hPrintf(" "); hPrintf("</TD>"); hPrintf("</TR>"); hFreeConn(&conn); } hTableEnd(); hPrintf("<BR>"); cgiMakeButton(buttonName, buttonText); } }
void rcvs(char *codingTable, char *clusterTable) /* rcvs - Compare riken noncoding vs. nonspliced. */ { struct hash *idHash = newHash(16); // Key id1, val id2 struct hash *nonCodingHash = newHash(16); // Key clusterId, value struct hash *splicedHash = newHash(16); // Key id2, present if spliced struct sqlConnection *conn = sqlConnect("mgsc"); struct sqlResult *sr; char **row; char *words[16]; int wordCount; struct lineFile *lf; int codingSpliced = 0; int noncodingSpliced = 0; int codingNonspliced = 0; int noncodingNonspliced = 0; /* Read id's into hash */ sr = sqlGetResult(conn, NOSQLINJ "select id1,id2 from rikenIds"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(idHash, row[0], cloneString(row[1])); sqlFreeResult(&sr); /* Read spliced into hash */ sr = sqlGetResult(conn, NOSQLINJ "select name from rikenOrientInfo where intronOrientation != 0"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(splicedHash, row[0], NULL); sqlFreeResult(&sr); /* Read noncoding clusters into hash */ lf = lineFileOpen(codingTable, TRUE); while (lineFileNextRow(lf, words, 2)) { if (sameString(words[1], "NoPProt")) hashAdd(nonCodingHash, words[0], NULL); } lineFileClose(&lf); /* Stream through cluster table counting and correlating. */ lf = lineFileOpen(clusterTable, TRUE); while (lineFileNextRow(lf, words, 2)) { char *cluster = words[0]; char *id1 = words[1]; char *id2 = hashMustFindVal(idHash, id1); if (hashLookup(nonCodingHash, cluster)) { if (hashLookup(splicedHash, id2)) ++noncodingSpliced; else ++noncodingNonspliced; } else { if (hashLookup(splicedHash, id2)) ++codingSpliced; else ++codingNonspliced; } } printf("noncodingNonspliced %d\n", noncodingNonspliced); printf("noncodingSpliced %d\n", noncodingSpliced); printf("codingNonspliced %d\n", codingNonspliced); printf("codingSpliced %d\n", codingSpliced); printf("total %d\n", noncodingNonspliced + noncodingSpliced + codingNonspliced + codingSpliced); }
void startRedoHash()
/* Allocate a fresh file-scope redoHash (2^12 buckets) for a new pass.
 * NOTE(review): any previous contents are abandoned, not freed -
 * presumably the caller only runs this once per pass; confirm. */
{
redoHash = newHash(12);
}
static void tfBindLevelSection(struct tfData *tfList, struct sqlConnection *conn,
	char *motifTable, char *tfToConditionTable)
/* Print info on individual transcription factors that bind
 * with e-val between minVal and maxVal.
 * Emits an HTML table with one row per factor: name, growth conditions
 * bucketed by IP strength, conditions with no IP, and motif hits with
 * their scores and conservation counts. */
{
struct tfData *tf;
struct transRegCode *trc;

webNewSection("Transcription Factors Showing IP Over this Probe ");
hTableStart();
/* Two header rows: group labels spanning columns, then per-column labels.
 * NOTE(review): the P<0.001 / P<0.005 labels don't match the 0.002/0.006
 * cutoffs passed to ipPrintInRange below - confirm which is intended. */
printf("<TR>");
colLabel("Transcription", 1);
colLabel("Growth Condition", 3);
colLabel("Motif Information", 3);
printf("</TR>\n");
printf("<TR>");
colLabel("Factor", 1);
colLabel("Good IP (P<0.001)", 1);
colLabel("Weak IP (P<0.005)", 1);
colLabel("No IP (P>0.005)", 1);
colLabel("Hits", 1);
colLabel("Scores", 1);
colLabel("Conservation (2 max)", 1);
printf("</TR>\n");
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    /* boundHash collects conditions already printed in the strong/weak
     * columns so the "no IP" column can list only the rest. */
    struct hash *boundHash = newHash(8);
    slSort(&tf->conditionList, tfCondCmpName);
    printf("<TR>");

    /* Print transcription name. */
    printf("<TD>");
    sacCerHgGeneLinkName(conn, tf->name);
    printf("</TD>");

    /* Print stong and weak growth conditions. */
    ipPrintInRange(tf->conditionList, 0.0, 0.002, boundHash);
    ipPrintInRange(tf->conditionList, 0.002, 0.006, boundHash);

    /* Grab list of all conditions tested from database and
     * print out ones not in strong or weak as none. */
	{
	char query[256], **row;
	struct sqlResult *sr;
	boolean isFirst = TRUE;
	boolean gotAny = FALSE;
	sqlSafef(query, sizeof(query),
	    "select growthCondition from %s where name='%s'",
	    tfToConditionTable, tf->name);
	sr = sqlGetResult(conn, query);
	printf("<TD>");
	while ((row = sqlNextRow(sr)) != NULL)
	    {
	    if (!hashLookup(boundHash, row[0]))
		{
		if (isFirst)
		    isFirst = FALSE;
		else
		    printf(", ");
		printf("%s", row[0]);
		gotAny = TRUE;
		}
	    }
	sqlFreeResult(&sr);
	if (!gotAny)
	    printf("&nbsp;");
	printf("</TD>");
	}

    /* Print motif info. */
    if (tf->trcList == NULL)
	printf("<TD>0</TD><TD>n/a</TD><TD>n/a</TD>\n");
    else
	{
	printf("<TD>%d</TD>", slCount(tf->trcList));

	/* Print scores. */
	printf("<TD>");
	for (trc = tf->trcList; trc != NULL; trc = trc->next)
	    {
	    double score;
	    if (trc != tf->trcList)
		printf(", ");
	    score = motifScoreHere(
		trc->chrom, trc->chromStart, trc->chromEnd,
		trc->name, motifTable);
	    transRegCodeAnchor(trc);	/* opens the <A> the score closes */
	    printf("%3.1f</A>", score);
	    }
	printf("</TD><TD>");
	/* Conservation counts, one per motif hit. */
	for (trc = tf->trcList; trc != NULL; trc = trc->next)
	    {
	    if (trc != tf->trcList)
		printf(", ");
	    printf("%d", trc->consSpecies);
	    }
	printf("</TD>");
	}
    printf("</TR>\n");
    hashFree(&boundHash);
    }
hTableEnd();
}