static struct joinedTables *joinedTablesCreate( struct joiner *joiner, char *primaryDb, char *primaryTable, struct joinerDtf *fieldList, struct joinerDtf *filterTables, int maxRowCount, struct region *regionList) /* Create joinedTables structure from fields. */ { struct tableJoiner *tj, *tjList = bundleFieldsIntoTables(fieldList, filterTables); struct joinerPair *routeList = NULL, *route; struct joinedTables *joined = NULL; struct hash *tableHash = newHash(8); int totalKeyCount = 0, totalFieldCount = 0; int curKeyCount = 0, curFieldCount = 0; struct joinerDtf *tableDtfs; for (tj = tjList; tj != NULL; tj = tj->next) { char buf[256]; safef(buf, sizeof(buf), "%s.%s", tj->database, tj->table); hashAdd(tableHash, buf, tj); } orderTables(&tjList, primaryDb, primaryTable); tableDtfs = tableToDtfs(tjList); routeList = joinerFindRouteThroughAll(joiner, tableDtfs); if (routeList == NULL) errAbort("Can't find route from %s to %s via all.joiner", primaryTable, tjList->next->table); addOutKeys(tableHash, routeList, &tjList); /* If first table is non-positional then it will lead to a lot * of n/a's in later fields unless we treat the genome-wide. */ if (!isPositional(tjList->database, tjList->table)) regionList = getRegionsFullGenome(); /* Count up total fields and keys. */ for (tj = tjList; tj != NULL; tj = tj->next) { totalKeyCount += slCount(tj->keysOut); totalFieldCount += slCount(tj->fieldList); } /* Do first table. This one uses identifier hash if any. */ { joined = tjLoadFirst(regionList, tjList, totalFieldCount, totalKeyCount, maxRowCount); curKeyCount = slCount(tjList->keysOut); curFieldCount = slCount(tjList->fieldList); } /* Follow routing list for rest. 
*/ if (!sameString(tjList->database, routeList->a->database)) internalErr(); if (!sameString(tjList->table, routeList->a->table)) internalErr(); for (route = routeList; route != NULL; route = route->next) { struct tableJoiner *tj = findTableJoiner(tjList, route->b->database, route->b->table); struct joinerField *jfA = NULL, *jfB = NULL; if (tj == NULL) internalErr(); jfA = findJoinerField(route->identifier, route->a); if (jfA == NULL) { internalErr(); } jfB = findJoinerField(route->identifier, route->b); if (jfB == NULL) internalErr(); if (!tj->loaded) { int keyIx; struct hash *keyHash = NULL; keyIx = findDtfIndex(joined->keyList, route->a); if (keyIx < 0) internalErr(); keyHash = hashKeyField(joined, keyIx, jfA); tjLoadSome(regionList, joined, curFieldCount, curKeyCount, route->b->field, keyHash, jfB->chopBefore, jfB->chopAfter, tj, isPositional(tj->database, tj->table), FALSE); curKeyCount += slCount(tj->keysOut); curFieldCount += slCount(tj->fieldList); hashFree(&keyHash); } } joinerDtfFreeList(&tableDtfs); hashFree(&tableHash); tableJoinerFreeList(&tjList); return joined; }
struct genoLay *genoLayNew(struct genoLayChrom *chromList,
        MgFont *font, int picWidth, int betweenChromHeight,
        int minLeftLabelWidth, int minRightLabelWidth,
        char *how)
/* Figure out layout.  For human and most mammals this will be
 * two columns with sex chromosomes on bottom.  This is complicated
 * by the platypus having a bunch of sex chromosomes.
 *   chromList           - chromosomes to lay out; their x/y/width/height
 *                         fields are filled in here
 *   font                - used to measure label widths and line height
 *   picWidth            - total width in pixels to fit the layout into
 *   betweenChromHeight  - vertical gap added to the font height per line
 *   minLeftLabelWidth,
 *   minRightLabelWidth  - lower bounds on the space kept for side labels
 *   how                 - genoLayOnePerLine puts everything in the left
 *                         column, genoLayAllOneLine puts everything on one
 *                         line, anything else gives the two column layout
 * Returns a newly allocated genoLay that refers to chromList. */
{
int margin = 3;
struct slRef *refList = NULL, *ref, *left, *right;
struct genoLayChrom *chrom;
struct genoLay *gl;
int autoCount, halfCount, bases;
int leftLabelWidth=0, rightLabelWidth=0, labelWidth;
int spaceWidth = mgFontCharWidth(font, ' ');
int extraLabelPadding = 0;
int autosomeOtherPixels=0, sexOtherPixels=0;
int autosomeBasesInLine=0;      /* Maximum bases in a line for autosome. */
int sexBasesInLine=0;           /* Bases in line for sex chromosome. */
double sexBasesPerPixel, basesPerPixel;
int pos = margin;
int y = 0;
int fontHeight = mgFontLineHeight(font);
int chromHeight = fontHeight;
int lineHeight = chromHeight + betweenChromHeight;
boolean allOneLine = FALSE;

refList = refListFromSlList(chromList);

/* Allocate genoLay object and fill in simple fields. */
AllocVar(gl);
gl->chromList = chromList;
gl->chromHash = hashNew(0);
gl->font = font;
gl->picWidth = picWidth;
gl->margin = margin;
gl->spaceWidth = spaceWidth;
gl->lineHeight = lineHeight;
gl->betweenChromHeight = betweenChromHeight;
gl->betweenChromOffsetY = 0;
gl->chromHeight = chromHeight;
gl->chromOffsetY = lineHeight - chromHeight;

/* Save chromosomes in hash too, for easy access */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    hashAdd(gl->chromHash, chrom->fullName, chrom);

if (sameString(how, genoLayOnePerLine))
    {
    gl->leftList = refList;
    }
else if (sameString(how, genoLayAllOneLine))
    {
    gl->bottomList = refList;
    allOneLine = TRUE;
    }
else
    {
    /* Put sex chromosomes on bottom, and rest on left. */
    separateSexChroms(refList, &refList, &gl->bottomList);
    autoCount = slCount(refList);
    gl->leftList = refList;

    /* If there are a lot of chromosomes, then move later
     * (and smaller) chromosomes to a new right column */
    if (autoCount > 12)
        {
        halfCount = (autoCount+1)/2;
        ref = slElementFromIx(refList, halfCount-1);
        gl->rightList = ref->next;
        ref->next = NULL;
        slReverse(&gl->rightList);
        }
    }

if (allOneLine)
    {
    unsigned long totalBases = 0, bStart=0, bEnd;
    int chromCount = 0, chromIx=0;
    for (ref = gl->bottomList; ref != NULL; ref = ref->next)
        {
        chrom = ref->val;
        totalBases += chrom->size;
        chromCount += 1;
        }
    /* One pixel is kept between neighbouring chromosomes. */
    int availablePixels = picWidth - minLeftLabelWidth - minRightLabelWidth
            - 2*margin - (chromCount-1);
    double lineBasesPerPixel = (double)totalBases/availablePixels;
    for (ref = gl->bottomList; ref != NULL; ref = ref->next)
        {
        chrom = ref->val;
        bEnd = bStart + chrom->size;
        /* Round the cumulative positions so that rounding error does not
         * accumulate from one chromosome to the next. */
        int pixStart = round(bStart / lineBasesPerPixel);
        int pixEnd = round(bEnd / lineBasesPerPixel);
        chrom->width = pixEnd - pixStart;
        chrom->height = lineHeight;
        chrom->x = pixStart + margin + chromIx + minLeftLabelWidth;
        chrom->y = 0;
        chromIx += 1;
        bStart = bEnd;
        }
    gl->lineCount = 1;
    gl->picHeight = 2*margin + lineHeight + fontHeight + 1;
    gl->allOneLine = TRUE;
    gl->leftLabelWidth = minLeftLabelWidth;
    gl->rightLabelWidth = minRightLabelWidth;
    gl->basesPerPixel = lineBasesPerPixel;
    gl->pixelsPerBase = 1.0/lineBasesPerPixel;
    }
else
    {
    /* Figure out space needed for autosomes.  Each pass handles one line,
     * which holds at most one left and one right chromosome. */
    left = gl->leftList;
    right = gl->rightList;
    while (left || right)
        {
        bases = 0;
        if (left)
            {
            chrom = left->val;
            labelWidth = mgFontStringWidth(font, chrom->shortName) + spaceWidth;
            if (leftLabelWidth < labelWidth)
                leftLabelWidth = labelWidth;
            bases = chrom->size;
            left = left->next;
            }
        if (right)
            {
            chrom = right->val;
            labelWidth = mgFontStringWidth(font, chrom->shortName) + spaceWidth;
            if (rightLabelWidth < labelWidth)
                rightLabelWidth = labelWidth;
            bases += chrom->size;
            right = right->next;
            }
        if (autosomeBasesInLine < bases)
            autosomeBasesInLine = bases;
        gl->lineCount += 1;
        }

    /* Figure out space needed for bottom chromosomes. */
    if (gl->bottomList)
        {
        gl->lineCount += 1;
        sexOtherPixels = spaceWidth + 2*margin;
        for (ref = gl->bottomList; ref != NULL; ref = ref->next)
            {
            chrom = ref->val;
            sexBasesInLine += chrom->size;
            labelWidth = mgFontStringWidth(font, chrom->shortName) + spaceWidth;
            if (ref == gl->bottomList )
                {
                if (leftLabelWidth < labelWidth)
                    leftLabelWidth = labelWidth;
                sexOtherPixels = leftLabelWidth;
                }
            else if (ref->next == NULL)
                {
                if (rightLabelWidth < labelWidth)
                    rightLabelWidth = labelWidth;
                sexOtherPixels += rightLabelWidth + spaceWidth;
                }
            else
                {
                sexOtherPixels += labelWidth + spaceWidth;
                }
            }
        }

    /* Do some adjustments if side labels are bigger than needed for
     * chromosome names. */
    if (leftLabelWidth < minLeftLabelWidth)
        {
        extraLabelPadding += (minLeftLabelWidth - leftLabelWidth);
        leftLabelWidth = minLeftLabelWidth;
        }
    if (rightLabelWidth < minRightLabelWidth)
        {
        extraLabelPadding += (minRightLabelWidth - rightLabelWidth);
        rightLabelWidth = minRightLabelWidth;
        }
    sexOtherPixels += extraLabelPadding;

    /* Figure out the number of bases needed per pixel.  The division is
     * done in floating point: an integer quotient truncates, and becomes
     * zero when a line holds fewer bases than pixels, which would make
     * the width calculations below divide by zero. */
    autosomeOtherPixels = 2*margin + spaceWidth + leftLabelWidth + rightLabelWidth;
    basesPerPixel = (double)autosomeBasesInLine/(picWidth-autosomeOtherPixels);
    if (gl->bottomList)
        {
        sexBasesPerPixel = (double)sexBasesInLine/(picWidth-sexOtherPixels);
        if (sexBasesPerPixel > basesPerPixel)
            basesPerPixel = sexBasesPerPixel;
        }

    /* Save positions and sizes of some things in layout structure. */
    gl->leftLabelWidth = leftLabelWidth;
    gl->rightLabelWidth = rightLabelWidth;
    gl->basesPerPixel = basesPerPixel;
    gl->pixelsPerBase = 1.0/basesPerPixel;

    /* Set pixel positions for left autosomes */
    for (ref = gl->leftList; ref != NULL; ref = ref->next)
        {
        chrom = ref->val;
        chrom->x = leftLabelWidth + margin;
        chrom->y = y;
        chrom->width = round(chrom->size/basesPerPixel);
        chrom->height = lineHeight;
        y += lineHeight;
        }

    /* Set pixel positions for right autosomes */
    y = 0;
    for (ref = gl->rightList; ref != NULL; ref = ref->next)
        {
        chrom = ref->val;
        chrom->width = round(chrom->size/basesPerPixel);
        chrom->height = lineHeight;
        chrom->x = picWidth - margin - rightLabelWidth - chrom->width;
        chrom->y = y;
        y += lineHeight;
        }
    gl->picHeight = 2*margin + lineHeight * gl->lineCount;
    y = gl->picHeight - margin - lineHeight;

    /* Set pixel positions for sex chromosomes: first one is left aligned,
     * last one right aligned, the rest follow the previous chromosome. */
    for (ref = gl->bottomList; ref != NULL; ref = ref->next)
        {
        chrom = ref->val;
        chrom->y = y;
        chrom->width = round(chrom->size/basesPerPixel);
        chrom->height = lineHeight;
        if (ref == gl->bottomList)
            chrom->x = leftLabelWidth + margin;
        else if (ref->next == NULL)
            chrom->x = picWidth - margin - rightLabelWidth - chrom->width;
        else
            chrom->x = 2*spaceWidth+mgFontStringWidth(font,chrom->shortName) + pos;
        pos = chrom->x + chrom->width;
        }
    }
return gl;
}
struct fullExperiment *getFullExperimentList(struct sqlConnection *conn, struct edwExperiment *eeList, char *assembly, struct hash **retHash) /* Given list of edwExperiments, return list of ones replicated with full file sets on * both replicates. If optional retHash is non-NULL then return a hash full of same * experiments keyed by experiment accession */ { /* Build up a list of fullExperiments and a hash keyed by name. */ struct hash *hash = hashNew(14); struct fullExperiment *fullList = NULL; struct edwExperiment *ee; for (ee = eeList; ee != NULL; ee = ee->next) { struct fullExperiment *full = hashFindVal(hash, ee->accession); if (full == NULL) { AllocVar(full); full->name = cloneString(ee->accession); full->exp = ee; slAddHead(&fullList, full); hashAdd(hash, full->name, full); } } uglyf("Got %d in eeList, %d in fullList, %d in hash\n", slCount(eeList), slCount(fullList), hash->elCount); /* Build up SQL query to efficiently fetch all good files and valid files from our experiment */ struct dyString *q = dyStringNew(16*1024); sqlDyStringPrintf(q, "select edwValidFile.*,edwFile.*,eapOutput.* " " from edwValidFile,edwFile,eapOutput " " where edwValidFile.fileId = edwFile.id and edwFile.id = eapOutput.fileId " " and edwFile.deprecated='' and edwFile.errorMessage='' " " and edwValidFile.ucscDb != 'centro.hg19' " " and edwValidFile.ucscDb like '%%%s' and edwValidFile.experiment in (" , assembly); for (ee = eeList; ee != NULL; ee = ee->next) { dyStringPrintf(q, "'%s'", ee->accession); if (ee->next != NULL) dyStringAppendC(q, ','); } dyStringAppendC(q, ')'); /* Loop through this making up vFiles that ultimately are attached to replicates. 
*/ int vCount = 0; struct sqlResult *sr = sqlGetResult(conn, q->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { ++vCount; struct edwValidFile *valid = edwValidFileLoad(row); fixOutputType(valid); struct edwFile *file = edwFileLoad(row + EDWVALIDFILE_NUM_COLS); struct eapOutput *eapOutput = eapOutputLoad(row + EDWVALIDFILE_NUM_COLS + EDWFILE_NUM_COLS); struct vFile *vf = vFileNew(file, valid, eapOutput); struct fullExperiment *full = hashMustFindVal(hash, valid->experiment); struct replicate *rep = findOrMakeReplicate(valid->replicate, &full->repList); char *format = valid->format; if (sameString(format, "bam")) slAddHead(&rep->bamList, vf); else if (sameString(format, "bigWig")) slAddHead(&rep->bigWigList, vf); else if (sameString(format, "narrowPeak") && !sameString(valid->outputType, "replicated_narrowPeak")) slAddHead(&rep->narrowList, vf); else if (sameString(format, "broadPeak") && !sameString(valid->outputType, "replicated_broadPeak")) slAddHead(&rep->broadList, vf); } sqlFreeResult(&sr); uglyf("Got %d vFiles\n", vCount); dyStringFree(&q); /* Free hash or return it, and return list. */ if (retHash == NULL) hashFree(&hash); else *retHash = hash; return fullList; }
void knownToVisiGene(char *database) /* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */ { char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); struct sqlConnection *hConn = sqlConnect(database); struct sqlConnection *iConn = sqlConnect(visiDb); struct sqlResult *sr; char **row; struct hash *geneImageHash = newHash(18); struct hash *locusLinkImageHash = newHash(18); struct hash *refSeqImageHash = newHash(18); struct hash *genbankImageHash = newHash(18); struct hash *probeImageHash = newHash(18); struct hash *knownToLocusLinkHash = newHash(18); struct hash *knownToRefSeqHash = newHash(18); struct hash *knownToGeneHash = newHash(18); struct hash *favorHugoHash = newHash(18); struct hash *knownToProbeHash = newHash(18); struct hash *knownToAllProbeHash = newHash(18); struct genePred *knownList = NULL, *known; struct hash *dupeHash = newHash(17); probesDb = optionVal("probesDb", database); struct sqlConnection *probesConn = sqlConnect(probesDb); vgProbes = sqlTableExists(probesConn,"vgProbes"); vgAllProbes = sqlTableExists(probesConn,"vgAllProbes"); /* Go through and make up hashes of images keyed by various fields. 
*/ sr = sqlGetResult(iConn, "NOSQLINJ select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank" ",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id" " from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap" " where image.imageFile = imageFile.id" " and image.id = imageProbe.image" " and imageProbe.probe = probe.id" " and probe.gene = gene.id" " and image.submissionSet=submissionSet.id" " and vgPrbMap.probe = probe.id"); while ((row = sqlNextRow(sr)) != NULL) { int id = sqlUnsigned(row[0]); float priority = atof(row[1]); int privateUser = sqlSigned(row[7]); char vgPrb_Id[256]; safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]); int geneId = sqlUnsigned(row[9]); if (privateUser == 0) { addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id); addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]); addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]); addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]); addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]); } } verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d" ", genbankImageHash %d probeImageHash %d\n", geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, genbankImageHash->elCount, probeImageHash->elCount); sqlFreeResult(&sr); /* Build up list of known genes. */ sr = sqlGetResult(hConn, "NOSQLINJ select * from knownGene"); while ((row = sqlNextRow(sr)) != NULL) { struct genePred *known = genePredLoad(row); if (!hashLookup(dupeHash, known->name)) { hashAdd(dupeHash, known->name, NULL); slAddHead(&knownList, known); } } slReverse(&knownList); sqlFreeResult(&sr); verbose(2, "Got %d known genes\n", slCount(knownList)); /* Build up hashes from knownGene to other things. 
*/ if (vgProbes) bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash); if (vgAllProbes) bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash); foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE); foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE); foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE); foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount, knownToProbeHash->elCount, knownToAllProbeHash->elCount); /* Try and find an image for each gene. */ for (known = knownList; known != NULL; known = known->next) { char *name = known->name; struct prioritizedImage *best = NULL; { best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash); if (!best) best = bestImage(name, knownToRefSeqHash, refSeqImageHash); if (!best) { best = hashFindVal(genbankImageHash, name); } if (!best) best = bestImage(name, knownToGeneHash, geneImageHash); if (vgProbes && !best) best = bestImage(name, knownToProbeHash, probeImageHash); if (vgAllProbes && !best) best = bestImage(name, knownToAllProbeHash, probeImageHash); } if (best) { fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId); } } createTable(hConn, outTable); hgLoadTabFile(hConn, tempDir, outTable, &f); hgRemoveTabFile(tempDir, outTable); }
void ffaToFa(char *inFile, char *outDir, char *outTabName) /* convert Greg Schulers .ffa fasta files to our .fa files */ { struct lineFile *in; FILE *out = NULL, *tab; int lineSize; char *line; char ucscName[128]; char path[512]; static char lastPath[512]; int outFileCount = 0; struct hash *uniqClone = newHash(16); struct hash *uniqFrag = newHash(19); boolean ignore = FALSE; makeDir(outDir); errLog = mustOpen("ffaToFa.err", "w"); tab = mustOpen(outTabName, "w"); printf("Converting %s", inFile); fflush(stdout); if (sameString(inFile, "stdin")) in = lineFileStdin(TRUE); else in = lineFileOpen(inFile, TRUE); while (lineFileNext(in, &line, &lineSize)) { if (line[0] == '>') { ignore = FALSE; gsToUcsc(line+1, ucscName); faRecNameToFaFileName(outDir, ucscName, path); if (hashLookup(uniqFrag, ucscName)) { ignore = TRUE; warn("Duplicate %s in %s, ignoring all but first", ucscName, inFile); } else { hashAdd(uniqFrag, ucscName, NULL); } if (!sameString(path, lastPath)) { strcpy(lastPath, path); carefulClose(&out); if (hashLookup(uniqClone, path)) { warn("Duplicate %s in %s ignoring all but first", ucscName, inFile); } else { hashAdd(uniqClone, path, NULL); out = mustOpen(path, "w"); ++outFileCount; if ((outFileCount&7) == 0) { putc('.', stdout); fflush(stdout); } } } if (out != NULL && !ignore) { fprintf(out, ">%s\n", ucscName); fprintf(tab, "%s\t%s\n", ucscName, line+1); } } else { if (out != NULL && !ignore) { fputs(line, out); fputc('\n', out); } } } carefulClose(&out); fclose(tab); lineFileClose(&in); printf("Made %d .fa files in %s\n", outFileCount, outDir); }
void txGeneCanonical(char *codingCluster, char *infoFile, char *noncodingGraph, char *genesBed, char *nearCoding, char *outCanonical, char *outIsoforms, char *outClusters) /* txGeneCanonical - Pick a canonical version of each gene - that is the form * to use when just interested in a single splicing varient. Produces final * transcript clusters as well. */ { /* Read in input into lists in memory. */ struct txCluster *coding, *codingList = txClusterLoadAll(codingCluster); struct txGraph *graph, *graphList = txGraphLoadAll(noncodingGraph); struct bed *bed, *nextBed, *bedList = bedLoadNAll(genesBed, 12); struct txInfo *info, *infoList = txInfoLoadAll(infoFile); struct bed *nearList = bedLoadNAll(nearCoding, 12); /* Make hash of all beds. */ struct hash *bedHash = hashNew(18); for (bed = bedList; bed != NULL; bed = bed->next) hashAdd(bedHash, bed->name, bed); /* Make has of all info. */ struct hash *infoHash = hashNew(18); for (info = infoList; info != NULL; info = info->next) hashAdd(infoHash, info->name, info); /* Make a binKeeper structure that we'll populate with coding genes. */ struct hash *sizeHash = minChromSizeFromBeds(bedList); struct hash *keeperHash = minChromSizeKeeperHash(sizeHash); /* Make list of coding genes and toss them into binKeeper. * This will eat up bed list, but bedHash is ok. */ struct gene *gene, *geneList = NULL; for (coding = codingList; coding != NULL; coding = coding->next) { gene = geneFromCluster(coding, bedHash, infoHash); slAddHead(&geneList, gene); struct binKeeper *bk = hashMustFindVal(keeperHash, gene->chrom); binKeeperAdd(bk, gene->start, gene->end, gene); } /* Go through near-coding genes and add them to the coding gene * they most overlap. */ for (bed = nearList; bed != NULL; bed = nextBed) { nextBed = bed->next; gene = mostOverlappingGene(keeperHash, bed); if (gene == NULL) errAbort("%s is near coding, but doesn't overlap any coding!?", bed->name); geneAddBed(gene, bed); } /* Add non-coding genes. 
*/ for (graph = graphList; graph != NULL; graph = graph->next) { gene = geneFromGraph(graph, bedHash); slAddHead(&geneList, gene); } /* Sort so it all looks nicer. */ slSort(&geneList, geneCmp); /* Open up output files. */ FILE *fCan = mustOpen(outCanonical, "w"); FILE *fIso = mustOpen(outIsoforms, "w"); FILE *fClus = mustOpen(outClusters, "w"); /* Loop through, making up gene name, and writing output. */ int geneId = 0; for (gene = geneList; gene != NULL; gene = gene->next) { /* Make up name. */ char name[16]; safef(name, sizeof(name), "g%05d", ++geneId); /* Reverse transcript list just to make it look better. */ slReverse(&gene->txList); /* Write out canonical file output */ bed = hashMustFindVal(bedHash, gene->niceTx->name); fprintf(fCan, "%s\t%d\t%d\t%d\t%s\t%s\n", bed->chrom, bed->chromStart, bed->chromEnd, geneId, gene->niceTx->name, gene->niceTx->name); /* Write out isoforms output. */ for (bed = gene->txList; bed != NULL; bed = bed->next) fprintf(fIso, "%d\t%s\n", geneId, bed->name); /* Write out cluster output, starting with bed 6 standard fields. */ fprintf(fClus, "%s\t%d\t%d\t%s\t%d\t%c\t", gene->chrom, gene->start, gene->end, name, 0, gene->strand); /* Write out thick-start/thick end. */ if (gene->isCoding) { int thickStart = gene->end, thickEnd = gene->start; for (bed = gene->txList; bed != NULL; bed = bed->next) { if (bed->thickStart < bed->thickEnd) { thickStart = min(thickStart, bed->thickStart); thickEnd = max(thickEnd, bed->thickEnd); } } fprintf(fClus, "%d\t%d\t", thickStart, thickEnd); } else { fprintf(fClus, "%d\t%d\t", gene->start, gene->start); } /* We got no rgb value, just write out zero. */ fprintf(fClus, "0\t"); /* Get exons from exonTree. 
*/ struct range *exon, *exonList = rangeTreeList(gene->exonTree); fprintf(fClus, "%d\t", slCount(exonList)); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(fClus, "%d,", exon->start - gene->start); fprintf(fClus, "\t"); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(fClus, "%d,", exon->end - exon->start); fprintf(fClus, "\t"); /* Write out associated transcripts. */ fprintf(fClus, "%d\t", slCount(gene->txList)); for (bed = gene->txList; bed != NULL; bed = bed->next) fprintf(fClus, "%s,", bed->name); fprintf(fClus, "\t"); /* Write out nice value */ fprintf(fClus, "%s\t", gene->niceTx->name); /* Write out coding/noncoding value. */ fprintf(fClus, "%d\n", gene->isCoding); } /* Close up files. */ carefulClose(&fCan); carefulClose(&fIso); carefulClose(&fClus); }
void gffFileAddRow(struct gffFile *gff, int baseOffset, char *words[], int wordCount, char *fileName, int lineIx) /* Process one row of GFF file (a non-comment line parsed by tabs normally). */ { struct hashEl *hel; struct gffLine *gl; if (wordCount < 8) gffSyntaxError(fileName, lineIx, "Word count less than 8 "); AllocVar(gl); if ((hel = hashLookup(gff->seqHash, words[0])) == NULL) { struct gffSeqName *el; AllocVar(el); hel = hashAdd(gff->seqHash, words[0], el); el->name = hel->name; slAddHead(&gff->seqList, el); } gl->seq = hel->name; if ((hel = hashLookup(gff->sourceHash, words[1])) == NULL) { struct gffSource *el; AllocVar(el); hel = hashAdd(gff->sourceHash, words[1], el); el->name = hel->name; slAddHead(&gff->sourceList, el); } gl->source = hel->name; if ((hel = hashLookup(gff->featureHash, words[2])) == NULL) { struct gffFeature *el; AllocVar(el); hel = hashAdd(gff->featureHash, words[2], el); el->name = hel->name; slAddHead(&gff->featureList, el); } gl->feature = hel->name; if (!isdigit(words[3][0]) || !isdigit(words[4][0])) gffSyntaxError(fileName, lineIx, "col 3 or 4 not a number "); gl->start = atoi(words[3])-1 + baseOffset; gl->end = atoi(words[4]) + baseOffset; gl->score = atof(words[5]); gl->strand = words[6][0]; gl->frame = words[7][0]; if (wordCount >= 9) { if (!gff->typeKnown) { gff->typeKnown = TRUE; gff->isGtf = isGtfGroup(words[8]); } if (gff->isGtf) { parseGtfEnd(words[8], gff, gl, fileName, lineIx); } else { char *tnName = gffTnName(gl->seq, trimSpaces(words[8])); if ((hel = hashLookup(gff->groupHash, tnName)) == NULL) { struct gffGroup *group; AllocVar(group); hel = hashAdd(gff->groupHash, tnName, group); group->name = hel->name; group->seq = gl->seq; group->source = gl->source; slAddHead(&gff->groupList, group); } gl->group = hel->name; } } slAddHead(&gff->lineList, gl); }
void cartJsonRegisterHandler(struct cartJson *cj, char *command, CartJsonHandler *handler)
/* Record handler under the name command in cj's handler hash, so that the
 * handler can later be looked up by that command name. */
{
struct hash *handlers = cj->handlerHash;
hashAdd(handlers, command, handler);
}
void doEnrichments(struct sqlConnection *conn, struct cdwFile *ef, char *path, struct hash *assemblyToTarget) /* Calculate enrichments on for all targets file. The targetList and the * grtList are in the same order. */ { /* Get validFile from database. */ struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; /* We can only work if have validFile table entry */ if (!isEmpty(vf->enrichedIn) && !sameWord(vf->ucscDb, "unknown") && !isEmpty(vf->ucscDb) && !sameWord(vf->format, "unknown")) { /* Get our assembly */ char *format = vf->format; char *ucscDb = vf->ucscDb; char *targetName = cdwSimpleAssemblyName(ucscDb); struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetName); struct target *targetList = hashFindVal(assemblyToTarget, assembly->name); if (targetList == NULL) { targetList = targetsForAssembly(conn, assembly); if (targetList == NULL) errAbort("No targets for assembly %s", assembly->name); hashAdd(assemblyToTarget, assembly->name, targetList); } /* Loop through targetList zeroing out existing ovelaps. */ struct target *target; boolean allSkip = TRUE; for (target = targetList; target != NULL; target = target->next) { target->overlapBases = target->uniqOverlapBases = 0; target->skip = enrichmentExists(conn, ef, target->target); if (!target->skip) allSkip = FALSE; } /* Do a big dispatch based on format. 
*/ if (!allSkip) { if (sameString(format, "fastq")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bigWig")) doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList); else if (startsWith("bed_", format)) doEnrichmentsFromBed(conn, ef, vf, assembly, targetList); else if (cdwIsSupportedBigBedFormat(format) || sameString(format, "bigBed")) doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gtf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gff")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bam")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "vcf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "idat")) verbose(2, "Ignoring idat %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "customTrack")) verbose(2, "Ignoring customTrack %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "rcc")) verbose(2, "Ignoring rcc %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "bam.bai")) verbose(2, "Ignoring bam.bai %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "vcf.gz.tbi")) verbose(2, "Ignoring vcf.gz.tbi %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "unknown")) verbose(2, "Unknown format in doEnrichments(%s), that's ok.", ef->cdwFileName); else errAbort("Unrecognized format %s in doEnrichments(%s)", format, path); } /* Clean up and go home. */ cdwAssemblyFree(&assembly); } cdwValidFileFree(&vf); }
void readCloneNames(struct lineFile *clf) /* read internal BAC clone names and Sanger sts names */ { struct alias *a = NULL; struct sanger *s = NULL; char *words[4], *name = NULL, *sanger = NULL, *extName = NULL; int i, rel; char sep = '|'; boolean found = FALSE, posFound = FALSE; /* alias hash is keyed by Sanger sts name */ aliasHash = newHash(16); /* hash of Sanger names keyed by external name */ sangerByExtNameHash = newHash(16); /* Read in all rows */ while (lineFileChopCharNext(clf, sep, words, 5)) { name = cloneString(words[0]); sanger = cloneString(words[1]); if (!sameString(words[2], "")) rel = sqlUnsigned(words[2]); else rel = 3; /* find external name for this internal name from the extNameHash */ if ((extName = hashFindVal(extNameHash, name)) == NULL) { /* if not found in BAC hash, then need to use internal name to make extName */ extName = translateName(name, FALSE); } if ((a = hashFindVal(aliasHash, sanger)) == NULL) { /* allocate memory for alias struct */ AllocVar(a); /* allocate memory for UniSTS IDs, aliases, internal and external names and relations */ /* and initialize the arrays */ AllocArray(a->uniStsId, (sizeof(char *) * NUMSANGER)); AllocArray(a->aliases, (sizeof(char *) * NUMALIASES)); AllocArray(a->extName, (sizeof(char *) * MAXSANGER)); AllocArray(a->intName, (sizeof(char *) * MAXSANGER)); AllocArray(a->relation, (sizeof(int) * MAXSANGER)); for (i = 0; i < NUMSANGER; i++) { a->uniStsId[i] = NULL; } for (i = 0; i < MAXSANGER; i++) { a->extName[i] = NULL; a->intName[i] = NULL; a->relation[i] = -1; } for (i = 0; i < NUMALIASES; i++) { a->aliases[i] = NULL; } } /* find empty slot in arrays to add external and internal names */ posFound = FALSE; for (i = 0; i < NUMALIASES && (!posFound); i++) { if (a->extName[i] == NULL) { posFound = TRUE; a->extName[i] = cloneString(extName); if (a->intName[i] == NULL) a->intName[i] = cloneString(name); else errAbort("For marker %s, the empty slot in the intName array is not the same as that for the extName 
array in the alias struct\n", extName); if (a->relation[i] == -1) a->relation[i] = rel; else errAbort("For marker %s, the empty slot in the relation array is not the same as that for the extName array in the alias struct\n", extName); } } a->sangerName = cloneString(sanger); a->primer1 = NULL; a->primer2 = NULL; /* add this alias struct to the hash keyed by sanger name */ hashAdd(aliasHash, sanger, a); /* add sanger name to hash keyed by external name */ if ((s = hashFindVal(sangerByExtNameHash, extName)) == NULL) { /* allocate memory for struct with array of Sanger names */ AllocVar(s); /* initialize the array */ for (i = 0; i < MAXSANGER; i++) { s->sangerName[i] = NULL; } } found = FALSE; for (i = 0; i < MAXSANGER && (!found); i++) { if (s->sangerName[i] == NULL) { found = TRUE; s->sangerName[i] = cloneString(sanger); } } /* add this list of sanger names to a hash keyed by external name, extName */ hashAdd(sangerByExtNameHash, extName, s); } }
int main(int argc, char *argv[]) { struct hash *bacHash; char line[1024]; int lineCount; char *words[256]; int wordCount; int fileIx; char *fileName; FILE *f; if (argc < 2) usage(); bacHash = newHash(16); for (fileIx = 1; fileIx < argc; ++fileIx) { fileName = argv[fileIx]; uglyf("Processing %s\n", fileName); f = mustOpen(fileName, "r"); lineCount = 0; while (fgets(line, sizeof(line), f)) { ++lineCount; wordCount = chopLine(line, words); if (wordCount == ArraySize(words)) errAbort("Too many words line %d of %s\n", lineCount, fileName); if (wordCount != 0) { char *bacName; int cIx; struct contigTrack *ctList = NULL, *ct; struct bacTrack *bt; struct hashEl *hel; /* Check line syntax and parse it. */ if (!sameString(words[1], "glues")) errAbort("Bad format line %d of %s\n", lineCount, fileName); bacName = words[2]; for (cIx = 4; cIx < wordCount; cIx += 5) { char *parts[3]; int partCount; AllocVar(ct); ct->ix = atoi(words[cIx]); ct->strand = words[cIx+1][0]; ct->dir = words[cIx+2][0]; partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts)); if (partCount != 2) errAbort("Bad format line %d of %s\n", lineCount, fileName); ct->start = atoi(parts[0]); ct->end = atoi(parts[1]); ct->cookedScore = atof(words[cIx+4]); slAddHead(&ctList, ct); } slSort(&ctList, cmpContigTrack); /* Lookup bacTrack and make it if new. */ hel = hashLookup(bacHash, bacName); if (hel == NULL) { AllocVar(bt); hel = hashAdd(bacHash, bacName, bt); bt->name = hel->name; slAddHead(&bacList, bt); } else { bt = hel->val; } /* Process pairs into bacTrack. */ addPairs(bt, ctList); slFreeList(&ctList); } } fclose(f); } slSort(&bacList, cmpBacTrack); printStats(); return 0; }
void txGeneXref(char *genomeDb, char *uniProtDb, char *genePredFile, char *infoFile, char *pickFile, char *evFile, char *outFile) /* txGeneXref - Make kgXref type table for genes.. */ { /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct hash *geneToProtHash = makeGeneToProtHash(genePredFile); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(pickFile, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); removePickVersions(pick); hashAdd(pickHash, pick->name, pick); } /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); /* Read in info file, and loop through it to make out file. */ struct txInfo *info, *infoList = txInfoLoadAll(infoFile); FILE *f = mustOpen(outFile, "w"); for (info = infoList; info != NULL; info = info->next) { char *kgID = info->name; char *mRNA = ""; char *spID = ""; char *spDisplayID = ""; char *geneSymbol = NULL; char *refseq = ""; char *protAcc = ""; char *description = NULL; char query[256]; char *proteinId = hashMustFindVal(geneToProtHash, info->name); boolean isAb = sameString(info->category, "antibodyParts"); pick = hashFindVal(pickHash, info->name); ev = hashFindVal(evHash, info->name); if (pick != NULL) { /* Fill in the relatively straightforward fields. */ refseq = pick->refSeq; if (info->orfSize > 0) { protAcc = pick->refProt; spID = proteinId; if (sameString(protAcc, spID)) spID = pick->uniProt; if (spID[0] != 0) spDisplayID = spAnyAccToId(uConn, spID); } /* Fill in gene symbol and description from refseq if possible. 
*/ if (refseq[0] != 0) { struct sqlResult *sr; safef(query, sizeof(query), "select name,product from refLink where mrnaAcc='%s'", refseq); sr = sqlGetResult(gConn, query); char **row = sqlNextRow(sr); if (row != NULL) { geneSymbol = cloneString(row[0]); if (!sameWord("unknown protein", row[1])) description = cloneString(row[1]); } sqlFreeResult(&sr); } /* If need be try uniProt for gene symbol and description. */ if (spID[0] != 0 && (geneSymbol == NULL || description == NULL)) { char *acc = spLookupPrimaryAcc(uConn, spID); if (description == NULL) description = spDescription(uConn, acc); if (geneSymbol == NULL) { struct slName *nameList = spGenes(uConn, acc); if (nameList != NULL) geneSymbol = cloneString(nameList->name); slFreeList(&nameList); } } } /* If it's an antibody fragment use that as name. */ if (isAb) { geneSymbol = cloneString("abParts"); description = cloneString("Parts of antibodies, mostly variable regions."); isAb = TRUE; } if (ev == NULL) { mRNA = cloneString(""); if (!isAb) { errAbort("%s is %s but not %s\n", info->name, infoFile, evFile); } } else { mRNA = cloneString(ev->primary); chopSuffix(mRNA); } /* Still no joy? Try genbank RNA records. 
*/ if (geneSymbol == NULL || description == NULL) { if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { char *acc = ev->accs[i]; chopSuffix(acc); if (geneSymbol == NULL) { safef(query, sizeof(query), "select geneName.name from gbCdnaInfo,geneName " "where geneName.id=gbCdnaInfo.geneName and gbCdnaInfo.acc = '%s'", acc); geneSymbol = sqlQuickString(gConn, query); if (geneSymbol != NULL) { if (sameString(geneSymbol, "n/a")) geneSymbol = NULL; } } if (description == NULL) { safef(query, sizeof(query), "select description.name from gbCdnaInfo,description " "where description.id=gbCdnaInfo.description " "and gbCdnaInfo.acc = '%s'", acc); description = sqlQuickString(gConn, query); if (description != NULL) { if (sameString(description, "n/a")) description = NULL; } } } } } if (geneSymbol == NULL) geneSymbol = mRNA; if (description == NULL) description = mRNA; /* Get rid of some characters that will cause havoc downstream. */ stripChar(geneSymbol, '\''); subChar(geneSymbol, '<', '['); subChar(geneSymbol, '>', ']'); /* Abbreviate geneSymbol if too long */ if (strlen(geneSymbol) > 40) strcpy(geneSymbol+37, "..."); fprintf(f, "%s\t", kgID); fprintf(f, "%s\t", mRNA); fprintf(f, "%s\t", spID); fprintf(f, "%s\t", spDisplayID); fprintf(f, "%s\t", geneSymbol); fprintf(f, "%s\t", refseq); fprintf(f, "%s\t", protAcc); fprintf(f, "%s\n", description); } carefulClose(&f); }
void rcvs(char *codingTable, char *clusterTable) /* rcvs - Compare riken noncoding vs. nonspliced. */ { struct hash *idHash = newHash(16); // Key id1, val id2 struct hash *nonCodingHash = newHash(16); // Key clusterId, value struct hash *splicedHash = newHash(16); // Key id2, present if spliced struct sqlConnection *conn = sqlConnect("mgsc"); struct sqlResult *sr; char **row; char *words[16]; int wordCount; struct lineFile *lf; int codingSpliced = 0; int noncodingSpliced = 0; int codingNonspliced = 0; int noncodingNonspliced = 0; /* Read id's into hash */ sr = sqlGetResult(conn, NOSQLINJ "select id1,id2 from rikenIds"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(idHash, row[0], cloneString(row[1])); sqlFreeResult(&sr); /* Read spliced into hash */ sr = sqlGetResult(conn, NOSQLINJ "select name from rikenOrientInfo where intronOrientation != 0"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(splicedHash, row[0], NULL); sqlFreeResult(&sr); /* Read noncoding clusters into hash */ lf = lineFileOpen(codingTable, TRUE); while (lineFileNextRow(lf, words, 2)) { if (sameString(words[1], "NoPProt")) hashAdd(nonCodingHash, words[0], NULL); } lineFileClose(&lf); /* Stream through cluster table counting and correlating. */ lf = lineFileOpen(clusterTable, TRUE); while (lineFileNextRow(lf, words, 2)) { char *cluster = words[0]; char *id1 = words[1]; char *id2 = hashMustFindVal(idHash, id1); if (hashLookup(nonCodingHash, cluster)) { if (hashLookup(splicedHash, id2)) ++noncodingSpliced; else ++noncodingNonspliced; } else { if (hashLookup(splicedHash, id2)) ++codingSpliced; else ++codingNonspliced; } } printf("noncodingNonspliced %d\n", noncodingNonspliced); printf("noncodingSpliced %d\n", noncodingSpliced); printf("codingNonspliced %d\n", codingNonspliced); printf("codingSpliced %d\n", codingSpliced); printf("total %d\n", noncodingNonspliced + noncodingSpliced + codingNonspliced + codingSpliced); }
void dupeFoo(char *pslName, char *faName, char *regionFile) /* dupeFoo - Do some duplication analysis. */ { struct lineFile *lf; struct frag *fragList = NULL, *frag; struct hash *fragHash = newHash(16); struct psl *psl; int fragCount=0,missCount=0,dupeCount=0,kSub=0, k1=0, k10=0,k100=0,k1000=0,k10000=0,diffChrom=0,distance; /* Read in fragment list and put it in hash. */ fragList = readFragList(faName); for (frag = fragList; frag != NULL; frag = frag->next) hashAdd(fragHash, frag->name, frag); /* Read psl's and store under the fragment the belong to. */ lf = pslFileOpen(pslName); while ((psl = pslNext(lf)) != NULL) { if ((frag = hashFindVal(fragHash, psl->qName)) == NULL) errAbort("Couldn't find %s in %s line %d of %s", psl->qName, faName, lf->lineIx, lf->fileName); slAddHead(&frag->pslList, psl); } lineFileClose(&lf); /* Look through fragments and report missing and dupes. */ for (frag = fragList; frag != NULL; frag = frag->next) { ++fragCount; if ((psl = frag->pslList) == NULL) { ++missCount; printf("missing %s\n", frag->name); } else { for (psl = frag->pslList; psl != NULL; psl = psl->next) { if (sameString(psl->tName, frag->chrom)) { distance = frag->start - psl->tStart; if (distance != 0) { if (distance < 0) distance = -distance; if (distance >= 10000000) ++k10000; else if (distance >= 1000000) ++k1000; else if (distance >= 100000) ++k100; else if (distance >= 10000) ++k10; else if (distance >= 1000) ++k1; else ++kSub; } } else { ++diffChrom; } } } } printPercent("Total", fragCount, fragCount); printPercent("Unaligned", missCount, fragCount); printPercent("Other Chrom", diffChrom, fragCount); printPercent("Same Chrom >10M", k10000, fragCount); printPercent("Same Chrom >1M", k1000, fragCount); printPercent("Same Chrom >10Ok", k100, fragCount); printPercent("Same Chrom >1Ok", k10, fragCount); printPercent("Same Chrom >1k", k1, fragCount); printPercent("Self-overlap", kSub, fragCount); writeRegions(fragList, regionFile); }
void analyse(int start, int stop) { struct hash *hash; char line[512]; int lineCount = 0; char *words[32]; int wordCount; struct cdnaInfo *cdnaList = NULL; struct cdnaInfo *ci = NULL; int cdnaCount; int maxCdnaCount = stop - start; cdnaCount = 1; if (start > 1) { for (;;) { if (!fgets(line, sizeof(line), inFile)) errAbort("Not %d cDNAs in file, only %d\n", start, cdnaCount); ++lineCount; if (line[0] == '#') /* Skip comments. */ continue; wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words)); if (wordCount <= 0) /* Skip empty lines. */ continue; if (!differentWord(words[1], "alignments")) { ++cdnaCount; if (cdnaCount >= start) break; } } } cdnaCount = 0; hash = newHash(14); /* Hash table with 16k entries. */ for (;;) { if (!fgets(line, sizeof(line), inFile)) break; ++lineCount; if (line[0] == '#') /* Skip comments. */ continue; wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words)); if (wordCount <= 0) /* Skip empty lines. */ continue; if (wordCount < 4) /* Everyone else has at least four words. */ { errAbort("Short line %d:\n", lineCount); } if (sameWord(words[1], "Blasting")) { char *cdnaName = words[2]; if ((ci = lookupInfo(hash, cdnaName)) == NULL) { struct hashEl *hel; ci = needMem(sizeof(*ci)); hel = hashAdd(hash, cdnaName, ci); ci->next = cdnaList; cdnaList = ci; ci->ix = atoi(words[0]); ci->name = hel->name; } } else if (sameWord(words[2], "hits")) { /* Newer style - includes cDNA matching range. */ if (ci == NULL) continue; hitLine(ci, lineCount, words[0], words[1], words[3], words[4], words[5], words[9]); } else if (sameWord(words[1], "hits")) /* Older style - no cDNA matching range. 
*/ { if (ci == NULL) continue; hitLine(ci, lineCount, words[0], NULL, words[2], words[3], words[4], words[8]); } else if (sameWord(words[1], "alignments")) { struct dnaSeq *cdnaSeq; struct wormCdnaInfo info; if (ci == NULL) continue; if (differentWord(ci->name, words[3])) errAbort("Line %d - %s is not %s", lineCount, words[3], ci->name); if (!ci->finished) { if (!anyCdnaSeq(ci->name, &cdnaSeq, &info)) { warn("Can't find cDNA %s", ci->name); ci->isDupe = TRUE; } else { ci->baseCount = cdnaSeq->size; ci->baseCrc = dnaCrc(cdnaSeq->dna, cdnaSeq->size); slReverse(&ci->roughAli); ci->roughScore = bestRoughScore(ci->roughAli); filterDupeCdna(ci, cdnaSeq); ci->isBackwards = (info.orientation == '-'); refineAlis(ci, cdnaSeq); ci->fineScore = bestFineScore(ci->fineAli); ci->isEmbryonic = info.isEmbryonic; ci->finished = TRUE; freeDnaSeq(&cdnaSeq); ++cdnaCount; if (cdnaCount >= maxCdnaCount) break; } } } else { errAbort("Can't deal with line %d\n", lineCount); } } slReverse(&cdnaList); doGoodBad(cdnaList); doUnusual(cdnaList); //makeCdnaToGene(cdnaList); /* Clean up. */ /* These two are slow and not really necessary. */ #ifdef FASTIDIOUS slFreeList(&cdnaList); freeHash(&hash); #endif uglyf("Done analyse\n"); }
static void clusterClone(int argc, char *argv[]) { int i; for (i=1; i < argc; ++i) { struct lineFile *lf; struct psl *psl; unsigned tSize; char *prevAccPart = (char *)NULL; char *prevAccName = (char *)NULL; char *prevTargetName = (char *)NULL; struct hashEl *el; struct hash *chrHash = newHash(0); struct hash *coordHash = newHash(0); struct coordEl *coord; struct coordEl **coordListPt = (struct coordEl **) NULL; unsigned querySize = 0; int partCount = 0; int partsConsidered = 0; verbose(2,"#\tprocess: %s\n", argv[i]); lf=pslFileOpen(argv[i]); while ((struct psl *)NULL != (psl = pslNext(lf)) ) { char *accName = (char *)NULL; char *targetName = (char *)NULL; int chrCount = 0; double percentCoverage; accName = cloneString(psl->qName); if ((char *)NULL == prevAccPart) { prevAccPart = cloneString(psl->qName); /* first time */ querySize = psl->qSize; ++partsConsidered; } chopSuffixAt(accName,'_'); if ((char *)NULL == prevAccName) prevAccName = cloneString(accName); /* first time */ if ((char *)NULL == prevTargetName) prevTargetName = cloneString(psl->tName); /* first time */ /* encountered a new accession name, process the one we * were working on */ if (differentWord(accName, prevAccName)) { if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found in %d parts considered\n", prevTargetName, prevAccName, partsConsidered); freeMem(prevAccName); prevAccName = cloneString(accName); freeHash(&chrHash); freeHash(&coordHash); chrHash = newHash(0); coordHash = newHash(0); querySize = 0; partCount = 0; partsConsidered = 0; } tSize = psl->tEnd - psl->tStart; percentCoverage = 100.0*((double)(tSize+1)/(psl->qSize + 1)); if (differentWord(psl->qName, prevAccPart)) { ++partsConsidered; querySize += psl->qSize; freeMem(prevAccPart); prevAccPart = cloneString(psl->qName); } targetName = cloneString(psl->tName); if (differentWord(targetName, prevTargetName)) { freeMem(prevTargetName); 
prevTargetName = cloneString(targetName); } /* keep a hash of chrom names encountered */ el = hashLookup(chrHash, targetName); if (el == NULL) { if (percentCoverage > minCover) { hashAddInt(chrHash, targetName, 1); chrCount = 1; } else { hashAddInt(chrHash, targetName, 0); chrCount = 0; } } else { if (percentCoverage > minCover) { chrCount = ptToInt(el->val) + 1; el->val=intToPt(chrCount); } } AllocVar(coord); coord->start = psl->tStart; coord->end = psl->tEnd; coord->qSize = psl->qSize; coord->strand = sameWord(psl->strand,"+") ? 1 : 0; /* when coverage is sufficient */ if (percentCoverage > minCover) { ++partCount; coord->name = cloneString(psl->qName); /* for each chrom name, accumulate a list of coordinates */ el = hashLookup(coordHash, targetName); if (el == NULL) { AllocVar(coordListPt); hashAdd(coordHash, targetName, coordListPt); } else { coordListPt = el->val; } slAddHead(coordListPt,coord); verbose(2,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } else { verbose(3,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } freeMem(accName); freeMem(targetName); pslFree(&psl); } if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found\n", prevTargetName, prevAccName); freeMem(prevAccName); freeHash(&chrHash); freeHash(&coordHash); lineFileClose(&lf); } } /* static void clusterClone() */
void addRedoHash(struct cdnaInfo *ci, char *reason)
/* Record 'reason' in the global redoHash under ci->name.  If the name is
 * already present the existing entry is kept and nothing is added. */
{
if (hashLookup(redoHash, ci->name) != NULL)
    return;
hashAdd(redoHash, ci->name, reason);
}
/* convolve() - perform the task on the input data * I would like to rearrange this business here, and instead of * reading in the data and leaving it in the hash for all other * routines to work with, it would be best to get it immediately * into an array. That makes the work of the other routines much * easier. */ static void convolve(int argc, char *argv[]) { int i; struct lineFile *lf; /* for line file utilities */ for (i = 1; i < argc; ++i) { int lineCount = 0; /* counting input lines */ char *line = (char *)NULL; /* to receive data input line */ char *words[128]; /* to split data input line */ int wordCount = 0; /* result of split */ struct hash *histo0; /* first histogram */ struct hash *histo1; /* second histogram */ int medianBin0 = 0; /* bin at median for histo0 */ double medianLog_2 = -500.0; /* log at median */ int bin = 0; /* 0 to N-1 for N bins */ int convolutions = 0; /* loop counter for # of convolutions */ histo0 = newHash(0); lf = lineFileOpen(argv[i], TRUE); /* input file */ verbose(1, "Processing %s\n", argv[1]); while (lineFileNext(lf, &line, NULL)) { int j; /* loop counter over words */ int inputValuesCount = 0; struct histoGram *hg; /* an allocated hash element */ ++lineCount; chopPrefixAt(line, '#'); /* ignore any comments starting with # */ if (strlen(line) < 3) /* anything left on this line ? 
*/ continue; /* no, go to next line */ wordCount = chopByWhite(line, words, 128); if (wordCount < 1) warn("Expecting at least a word at line %d, file: %s, found %d words", lineCount, argv[i], wordCount); if (wordCount == 128) warn("May have more than 128 values at line %d, file: %s", lineCount, argv[i]); verbose(2, "Input data read from file: %s\n", argv[i]); for (j = 0; j < wordCount; ++j) { char binName[128]; double dataValue; double probInput; double log_2; dataValue = strtod(words[j], NULL); ++inputValuesCount; if (logs) { log_2 = dataValue; probInput = pow(2.0,log_2); } else { if (dataValue > 0.0) { log_2 = log2(dataValue); probInput = dataValue; } else { log_2 = -500.0; /* arbitrary limit */ probInput = pow(2.0,log_2); } } if (log_2 > medianLog_2) { medianLog_2 = log_2; medianBin0 = bin; } verbose(2, "bin %d: %g %0.5g\n", inputValuesCount-1, probInput, log_2); AllocVar(hg); /* the histogram element */ hg->bin = bin; hg->prob = probInput; hg->log_2 = log_2; snprintf(binName, sizeof(binName), "%d", hg->bin); hashAdd(histo0, binName, hg); ++bin; } /* for each word on an input line */ } /* for each line in a file */ /* file read complete, echo input */ if (verboseLevel() >= 2) printHistogram(histo0, medianBin0); /* perform convolutions to specified count * the iteration does histo0 with itself to produce histo1 * Then histo0 is freed and histo1 copied to it for the * next loop. */ for (convolutions = 0; convolutions < convolve_count; ++convolutions) { int medianBin; histo1 = newHash(0); medianBin = iteration(histo0, histo1); if (verboseLevel() >= 2) printHistogram(histo1, medianBin); freeHashAndVals(&histo0); histo0 = histo1; } } /* for each input file */ } /* convolve() */
static void parseGtfEnd(char *s, struct gffFile *gff, struct gffLine *gl, char *fileName, int lineIx)
/* Read the semi-colon separated end bits of a GTF line into gl and
 * hashes.
 *   s        - attribute text; it is modified in place while parsing.
 *   gff      - file-level structure whose geneIdHash/geneIdList and
 *              groupHash/groupList gain an entry for each new gene_id
 *              and transcript_id.
 *   gl       - line structure receiving geneId, group, exonId, exonNumber,
 *              intronId, intronStatus, proteinId, geneName, transcriptName.
 *   fileName, lineIx - used only in error and warning messages.
 * Aborts on a type with no value, an empty unquoted value, or an
 * exon_number that does not start with a digit.  Warns when the line
 * ends up with neither a gene_id nor a transcript_id. */
{
char *type, *val;
struct hashEl *hel;
bool gotSemi;   /* set when the ';' was already consumed with the value */

for (;;)
    {
    gotSemi = FALSE;
    /* Each attribute is a type word followed by its value. */
    if ((type = nextWord(&s)) == NULL)
        break;
    s = skipLeadingSpaces(s);
    if (NULL == s || s[0] == 0)
        errAbort("Unpaired type(%s)/val on end of gtf line %d of %s", type, lineIx, fileName);
    if (s[0] == '"' || s[0] == '\'')
        {
        /* Quoted value: readQuotedString is given s as both input and
         * output buffer and updates s through its last argument. */
        val = s;
        readQuotedString(fileName, lineIx, s, val, &s);
        }
    else
        {
        /* Bare value: take the next word and strip a trailing ';'. */
        int len;
        val = nextWord(&s);
        len = strlen(val) - 1;
        if (val[len] == ';')
            {
            val[len] = 0;
            len -= 1;
            gotSemi = TRUE;
            }
        if (len < 0)
            errAbort("Empty value for %s line %d of %s", type, lineIx, fileName);
        }
    /* Move past the ';' that ends this attribute unless it was stripped
     * from the value above. */
    if (s != NULL && !gotSemi)
        {
        s = strchr(s, ';');
        if (s != NULL)
            ++s;
        }
    /* only use the first occurrence of gene_id and transcript_id */
    if (sameString("gene_id", type) && (gl->geneId == NULL))
        {
        struct gffGeneId *gg;
        if ((hel = hashLookup(gff->geneIdHash, val)) == NULL)
            {
            AllocVar(gg);
            hel = hashAdd(gff->geneIdHash, val, gg);
            gg->name = hel->name;   /* name points at the hash's key */
            slAddHead(&gff->geneIdList, gg);
            }
        else
            {
            gg = hel->val;
            }
        gl->geneId = gg->name;
        }
    else if (sameString("transcript_id", type) && (gl->group == NULL))
        {
        struct gffGroup *gg;
        if ((hel = hashLookup(gff->groupHash, val)) == NULL)
            {
            /* New group: it takes seq and source from this line. */
            AllocVar(gg);
            hel = hashAdd(gff->groupHash, val, gg);
            gg->name = hel->name;
            gg->seq = gl->seq;
            gg->source = gl->source;
            slAddHead(&gff->groupList, gg);
            }
        else
            {
            gg = hel->val;
            }
        gl->group = gg->name;
        }
    /* The remaining recognized types are stored via gffFileGetStr, except
     * exon_number which is converted with atoi.  Unrecognized types are
     * ignored. */
    else if (sameString("exon_id", type))
        gl->exonId = gffFileGetStr(gff, val);
    else if (sameString("exon_number", type))
        {
        if (!isdigit(val[0]))
            errAbort("Expecting number after exon_number, got %s line %d of %s", val, lineIx, fileName);
        gl->exonNumber = atoi(val);
        }
    else if (sameString("intron_id", type))
        gl->intronId = gffFileGetStr(gff, val);
    else if (sameString("intron_status", type))
        gl->intronStatus = gffFileGetStr(gff, val);
    else if (sameString("protein_id", type))
        gl->proteinId = gffFileGetStr(gff, val);
    else if (sameString("gene_name", type))
        gl->geneName = gffFileGetStr(gff, val);
    else if (sameString("transcript_name", type))
        gl->transcriptName = gffFileGetStr(gff, val);
    }
/* Only warn when the line has neither identifier. */
if (gl->group == NULL)
    {
    if (gl->geneId == NULL)
        warn("No gene_id or transcript_id line %d of %s", lineIx, fileName);
    }
}
void flagReported(struct hash* reportedHash, char* key)
/* Mark the error identified by key as already reported by adding the key
 * to reportedHash.  No value is stored with it. */
{
void *noValue = NULL;
hashAdd(reportedHash, key, noValue);
}
void filereader(FILE* in, int bflag, char* sym, int n){ FILE* iq = fopen("iq.txt", "w"); FILE* ob = fopen("ob.txt", "w"); int count = 0; if(!bflag){ char ch; int id; while(!feof(in)){ fscanf(in,"%c",&ch); if(ch == 'A'){ char side; char* symbol = (char*)malloc(sizeof(char*)); int quantity; double price; fscanf(in,"%d %c %s %d %lf\n",&id, &side, symbol, &quantity, &price); if(!strcmp(sym,symbol)){ hashAdd(id, side, symbol, quantity, price); //printf("%ld %c %s %d %lf\n", id, side, symbol, quantity, price); //count++; } }else if(ch == 'X'){ char* symbol = (char*)malloc(sizeof(char*)); fscanf(in,"%d %s\n",&id, symbol); hashDel(id); }else if(ch == 'T'){ int id; char* symbol = (char*)malloc(sizeof(char*)); int quantity; fscanf(in, "%d %s %d\n", &id, symbol, & quantity); if(!strcmp(sym,symbol)){ changeNode(id, quantity); } }else if(ch == 'R'){ int id; char* symbol = (char*)malloc(sizeof(char*)); int quantity; double price; fscanf(in, "%d %s %d %lf\n", &id, symbol, &quantity, &price); changePnode(id, quantity, price); }else if(ch == 'C'){ int id; char* symbol = (char*)malloc(sizeof(char*)); int quantity; fscanf(in, "%d %s %d\n", &id, symbol, & quantity); if(!strcmp(sym,symbol)){ changeNode(id, quantity); } } count++; //printf("count %d\n", count); //printf("n in file %d\n", n); if(count == n){ count = 0; double sellPrice = 0.0; double buyPrice = 0.0; if(sell != NULL) sellPrice = sell->price; if(buy != NULL) buyPrice = buy->price; fprintf(iq, "%lf %lf\n", sellPrice, buyPrice); } } }else{ char c; while(!feof(in)){ fread(&c, sizeof(char), 1,in); if(c == 'A'){ int id; char side; char *symbol = (char *) malloc(sizeof(char *)); int quantity; double price; fread(&id,sizeof(int),1,in); fread(&side,sizeof(char),1,in); fread(symbol,sizeof(char),4,in); symbol[strlen(symbol)] = '\0'; fread(&quantity,sizeof(int),1,in); fread(&price,sizeof(double),1,in); if(!strcmp(sym,symbol)){ hashAdd(id, side, symbol, quantity, price); //printf("%ld %c %s %d %lf\n", id, side, symbol, quantity, 
price); } } if(c == 'X'){ int id; char *symbol = (char *) malloc(sizeof(char *)); fread(&id,sizeof(int),1,in); fread(symbol,sizeof(char),4,in); symbol[strlen(symbol)] = '\0'; hashDel(id); } if(c == 'T'){ int id; char *symbol = (char *) malloc(sizeof(char *)); int quantity; fread(&id,sizeof(int),1,in); fread(symbol,sizeof(char),4,in); symbol[strlen(symbol)] = '\0'; fread(&quantity,sizeof(int),1,in); changeNode(id, quantity); } if(c == 'C'){ int id; char *symbol = (char *) malloc(sizeof(char *)); int quantity; fread(&id,sizeof(int),1,in); fread(symbol,sizeof(char),4,in); symbol[strlen(symbol)] = '\0'; fread(&quantity,sizeof(int),1,in); changeNode(id,quantity); } if(c == 'R'){ int id; char *symbol = (char *) malloc(sizeof(char *)); int quantity; double price; fread(&id,sizeof(int),1,in); fread(symbol,sizeof(char),4,in); symbol[strlen(symbol)] = '\0'; fread(&quantity,sizeof(int),1,in); fread(&price,sizeof(double),1,in); changePnode(id, quantity, price); } } } printhash(ob); }
void doExpRatio(struct trackDb *tdb, char *item, struct customTrack *ct) /* Generic expression ratio deatils using microarrayGroups.ra file */ /* and not the expRecord tables. */ { char *expScale = trackDbRequiredSetting(tdb, "expScale"); char *expStep = trackDbRequiredSetting(tdb, "expStep"); double maxScore = atof(expScale); double stepSize = atof(expStep); struct bed *bedList; char *itemName = cgiUsualString("i2","none"); char *expName = (item == NULL) ? itemName : item; char *tdbSetting = trackDbSettingOrDefault(tdb, "expColor", "redGreen"); char *colorVal = NULL; enum expColorType colorScheme; char colorVarName[256]; safef(colorVarName, sizeof(colorVarName), "%s.color", tdb->track); colorVal = cartUsualString(cart, colorVarName, tdbSetting); colorScheme = getExpColorType(colorVal); if (sameWord(tdb->grp, "cancerGenomics")) { /* set global flag */ isCancerGenomicsTrack = TRUE; } if (!ct) { genericHeader(tdb, itemName); bedList = loadMsBed(tdb, tdb->table, seqName, winStart, winEnd); } else if (ct->dbTrack) { genericHeader(tdb, itemName); printCustomUrl(tdb, itemName, TRUE); bedList = ctLoadMultScoresBedDb(ct, seqName, winStart, winEnd); } else bedList = bedFilterListInRange(ct->bedList, NULL, seqName, winStart, winEnd); if (bedList == NULL) printf("<b>No Expression Data in this Range.</b>\n"); else if (expName && sameString(expName, "zoomInMore")) printf("<b>Too much data to display in detail in this range.</b>\n"); else { struct microarrayGroups *groupings = NULL; struct maGrouping *combineGroup; struct hash *erHash = newHash(6); int i; if (!ct) { groupings = maGetTrackGroupings(database, tdb); combineGroup = maCombineGroupingFromCart(groupings, cart, tdb->track); } else combineGroup = maGetGroupingFromCt(ct); maBedClumpGivenGrouping(bedList, combineGroup); for (i = 0; i < combineGroup->numGroups; i++) { /* make stupid exprecord hash.perhaps eventually this won't be needed */ char id[16]; struct expRecord *er = basicExpRecord(combineGroup->names[i], i, 2); 
safef(id, sizeof(id), "%d", i); hashAdd(erHash, id, er); } puts("<h2></h2><p>\n"); msBedPrintTable(bedList, erHash, itemName, expName, -1*maxScore, maxScore, stepSize, 2, msBedDefaultPrintHeader, msBedExpressionPrintRow, printExprssnColorKey, getColorForExprBed, colorScheme); hashTraverseEls(erHash, erHashElFree); hashFree(&erHash); microarrayGroupsFree(&groupings); } puts("<h2></h2><p>\n"); bedFreeList(&bedList); }
void doGenePredNongenomic(struct sqlConnection *conn, int typeIx) /* Get mrna or protein associated with selected genes. */ { /* Note this does do the whole genome at once rather than one * chromosome at a time, but that's ok because the gene prediction * tracks this serves are on the small side. */ char *typeWords[3]; char *table; struct lm *lm = lmInit(64*1024); int fieldCount; struct bed *bed, *bedList = cookedBedsOnRegions(conn, curTable, getRegions(), lm, &fieldCount); int typeWordCount; textOpen(); /* Figure out which table to use. */ if (isRefGeneTrack(curTable)) { if (typeIx == 1) /* Protein */ doRefGeneProteinSequence(conn, bedList); else doRefGeneMrnaSequence(conn, bedList); } else { char *dupType = cloneString(findTypeForTable(database, curTrack, curTable, ctLookupName)); typeWordCount = chopLine(dupType, typeWords); if (typeIx >= typeWordCount) internalErr(); table = typeWords[typeIx]; if (sqlTableExists(conn, table)) { struct sqlResult *sr; char **row; char query[256]; struct hash *hash = newHash(18); boolean gotResults = FALSE; /* Make hash of all id's passing filters. */ for (bed = bedList; bed != NULL; bed = bed->next) hashAdd(hash, bed->name, NULL); /* Scan through table, outputting ones that match. */ sqlSafef(query, sizeof(query), "select name, seq from %s", table); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { if (hashLookup(hash, row[0])) { hPrintf(">%s\n", row[0]); writeSeqWithBreaks(stdout, row[1], strlen(row[1]), 60); gotResults = TRUE; } } sqlFreeResult(&sr); hashFree(&hash); if (!gotResults) hPrintf(NO_RESULTS); } else { internalErr(); } freez(&dupType); } lmCleanup(&lm); }
void bedItemOverlapCount(struct hash *chromHash, char *infile, char *outfile){ unsigned maxChromSize = 0; unitSize *counts = (unitSize *)NULL; FILE *f = mustOpen(outfile, "w"); struct hashCookie hc = hashFirst(chromHash); struct hashEl *hel; while( (hel = hashNext(&hc)) != NULL) { unsigned num = (unsigned) ptToInt(hel->val); maxChromSize = max(num, maxChromSize); } verbose(2,"#\tmaxChromSize: %u\n", maxChromSize); if (maxChromSize < 1) errAbort("maxChromSize is zero ?"); /* Allocate just once for the largest chrom and reuse this array */ counts = needHugeMem(sizeof(unitSize) * maxChromSize); /* Reset the array to be zero to be reused */ memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); unsigned chromSize = 0; char *prevChrom = (char *)NULL; boolean outputToDo = FALSE; struct hash *seenHash = newHash(5); struct lineFile *bf = lineFileOpen(infile , TRUE); struct bed *bed = (struct bed *)NULL; char *row[12]; int numFields = doBed12 ? 12 : 3; while (lineFileNextRow(bf,row, numFields)) { int i; bed = bedLoadN(row, numFields); verbose(3,"#\t%s\t%d\t%d\n",bed->chrom,bed->chromStart, bed->chromEnd); if (prevChrom && differentWord(bed->chrom,prevChrom)) // End a chr { verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); outputToDo = FALSE; memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); /* zero counts */ freez(&prevChrom); // prevChrom is now NULL so it will be caught by next if! } if ((char *)NULL == prevChrom) // begin a chr { if (hashLookup(seenHash, bed->chrom)) errAbort("ERROR:input file not sorted. 
%s seen before on line %d\n", bed->chrom, bf->lineIx); hashAdd(seenHash, bed->chrom, NULL); prevChrom = cloneString(bed->chrom); chromSize = hashIntVal(chromHash, prevChrom); verbose(2,"#\tchrom %s starting, size %d\n", prevChrom,chromSize); } if (bed->chromEnd > chromSize) { // check for circular chrM if (doBed12 || bed->chromStart>=chromSize || differentWord(bed->chrom,"chrM")) { warn("ERROR: %s\t%d\t%d", bed->chrom, bed->chromStart, bed->chromEnd); errAbort("chromEnd > chromSize ? %d > %d", bed->chromEnd,chromSize); } for (i = bed->chromStart; i < chromSize; ++i) INCWOVERFLOW(counts,i); for (i = 0; i < (bed->chromEnd - chromSize); ++i) INCWOVERFLOW(counts,i); } else if (doBed12) { int *starts = bed->chromStarts; int *sizes = bed->blockSizes; int *endStarts = &bed->chromStarts[bed->blockCount]; for(; starts < endStarts; starts++, sizes++) { unsigned int end = *starts + *sizes + bed->chromStart; for (i = *starts + bed->chromStart; i < end; ++i) INCWOVERFLOW(counts,i); } } else { for (i = bed->chromStart; i < bed->chromEnd; ++i) INCWOVERFLOW(counts, i); } outputToDo = TRUE; bedFree(&bed); // plug the memory leak } lineFileClose(&bf); // Note, next file could be on same chr! if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); if (doOutBounds) fprintf(stderr, "min %lu max %lu\n", (unsigned long)overMin, (unsigned long)overMax); verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); carefulClose(&f); freeMem(counts); freez(&prevChrom); // hashFreeWithVals(&chromHash, freez); freeHash(&seenHash); }
/*
 * Parse the NUL-terminated wide-character XML text in src and attach the
 * resulting nodes as descendants of x.  Nodes and strings are allocated
 * through Pool.  flags is accepted but not consulted by this function.
 *
 * Returns 1 when the text was consumed, or as soon as a closing tag brings
 * the parser back to a node of type xmlRoot; returns 0 for empty input and
 * for every malformed construct detected here (unterminated comment or
 * CDATA, mismatched closing tag, nesting deeper than the local stack, ...).
 *
 * stack[] remembers, per open element, the last child that had been appended
 * to its parent, so sibling chaining can resume after the element is closed.
 */
int xmlParse(void* Pool,PXMLNode x,wchar_t *src,int flags)
{
  int sz=xstrlen(src);
  int i,st,n;
  wchar_t str[256];
  PXMLNode stack[256];
  int stackpos=0;
  PXMLNode p=x,c;
  PHashLink l;
  PXMLNode chlds=NULL;   /* last child appended to p so far */

  if(!sz)return 0;
  for(i=0;i<sz;i++)
  {
    if(xmlIsSpace(src[i]))continue;

    /* <!-- comment --> */
    if(src[i]==L'<' && src[i+1]==L'!' && src[i+2]==L'-' && src[i+3]==L'-' && sz-i>4)
    {
      i+=4;
      st=i;
      while((src[i]!=L'-' || src[i+1]!=L'-' || src[i+2]!=L'>') && sz-i>3)i++;
      i+=2;
      if(sz-i<3)return 0;
      c=xmlNewChild(Pool,p,xmlComment);
      c->szCont=xstrndup(Pool,src+st,i-st-3);
      if(chlds)chlds->pNext=c;
      else p->pChildren=c;
      chlds=c;
      continue;
    }

    if(src[i]==L'<')
    {
      i++;

      /* </name> : close the current element and climb to its parent */
      if(src[i]==L'/')
      {
        i++;
        st=i;
        i=xmlGetWordEnd(src,i);
        if(i==-1)return 0;
        i=xmlSkipSpace(src,i);CHK;
        if(src[i]!=L'>')return 0;
        n=i-st;
        if(!xstrncmp(p->szName,src+st,n))return 0;
        //i++;
        p=p->pParent;
        /* A stray closing tag at the top node has no parent to return to. */
        if(p==NULL)return 0;
        if(p->eType==xmlRoot)return 1;
        /* Never pop more than was pushed. */
        if(stackpos<=0)return 0;
        stackpos--;
        chlds=stack[stackpos];
        continue;
      }

      /* <name ...> : open a new element */
      if(WC(src[i]))
      {
        st=i;
        i=xmlGetWordEnd(src,i);CHK;
        c=xmlNewChild(Pool,p,xmlLeaf);
        if(p->eType!=xmlRoot)p->eType=xmlNode;
        n=i-st;
        if(n>255)n=255;
        xstrncpy(str,src+st,n);
        if(p->hChildren==NULL)p->hChildren=hashNew(Pool);
        l=hashAdd(p->hChildren,str,c);
        c->szName=l->szKey;
        if(chlds)chlds->pNext=c;
        else p->pChildren=c;
        chlds=c;
        i=xmlParseTag(Pool,c,&p,src,i);
        if(i==0)return 0;
        if(p==c)
        {
          /* xmlParseTag descended into c: save the sibling cursor of the
           * parent level.  Refuse documents nested deeper than the stack
           * instead of writing past its end. */
          if(stackpos>=(int)(sizeof(stack)/sizeof(stack[0])))return 0;
          stack[stackpos]=chlds;
          stackpos++;
          chlds=NULL;
        }
        i=xmlSkipSpace(src,i);CHK;
        st=i;
        i=xmlSkipTill(src,i,L'<');CHK;
        if(i>st)
        {
          p->szCont=xmlSubst(xstrndup(Pool,src+st,i-st));
        }
        i--;   /* the for loop's i++ puts us back on the '<' */
        continue;
      }

      /* <!...> and <?...?> , including <![CDATA[ ... ]]> */
      if(src[i]==L'!' || src[i]==L'?')
      {
        st=i+1;
        if(src[i+1]==L'[')
        {
          if(xstrncmp(src+i,L"![CDATA[",8))
          {
            const wchar_t *q=src+i+8;
            /* Look for the closing "]]>".  The cursor has to move past a
             * ']' that does not start the terminator, otherwise the same
             * character would be found again forever. */
            while((q=xstrchr(q,']')))
            {
              if(q[1]==L']' && q[2]==L'>')break;
              q++;
            }
            /* Unterminated CDATA section: malformed input. */
            if(!q)return 0;
            i=(int)(q-src+2);
          }else i=xmlSkipTill(src,i,L'>');
        }else
        {
          i=xmlSkipTill(src,i,L'>');
        }
        if(i==-1)return 0;
        c=xmlNewChild(Pool,p,src[st-1]==L'!'?xmlExcl:xmlQuest);
        c->szCont=xstrndup(Pool,src+st,i-st);
        if(chlds)chlds->pNext=c;
        else p->pChildren=c;
        chlds=c;
        continue;
      }
    }else
    {
      /* Character data up to the next '<' becomes a content node. */
      st=i;
      i=xmlSkipTill(src,i,L'<');CHK;
      c=xmlNewChild(Pool,p,xmlContent);
      c->szCont=xmlSubst(xstrndup(Pool,src+st,i-st));
      if(chlds)chlds->pNext=c;
      else p->pChildren=c;
      chlds=c;
      i--;
    }
  }
  return 1;
}
struct joinerDtf *filteringTables() /* Get list of tables we're filtering on as joinerDtf list (with * the field entry NULL). */ { if (!anyFilter()) return NULL; else { struct joinerDtf *dtfList = NULL, *dtf; struct hashEl *varList, *var; struct hash *uniqHash = hashNew(0); int prefixSize = strlen(hgtaFilterVarPrefix); varList = cartFindPrefix(cart, hgtaFilterVarPrefix); for (var = varList; var != NULL; var = var->next) { char *dupe = cloneString(var->name + prefixSize); char *parts[5]; int partCount; char dbTable[256]; char *db, *table, *field, *type; partCount = chopByChar(dupe, '.', parts, ArraySize(parts)); if (partCount != 4) { warn("Part count != expected 4 line %d of %s", __LINE__, __FILE__); continue; } db = parts[0]; table = parts[1]; field = parts[2]; type = parts[3]; safef(dbTable, sizeof(dbTable), "%s.%s", db, table); if (! filteredOrLinked(db, table)) continue; if (!hashLookup(uniqHash, dbTable)) { boolean gotFilter = FALSE; if (sameString(type, filterPatternVar)) { char *pat = trimSpaces(var->val); gotFilter = wildReal(pat); } else if (sameString(type, filterCmpVar)) { char *patVar = filterPatternVarName(db, table, field); char *pat = trimSpaces(cartOptionalString(cart, patVar)); gotFilter = cmpReal(pat, var->val); } else if (sameString(type, filterRawQueryVar)) { char *pat = trimSpaces(var->val); gotFilter = (pat != NULL && pat[0] != 0); } if (gotFilter) { hashAdd(uniqHash, dbTable, NULL); AllocVar(dtf); dtf->database = cloneString(db); dtf->table = cloneString(table); slAddHead(&dtfList, dtf); } } freeMem(dupe); } hashFree(&uniqHash); return dtfList; } }
static struct grp *makeGroupList(char *db, struct trackDb *trackList,
        struct grp **pHubGrpList, boolean allTablesOk)
/* Build the list of groups that are referenced by at least one track in
 * trackList.  Groups loaded for db come first in load order, copies of the
 * referenced hub groups are spliced in (right after a leading "user" group
 * if there is one, otherwise at the front), and the pseudo groups
 * "allTracks" and, when allTablesOk, "allTables" close the list.
 * *pHubGrpList is emptied by this call. */
{
struct hash *activeGroups = newHash(0);   /* group names used by tracks */
struct hash *knownGroups = newHash(0);    /* group names put on the result */
struct grp *result = NULL;
struct grp *pending, *grp;
struct trackDb *tdb;

/* Note every group name that some track refers to. */
for (tdb = trackList; tdb != NULL; tdb = tdb->next)
    {
    if (hashLookup(activeGroups, tdb->grp) == NULL)
        {
        hashAdd(activeGroups, tdb->grp, NULL);
        }
    }

/* Keep the database groups that are in use, discard the others. */
pending = hLoadGrps(db);
while ((grp = slPopHead(&pending)) != NULL)
    {
    if (hashLookup(activeGroups, grp->name) == NULL)
        {
        grpFree(&grp);
        }
    else
        {
        slAddTail(&result, grp);
        hashAdd(knownGroups, grp->name, grp);
        }
    }

/* With custom tracks present the hub groups go after the "user" group. */
struct grp *hubAnchor = NULL;
if (result != NULL)
    {
    if (sameString(result->name, "user"))
        {
        hubAnchor = result;
        }
    }

/* Bring in the hub groups that some track actually uses. */
while ((grp = slPopHead(pHubGrpList)) != NULL)
    {
    if (hashLookup(activeGroups, grp->name) == NULL)
        {
        continue;
        }
    struct grp *copy = grpDup(grp);
    if (hubAnchor == NULL)
        {
        slAddHead(&result, copy);
        }
    else
        {
        copy->next = hubAnchor->next;
        hubAnchor->next = copy;
        }
    hashAdd(knownGroups, copy->name, copy);
    }

/* Sanity check, skipped for track hub databases: warn about tracks whose
 * group did not make it onto the list. */
if (!trackHubDatabase(db))
    {
    for (tdb = trackList; tdb != NULL; tdb = tdb->next)
        {
        if (hashLookup(knownGroups, tdb->grp) == NULL)
            {
            warn("Track %s has group %s, which isn't in grp table", tdb->table, tdb->grp);
            }
        }
    }

/* Pseudo group covering all tracks. */
AllocVar(grp);
grp->name = cloneString("allTracks");
grp->label = cloneString("All Tracks");
slAddTail(&result, grp);

/* Pseudo group covering all tables, only on request. */
if (allTablesOk)
    {
    AllocVar(grp);
    grp->name = cloneString("allTables");
    grp->label = cloneString("All Tables");
    slAddTail(&result, grp);
    }

hashFree(&activeGroups);
hashFree(&knownGroups);
return result;
}
static void showLinkedTables(struct joiner *joiner, struct dbTable *inList, char *varPrefix, char *buttonName, char *buttonText) /* Print section with list of linked tables and check boxes to turn them * on. */ { struct dbTable *outList = NULL, *out, *in; char dtName[256]; struct hash *uniqHash = newHash(0); struct hash *inHash = newHash(8); /* Build up list of tables we link to in outList. */ for (in = inList; in != NULL; in = in->next) { struct sqlConnection *conn = NULL; if (!trackHubDatabase(database)) conn = hAllocConn(in->db); struct joinerPair *jpList, *jp; /* Keep track of tables in inList. */ safef(dtName, sizeof(dtName), "%s.%s", inList->db, inList->table); hashAdd(inHash, dtName, NULL); /* First table in input is not allowed in output. */ if (in == inList) hashAdd(uniqHash, dtName, NULL); /* Scan through joining information and add tables, * avoiding duplicate additions. */ jpList = joinerRelate(joiner, in->db, in->table); for (jp = jpList; jp != NULL; jp = jp->next) { safef(dtName, sizeof(dtName), "%s.%s", jp->b->database, jp->b->table); if (!hashLookup(uniqHash, dtName) && !cartTrackDbIsAccessDenied(jp->b->database, jp->b->table)) { hashAdd(uniqHash, dtName, NULL); out = dbTableNew(jp->b->database, jp->b->table); slAddHead(&outList, out); } } joinerPairFreeList(&jpList); hFreeConn(&conn); } slSort(&outList, dbTableCmp); /* Print html. 
*/ if (outList != NULL) { webNewSection("Linked Tables"); hTableStart(); for (out = outList; out != NULL; out = out->next) { struct sqlConnection *conn = hAllocConn(out->db); struct asObject *asObj = asForTable(conn, out->table); char *var = dbTableVar(varPrefix, out->db, out->table); hPrintf("<TR>"); hPrintf("<TD>"); cgiMakeCheckBox(var, varOn(var)); hPrintf("</TD>"); hPrintf("<TD>%s</TD>", out->db); hPrintf("<TD>%s</TD>", out->table); hPrintf("<TD>"); if (asObj != NULL) hPrintf("%s", asObj->comment); else hPrintf(" "); hPrintf("</TD>"); hPrintf("</TR>"); hFreeConn(&conn); } hTableEnd(); hPrintf("<BR>"); cgiMakeButton(buttonName, buttonText); } }
void submitRefToFiles(struct sqlConnection *conn, struct sqlConnection *conn2, struct sqlConnection *connSp, char *ref, char *fileRoot, char *inJax) /* Create a .ra and a .tab file for given reference. */ { /* Initially the tab file will have some duplicate lines, so * write to temp file, and then filter. */ char raName[PATH_LEN], tabName[PATH_LEN], capName[PATH_LEN]; FILE *ra = NULL, *tab = NULL, *cap = NULL; struct dyString *query = dyStringNew(0); struct sqlResult *sr; char **row; char *pubMed; struct slName *list, *el; boolean gotAny = FALSE; struct hash *uniqImageHash = newHash(0); struct hash *captionHash = newHash(0); int imageWidth = 0, imageHeight = 0; char path[PATH_LEN]; struct dyString *caption = dyStringNew(0); struct dyString *copyright = dyStringNew(0); struct dyString *probeNotes = dyStringNew(0); boolean lookedForCopyright = FALSE; safef(raName, sizeof(raName), "%s.ra", fileRoot); safef(tabName, sizeof(tabName), "%s.tab", fileRoot); safef(capName, sizeof(capName), "%s.txt", fileRoot); tab = mustOpen(tabName, "w"); cap = mustOpen(capName, "w"); sqlDyStringPrintf(query, "select authors,journal,title,year from BIB_Refs where "); sqlDyStringPrintf(query, "_Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); row = sqlNextRow(sr); if (row == NULL) errAbort("Can't find _Refs_key %s in BIB_Refs", ref); /* Make ra file with stuff common to whole submission set. */ ra = mustOpen(raName, "w"); fprintf(ra, "submissionSource MGI\n"); fprintf(ra, "acknowledgement Thanks to the Gene Expression Database group at " "Mouse Genome Informatics (MGI) for collecting, annotating and sharing " "this image. The MGI images were last updated in VisiGene on March 28, 2006. 
" "Additional and more up to date annotations and images may be available " "directly at <A HREF='http://www.informatics.jax.org' target='_blank'>MGI.</A>\n"); fprintf(ra, "submitSet jax%s\n", ref); fprintf(ra, "taxon 10090\n"); /* Mus musculus taxon */ fprintf(ra, "fullDir http://hgwdev.gi.ucsc.edu/visiGene/full/inSitu/Mouse/jax\n"); fprintf(ra, "thumbDir http://hgwdev.gi.ucsc.edu/visiGene/200/inSitu/Mouse/jax\n"); fprintf(ra, "setUrl http://www.informatics.jax.org/\n"); fprintf(ra, "itemUrl http://www.informatics.jax.org/searches/image.cgi?%%s\n"); fprintf(ra, "abUrl http://www.informatics.jax.org/searches/antibody.cgi?%%s\n"); fprintf(ra, "journal %s\n", row[1]); fprintf(ra, "publication %s\n", row[2]); fprintf(ra, "year %s\n", row[3]); /* The contributor (author) list is in format Kent WJ; Haussler DH; format in * Jackson. We convert it to Kent W.J.,Haussler D.H., format for visiGene. */ fprintf(ra, "contributor "); list = charSepToSlNames(row[0], ';'); for (el = list; el != NULL; el = el->next) { char *lastName = skipLeadingSpaces(el->name); char *initials = strrchr(lastName, ' '); if (initials == NULL) initials = ""; else *initials++ = 0; fprintf(ra, "%s", lastName); if (initials[0] != 0) { char c; fprintf(ra, " "); while ((c = *initials++) != 0) fprintf(ra, "%c.", c); } fprintf(ra, ","); } fprintf(ra, "\n"); slNameFreeList(&list); sqlFreeResult(&sr); /* Add in link to PubMed record on publication. 
*/ dyStringClear(query); sqlDyStringPrintf(query, "select ACC_Accession.accID from ACC_Accession,ACC_LogicalDB " "where ACC_Accession._Object_key = %s " "and ACC_Accession._LogicalDB_key = ACC_LogicalDB._LogicalDB_key " "and ACC_LogicalDB.name = 'PubMed'", ref); pubMed = sqlQuickStringVerbose(conn, query->string); if (pubMed != NULL) fprintf(ra, "pubUrl https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=%s\n", pubMed); freez(&pubMed); dyStringClear(query); sqlDyStringPrintf(query, "select distinct MRK_Marker.symbol as gene," "GXD_Specimen.sex as sex," "GXD_Specimen.age as age," "GXD_Specimen.ageMin as ageMin," "GXD_Specimen.ageMax as ageMax," "IMG_ImagePane.paneLabel as paneLabel," "ACC_Accession.numericPart as fileKey," "IMG_Image._Image_key as imageKey," "GXD_Assay._ProbePrep_key as probePrepKey," "GXD_Assay._AntibodyPrep_key as antibodyPrepKey," "GXD_Assay._ReporterGene_key as reporterGeneKey," "GXD_FixationMethod.fixation as fixation," "GXD_EmbeddingMethod.embeddingMethod as embedding," "GXD_Assay._Assay_key as assayKey," "GXD_Specimen.hybridization as sliceType," "GXD_Specimen._Genotype_key as genotypeKey," "IMG_ImagePane._ImagePane_key as imagePaneKey\n" "from MRK_Marker," "GXD_Assay," "GXD_Specimen," "GXD_InSituResult," "GXD_InSituResultImage," "GXD_FixationMethod," "GXD_EmbeddingMethod," "IMG_ImagePane," "IMG_Image," "ACC_Accession\n" "where MRK_Marker._Marker_key = GXD_Assay._Marker_key " "and GXD_Assay._Assay_key = GXD_Specimen._Assay_key " "and GXD_Specimen._Specimen_key = GXD_InSituResult._Specimen_key " "and GXD_InSituResult._Result_key = GXD_InSituResultImage._Result_key " "and GXD_InSituResultImage._ImagePane_key = IMG_ImagePane._ImagePane_key " "and GXD_FixationMethod._Fixation_key = GXD_Specimen._Fixation_key " "and GXD_EmbeddingMethod._Embedding_key = GXD_Specimen._Embedding_key " "and IMG_ImagePane._Image_key = IMG_Image._Image_key " "and IMG_Image._Image_key = ACC_Accession._Object_key " "and 
ACC_Accession.prefixPart = 'PIX:' " "and GXD_Assay._ImagePane_key is NULL " ); sqlDyStringPrintf(query, "and GXD_Assay._Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); fprintf(tab, "#"); fprintf(tab, "gene\t"); fprintf(tab, "probeColor\t"); fprintf(tab, "sex\t"); fprintf(tab, "age\t"); fprintf(tab, "ageMin\t"); fprintf(tab, "ageMax\t"); fprintf(tab, "paneLabel\t"); fprintf(tab, "fileName\t"); fprintf(tab, "submitId\t"); fprintf(tab, "fPrimer\t"); fprintf(tab, "rPrimer\t"); fprintf(tab, "abName\t"); fprintf(tab, "abTaxon\t"); fprintf(tab, "abSubmitId\t"); fprintf(tab, "fixation\t"); fprintf(tab, "embedding\t"); fprintf(tab, "bodyPart\t"); fprintf(tab, "sliceType\t"); fprintf(tab, "genotype\t"); fprintf(tab, "strain\t"); fprintf(tab, "priority\t"); fprintf(tab, "captionId\t"); fprintf(tab, "imageWidth\t"); fprintf(tab, "imageHeight\n"); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *sex = row[1]; char *age = row[2]; char *ageMin = row[3]; char *ageMax = row[4]; char *paneLabel = row[5]; char *fileKey = row[6]; char *imageKey = row[7]; char *probePrepKey = row[8]; char *antibodyPrepKey = row[9]; char *reporterGeneKey = row[10]; char *fixation = row[11]; char *embedding = row[12]; char *assayKey = row[13]; char *sliceType = row[14]; char *genotypeKey = row[15]; char *imagePaneKey = row[16]; double calcAge = -1; char *probeColor = ""; char *bodyPart = ""; char *abName = NULL; char *rPrimer = NULL, *fPrimer = NULL; char *genotype = NULL; char *strain = NULL; char *priority = NULL; char abTaxon[32]; char *captionId = ""; char *abSubmitId = NULL; verbose(3, " "); dumpRow(row, 16); if (age == NULL) continue; if (!lookedForCopyright) { struct sqlResult *sr = NULL; char **row; lookedForCopyright = TRUE; dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = 
MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Copyright' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(copyright, row[0]); sqlFreeResult(&sr); verbose(2,"imageKey=%s\n",imageKey); if (copyright->stringSize != 0) { fprintf(ra, "copyright %s\n", copyright->string); } } /* Massage sex */ { if (sameString(sex, "Male")) sex = "male"; else if (sameString(sex, "Female")) sex = "female"; else sex = ""; } /* Massage age */ { char *embryoPat = "embryonic day "; char *newbornPat = "postnatal newborn"; char *dayPat = "postnatal day "; char *weekPat = "postnatal week "; char *adultPat = "postnatal adult"; double calcMinAge = atof(ageMin); double calcMaxAge = atof(ageMax); double mouseBirthAge = 21.0; //double mouseAdultAge = 63.0; /* Relative to conception, not birth */ if (age[0] == 0) { warn("age null, ageMin %s, ageMax %s\n", ageMin, ageMax); calcAge = (calcMinAge + calcMaxAge) * 0.5; } else if (startsWith(embryoPat, age)) calcAge = atof(age+strlen(embryoPat)); else if (sameString(newbornPat, age)) calcAge = mouseBirthAge; else if (startsWith(dayPat, age)) calcAge = atof(age+strlen(dayPat)) + mouseBirthAge; else if (startsWith(weekPat, age)) calcAge = 7.0 * atof(age+strlen(weekPat)) + mouseBirthAge; else if (sameString(adultPat, age) && calcMaxAge - calcMinAge > 1000 && calcMinAge < 365) calcAge = 365; /* Most adult mice are relatively young */ else { warn("Calculating age from %s", age); calcAge = (calcMinAge + calcMaxAge) * 0.5; } if (calcAge < calcMinAge) calcAge = calcMinAge; if (calcAge > calcMaxAge) calcAge = calcMaxAge; } /* Massage probeColor */ { if (!isStrNull(reporterGeneKey)) { /* Fixme: make sure that reporterGene's end up in probeType table. 
*/ char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select term from VOC_Term where _Term_key = %s", reporterGeneKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find _ReporterGene_key %s in VOC_Term", reporterGeneKey); else if (sameString(name, "GFP")) probeColor = "green"; else if (sameString(name, "lacZ")) probeColor = "blue"; else warn("Don't know color of reporter gene %s", name); freez(&name); } if (!isStrNull(probePrepKey)) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_VisualizationMethod.visualization " "from GXD_VisualizationMethod,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Visualization_key = GXD_VisualizationMethod._Visualization_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find visualization from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); freez(&name); if (probeColor[0] == 0) { dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Label_key = GXD_Label._Label_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); } freez(&name); } if (!isStrNull(antibodyPrepKey) && probeColor[0] == 0 ) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_AntibodyPrep " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Label_key = GXD_Label._Label_key" , antibodyPrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _AntibodyPrep_key %s", antibodyPrepKey); probeColor = colorFromLabel(name, gene); freez(&name); } } /* Get abName, abTaxon, abSubmitId */ abTaxon[0] = 0; if 
(!isStrNull(antibodyPrepKey)) { struct sqlResult *sr = NULL; int orgKey = 0; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select antibodyName,_Organism_key,GXD_Antibody._Antibody_key " "from GXD_AntibodyPrep,GXD_Antibody " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Antibody_key = GXD_Antibody._Antibody_key" , antibodyPrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { abName = cloneString(row[0]); orgKey = atoi(row[1]); abSubmitId = cloneString(row[2]); } sqlFreeResult(&sr); if (orgKey > 0) { char *latinName = NULL, *commonName = NULL; int spTaxon = 0; dyStringClear(query); sqlDyStringPrintf(query, "select latinName from MGI_Organism " "where _Organism_key = %d", orgKey); latinName = sqlQuickStringVerbose(conn2, query->string); if (latinName != NULL && !sameString(latinName, "Not Specified") && !sameString(latinName, "Not Applicable")) { char *e = strchr(latinName, '/'); if (e != NULL) *e = 0; /* Chop off / and after. 
*/ spTaxon = spBinomialToTaxon(connSp, latinName); } else { dyStringClear(query); sqlDyStringPrintf(query, "select commonName from MGI_Organism " "where _Organism_key = %d", orgKey); commonName = sqlQuickStringVerbose(conn2, query->string); if (commonName != NULL && !sameString(commonName, "Not Applicable") && !sameString(commonName, "Not Specified")) { spTaxon = spCommonToTaxon(connSp, commonName); } } if (spTaxon != 0) safef(abTaxon, sizeof(abTaxon), "%d", spTaxon); freez(&latinName); freez(&commonName); } } if (abName == NULL) abName = cloneString(""); if (abSubmitId == NULL) abSubmitId = cloneString(""); /* Get rPrimer, lPrimer */ if (!isStrNull(probePrepKey)) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select primer1sequence,primer2sequence " "from PRB_Probe,GXD_ProbePrep " "where PRB_Probe._Probe_key = GXD_ProbePrep._Probe_key " "and GXD_ProbePrep._ProbePrep_key = %s" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { fPrimer = cloneString(row[0]); rPrimer = cloneString(row[1]); } sqlFreeResult(&sr); } /* Note Jackson database actually stores the primers very * erratically. In all the cases I can find for in situs * the primers are actually stored in free text in the PRB_Notes * e.g. ... primers CGCGGATCCAGGGGAAACAGAAGGGCTGCG and CCCAAGCTTAGACTGTACAGGCTGAGCC ... 
*/ if (fPrimer == NULL || fPrimer[0]==0) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select PRB_Notes.note from GXD_ProbePrep, PRB_Notes" " where GXD_ProbePrep._ProbePrep_key = %s" " and GXD_ProbePrep._Probe_key = PRB_Notes._Probe_key" " order by PRB_Notes.sequenceNum" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); dyStringClear(probeNotes); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(probeNotes, row[0]); sqlFreeResult(&sr); if (probeNotes->stringSize > 0) { char f[256]; char r[256]; int i = 0; char *s = strstr(probeNotes->string," primers "); if (s) { s += strlen(" primers "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(f))) f[i++] = *s++; f[i]=0; if (strstr(s," and ")==s) { s += strlen(" and "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(r))) r[i++] = *s++; r[i]=0; if (strlen(f) >= 10 && strlen(r) >= 10) { fPrimer = cloneString(f); rPrimer = cloneString(r); } else { verbose(1, "bad primer parse:_ProbePrep_key=%s fPrimer=[%s], rPrimer=[%s]\n", probePrepKey,f,r); } } } } } if (fPrimer == NULL) fPrimer = cloneString(""); if (rPrimer == NULL) rPrimer = cloneString(""); fixation = blankOutUnknown(fixation); embedding = blankOutUnknown(embedding); /* Massage body part and slice type. We only handle whole mounts. */ if (sameString(sliceType, "whole mount")) { bodyPart = "whole"; priority = "100"; } else { sliceType = ""; priority = "1000"; } genotypeAndStrainFromKey(genotypeKey, conn2, &genotype, &strain); if (isStrNull(paneLabel)) paneLabel = cloneString(""); /* trying to suppress nulls in output */ stripChar(paneLabel, '"'); /* Get rid of a difficult quote to process. */ /* Fetch image dimensions from file. */ imageWidth=0; imageHeight=0; safef(path, sizeof(path), "%s/%s.jpg", inJax, fileKey); if (fileExists(path)) jpegSize(path,&imageWidth,&imageHeight); /* will errAbort if no valid .jpeg exists */ else warn("Picture Missing! %s ",path); /* Deal caption if any. 
Most of the work only happens the * first time see the image. */ if (!hashLookup(uniqImageHash, imageKey)) { struct sqlResult *sr = NULL; char **row; hashAdd(uniqImageHash, imageKey, NULL); dyStringClear(caption); dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Caption' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(caption, row[0]); sqlFreeResult(&sr); if (caption->stringSize > 0) { subChar(caption->string, '\t', ' '); subChar(caption->string, '\n', ' '); fprintf(cap, "%s\t%s\n", imageKey, caption->string); hashAdd(captionHash, imageKey, imageKey); } } if (hashLookup(captionHash, imageKey)) captionId = imageKey; else captionId = ""; fprintf(tab, "%s\t", gene); fprintf(tab, "%s\t", probeColor); fprintf(tab, "%s\t", sex); fprintf(tab, "%3.2f\t", calcAge); fprintf(tab, "%s\t", ageMin); fprintf(tab, "%s\t", ageMax); fprintf(tab, "%s\t", paneLabel); /* may have to change NULL to empty string or "0" ? 
*/ fprintf(tab, "%s.jpg\t", fileKey); fprintf(tab, "%s\t", imageKey); fprintf(tab, "%s\t", fPrimer); fprintf(tab, "%s\t", rPrimer); fprintf(tab, "%s\t", abName); fprintf(tab, "%s\t", abTaxon); fprintf(tab, "%s\t", abSubmitId); fprintf(tab, "%s\t", fixation); fprintf(tab, "%s\t", embedding); fprintf(tab, "%s\t", bodyPart); fprintf(tab, "%s\t", sliceType); fprintf(tab, "%s\t", genotype); fprintf(tab, "%s\t", strain); fprintf(tab, "%s\t", priority); fprintf(tab, "%s\t", captionId); fprintf(tab, "%d\t", imageWidth); fprintf(tab, "%d\n", imageHeight); printExpression(tab, conn2, imagePaneKey, assayKey); gotAny = TRUE; freez(&genotype); freez(&abName); freez(&abSubmitId); freez(&rPrimer); freez(&fPrimer); } sqlFreeResult(&sr); carefulClose(&ra); carefulClose(&tab); carefulClose(&cap); if (!gotAny) { remove(raName); remove(capName); remove(tabName); } dyStringFree(&probeNotes); dyStringFree(©right); dyStringFree(&caption); dyStringFree(&query); hashFree(&uniqImageHash); hashFree(&captionHash); }
void txGeneAlias(char *genomeDb, char *uniProtDb, char *xrefFile, char *evFile, char *oldToNew, char *aliasFile, char *protAliasFile) /* txGeneAlias - Make kgAlias and kgProtAlias tables.. */ { /* Read and hash oldToNew */ struct hash *newToOldHash = loadNewToOldHash(oldToNew); /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); struct sqlResult *sr; char **row; char query[256]; /* Open files. */ struct lineFile *lf = lineFileOpen(xrefFile, TRUE); FILE *fAlias = mustOpen(aliasFile, "w"); FILE *fProt = mustOpen(protAliasFile, "w"); /* Stream through xref file, which has much of the info we need, * and which contains a line for each gene. */ char *words[KGXREF_NUM_COLS]; while (lineFileRowTab(lf, words)) { /* Load the xref, and output most of it's fields as aliases. */ struct kgXref *x = kgXrefLoad(words); char *id = x->kgID; outAlias(fAlias, id, x->kgID); outAlias(fAlias, id, x->mRNA); outAlias(fAlias, id, x->spID); outAlias(fAlias, id, x->spDisplayID); outAlias(fAlias, id, x->geneSymbol); outAlias(fAlias, id, x->refseq); outAlias(fAlias, id, x->protAcc); char *old = hashFindVal(newToOldHash, id); if (old != NULL) outAlias(fAlias, id, old); /* If we've got a uniProt ID, use that to get more info from uniProt. */ char *acc = x->spID; if (acc[0] != 0) { /* Get current accession and output a bunch of easy protein aliases. */ acc = spLookupPrimaryAcc(uConn, acc); outProt(fProt, id, acc, acc); outProt(fProt, id, acc, x->spDisplayID); outProt(fProt, id, acc, x->geneSymbol); outProt(fProt, id, acc, x->protAcc); if (old != NULL) outProt(fProt, id, acc, old); /* Throw in old swissProt accessions. 
*/ sqlSafef(query, sizeof(query), "select val from otherAcc where acc = '%s'", acc); sr = sqlGetResult(uConn, query); while ((row = sqlNextRow(sr)) != NULL) { outAlias(fAlias, id, row[0]); outProt(fProt, id, acc, row[0]); } /* Throw in gene names that SwissProt knows about */ struct slName *gene, *geneList = spGenes(uConn, acc); for (gene = geneList; gene != NULL; gene = gene->next) { outAlias(fAlias, id, gene->name); outProt(fProt, id, acc, gene->name); } slFreeList(&geneList); } /* Throw in gene names from genbank. */ /* At some point we may want to restrict this to the primary transcript in a cluster. */ ev = hashFindVal(evHash, id); if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { sqlSafef(query, sizeof(query), "select geneName from gbCdnaInfo where acc='%s'", acc); int nameId = sqlQuickNum(gConn, query); if (nameId != 0) { char name[64]; sqlSafef(query, sizeof(query), "select name from geneName where id=%d", nameId); if (sqlQuickQuery(gConn, query, name, sizeof(name))) outAlias(fAlias, id, name); } } } kgXrefFree(&x); } carefulClose(&fAlias); carefulClose(&fProt); }