int main(int argc, char *argv[])
/* Process command line.
 *   argv[1] is database
 *   argv[2] is chrom
 *   argv[3] is nib file
 *   argv[4] is output file
 * Returns 0 after snpMask has run, -1 when arguments are missing or the
 * database is not active. */
{
/* All four positional arguments are read below; refuse to run without them
 * rather than reading argv entries that were never supplied. Extra trailing
 * arguments are still tolerated, as before. */
if (argc < 5)
    {
    fprintf(stderr, "usage: %s database chrom nibFile outputFile\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "program");
    return -1;
    }

database = argv[1];
if (!hDbIsActive(database))
    {
    printf("Currently no support for %s\n", database);
    return -1;
    }
hSetDb(database);
chromName = argv[2];
// testFB();
snpMask(argv[3], argv[4]);
return 0;
}
char *checkParams(char *database, char *prefix, char *type) /* If we don't have valid CGI parameters, quit with a Bad Request HTTP response. */ { pushWarnHandler(htmlVaBadRequestAbort); pushAbortHandler(htmlVaBadRequestAbort); if(prefix == NULL || database == NULL) errAbort("%s", "Missing prefix and/or db CGI parameter"); if (! hDbIsActive(database)) errAbort("'%s' is not a valid, active database", htmlEncode(database)); if (isNotEmpty(type) && differentString(type, ALT_OR_PATCH)) errAbort("'%s' is not a valid type", type); char *table = NULL; if (! sameOk(type, ALT_OR_PATCH)) { struct sqlConnection *conn = hAllocConn(database); table = connGeneSuggestTable(conn); hFreeConn(&conn); if(table == NULL) errAbort("gene autosuggest is not supported for db '%s'", database); } popWarnHandler(); popAbortHandler(); return table; }
void verifyGreatAssemblies() { // First read in the assembly name and description information into name lists struct slName* supportedAssemblies = NULL; struct lineFile *lf = lineFileOpen(greatData, TRUE); int fieldCount = 1; char* row[fieldCount]; int wordCount; while ((wordCount = lineFileChopTab(lf, row)) != 0) { if (wordCount != fieldCount) errAbort("The %s file is not properly formatted.\n", greatData); slNameAddHead(&supportedAssemblies, row[0]); } lineFileClose(&lf); boolean invalidAssembly = TRUE; struct slName* currAssembly; for (currAssembly = supportedAssemblies; currAssembly != NULL; currAssembly = currAssembly->next) { if (!hDbIsActive(currAssembly->name)) { errAbort("Assembly %s in supported assembly file is not an active assembly.\n", currAssembly->name); } if (sameOk(database, currAssembly->name)) { invalidAssembly = FALSE; break; } } if (invalidAssembly) { slReverse(&supportedAssemblies); currAssembly = supportedAssemblies; struct dyString* dy = dyStringNew(0); addAssemblyToSupportedList(dy, currAssembly->name); currAssembly = currAssembly->next; while (currAssembly != NULL) { dyStringAppend(dy, ", "); if (currAssembly->next == NULL) dyStringAppend(dy, "and "); addAssemblyToSupportedList(dy, currAssembly->name); currAssembly = currAssembly->next; } hPrintf("<script type='text/javascript'>\n"); hPrintf("function logSpecies() {\n"); hPrintf("try {\n"); hPrintf("var r = new XMLHttpRequest();\n"); hPrintf("r.open('GET', 'http://great.stanford.edu/public/cgi-bin/logSpecies.php?species=%s');\n", database); hPrintf("r.send(null);\n"); hPrintf("} catch (err) { }\n"); hPrintf("}\n"); hPrintf("window.onload = logSpecies;\n"); hPrintf("</script>\n"); errAbort("GREAT only supports the %s assemblies." "\nPlease go back and ensure that one of those assemblies is chosen.", dyStringContents(dy)); htmlClose(); dyStringFree(&dy); } slNameFreeList(&supportedAssemblies); }
void snpValid() /* Test snpMap --> dbSnpRs/affy for one assembly. */ { char *Org; char *dbSnpTbl = NULL; struct dbSnpRs *dbSnps = NULL; struct dbSnpRs *dbSnp = NULL; struct affy10KDetails *affy10s = NULL; struct affy10KDetails *affy10 = NULL; struct affy120KDetails *affy120s = NULL; struct affy120KDetails *affy120 = NULL; struct axtScoreScheme *simpleDnaScheme = NULL; int match = 0; /* good match of minimal acceptable quality */ int mismatch = 0; /* unacceptable match quality */ int missing = 0; /* unable to find rsId in dbSnpRs/affy */ int goodrc = 0; /* matches after reverse-complement */ int assemblyDash = 0; /* assembly context is just a single dash - (complex cases) */ int gapNib = 0; /* nib returns n's, we are in the gap */ int totalMatch = 0; int totalMismatch = 0; int totalMissing = 0; int totalGoodrc = 0; int totalAssemblyDash = 0; int totalGapNib = 0; boolean affy = FALSE; int mode = 3; void *next = NULL; char *id = NULL; char *seq = NULL; char affy120id[12]; int matchScore = 100; int misMatchScore = 100; int gapOpenPenalty = 400; int gapExtendPenalty = 50; int noDna = 0; int snpMapRows = 0; /* controls whether affy120k, affy10k, or dbSnpRs is used currently affys are human only */ if (!hDbIsActive(db)) { printf("Currently no support for db %s\n", db); return; } hSetDb(db); Org = hOrganism(db); if (sameWord(Org,"Human")) affy = TRUE; if (sameWord(Org,"Human")) dbSnpTbl = "dbSnpRsHg"; else if (sameWord(Org,"Mouse")) dbSnpTbl = "dbSnpRsMm"; else if (sameWord(Org,"Rat")) dbSnpTbl = "dbSnpRsRn"; else { printf("Currently no support for Org %s\n", Org); return; } simpleDnaScheme = axtScoreSchemeSimpleDna(matchScore, misMatchScore, gapOpenPenalty, gapExtendPenalty); uglyf("dbSnp Table=%s \n",dbSnpTbl); uglyf("Affy=%s \n", affy ? 
"TRUE" : "FALSE" ); dbSnps = readDbSnps(dbSnpTbl); printf("read hgFixed.%s \n",dbSnpTbl); if (affy) { affy10s = readAffy10(); printf("read hgFixed.affy10KDetails \n"); affy120s = readAffy120(); printf("read hgFixed.affy120KDetails \n"); } int bogus = 0; // debug if (0) { printf("rsId assembly-sequence \n"); printf("---------------------------------------------- \n"); for (dbSnp = dbSnps; dbSnp != NULL; dbSnp = dbSnp->next) { printf("%s %s \n", dbSnp->rsId, dbSnp->assembly ); // debug: cut it short for testing only if (++bogus > 1) break; } printf("\n"); printf("\n"); } bogus=0; struct slName *cns = hAllChromNames(); struct slName *cn=NULL; if (!cns) { printf("testDb: hAllChromNames returned empty list \n"); return; } if (affy) { mode=1; /* start on affy120 with numbers in snpMap.rsId */ } else { mode=2; /* start on dbSnps with "rs*" in snpMap.rsId */ } for (cn = cns; cn != NULL; cn = cn->next) { struct dnaSeq *chromSeq = NULL; struct snpMap *snps = NULL; struct snpMap *snp = NULL; if (chr != NULL) if (!sameWord(chr,cn->name)) continue; //uglyf("testDb: beginning chrom %s \n",cn->name); chromSeq = hLoadChrom(cn->name); printf("testDb: chrom %s : size (%u) \n",cn->name,chromSeq->size); snps = readSnps(cn->name); printf("read %s.snpMap where chrom=%s \n",db,cn->name); dbSnp = dbSnps; affy10 = affy10s; affy120 = affy120s; printf("=========================================================\n"); for (snp = snps; snp != NULL; snp = snp->next) { int cmp = -1; char *nibDna=NULL; char *nibDnaRc=NULL; ++snpMapRows; /* printf("%s %s %u %u %s\n", snp->name, snp->chrom, snp->chromStart, snp->chromEnd, nibDna ); */ while (cmp < 0) { while (cmp < 0) { switch (mode) { case 1: next = affy120; break; case 2: next = dbSnp; break; case 3: next = affy10; break; } if (next == NULL) { switch (mode) { case 1: ++mode; break; case 2: ++mode; break; case 3: cmp = 1; break; } } else { break; } } if (cmp < 0) { switch (mode) { case 1: safef(affy120id, sizeof(affy120id), "%d", affy120->affyId); /* 
have int type but want string */ id = affy120id; break; case 2: id = dbSnp->rsId; break; case 3: id = affy10->affyId; break; } cmp=mystrcmp(id, snp->name); } if (cmp < 0) { switch (mode) { case 1: affy120 = affy120->next; break; case 2: dbSnp = dbSnp->next; break; case 3: affy10 = affy10->next; break; } } } if (cmp==0) { int strand=1; char *rc = NULL; int m = 0; int lf = 0; /* size of left flank context (lower case dna) */ int rf = 0; /* size of right flank context (lower case dna) */ int ls = 0; /* total size of assembly dna context plus actual region in dbSnpRs/affy */ char *origSeq = NULL; /* use to display the original dnSnpRs.assembly seq */ switch (mode) { case 1: seq = affy120->sequenceA; break; case 2: seq = dbSnp->assembly; break; case 3: seq = affy10->sequenceA; break; } if (sameString(seq,"-")) { ++assemblyDash; if (Verbose) printf("(no assembly context) rsId=%s chrom=%s %u %u \n assembly=%s \n\n", id, snp->chrom, snp->chromStart, snp->chromEnd, seq ); continue; } origSeq = seq; lf = leftFlank(origSeq); rf = rightFlank(origSeq); seq = cloneString(origSeq); stripDashes(seq); /* remove dashes indicating insert to simplify and correct processing of nib data */ ls = strlen(seq); /* used to be: lengthOneDash(seq); */ //debug //uglyf("about to call checkandFetchNib origSeq=%s lf=%d, rf=%d ls=%d \n", origSeq, lf, rf, ls); nibDna = checkAndFetchNib(chromSeq, snp, lf, ls); if (nibDna==NULL) { ++noDna; printf("no dna for %s %s %u %u \n", snp->name, snp->chrom, snp->chromStart, snp->chromEnd ); continue; } //debug //uglyf("got past checkandFetchNib call: \n nibDna=%s \n",nibDna); if (allNs(nibDna)) { ++gapNib; ++mismatch; if (Verbose) printf("(nib gap) rsId=%s chrom=%s %u %u \n assembly=%s \n snpMap=%s \n\n", id, snp->chrom, snp->chromStart, snp->chromEnd, seq, nibDna ); continue; } m = misses(seq,nibDna); if (m > 1) { //debug //uglyf("rc: about to call checkandFetchNib \n"); rc = checkAndFetchNib(chromSeq, snp, rf, ls); if (rc==NULL) { ++noDna; printf("no dna for 
%s %s %u %u \n", snp->name, snp->chrom, snp->chromStart, snp->chromEnd ); continue; } //debug //uglyf("rc: got past checkandFetchNib call: \n rc Dna=%s \n",rc); reverseComplement(rc,strlen(rc)); int n = misses(seq, rc); if (n < m) { strand=-1; m = n; } } if (m <= 1) { ++match; if (strand < 1) ++goodrc; } else { struct dnaSeq query, target; struct axt *axtAln = NULL; int bestScore = 0; ZeroVar(&query); query.dna = seq; query.size = strlen(query.dna); ZeroVar(&target); target.dna = nibDna; target.size = strlen(target.dna); axtAln = axtAffine(&query, &target, simpleDnaScheme); strand = 1; if (axtAln) { bestScore = axtAln->score / ls; } axtFree(&axtAln); if (bestScore < threshold) { ZeroVar(&target); target.dna = rc; target.size = strlen(target.dna); axtAln = axtAffine(&query, &target, simpleDnaScheme); if ((axtAln) && (bestScore < (axtAln->score / ls))) { strand = -1; bestScore = axtAln->score / ls; } axtFree(&axtAln); } if (bestScore >= threshold) { ++match; if (strand < 1) ++goodrc; } else { ++mismatch; } if ((bestScore < threshold) || Verbose) { printf( "score=%d misses=%u strand=%d rsId=%s chrom=%s %u %u lf=%d ls=%d \n" " assembly=%s \n" " snpMap=%s \n" "rc snpMap=%s \n" "\n", bestScore, m, strand, id, snp->chrom, snp->chromStart, snp->chromEnd, lf, ls, seq, nibDna, rc ); } } freez(&rc); freez(&seq); } else { char snpLkup[10] = ""; /* this id is missing from dbSnpRs/affy! 
*/ ++missing; switch (mode) { case 1: safef(snpLkup,sizeof(snpLkup),"%s","affy120"); break; case 2: safef(snpLkup,sizeof(snpLkup),"%s",dbSnpTbl); break; case 3: safef(snpLkup,sizeof(snpLkup),"%s","affy10"); break; } if (Verbose) printf("snpMap.name=%s is missing from %s (now at %s) \n\n",snp->name,snpLkup,id); } freez(&nibDna); // debug: cut it short for testing only //break; } snpMapFreeList(&snps); dnaSeqFree(&chromSeq); printf("\n\n\n Total matches for chrom %s:\n ",cn->name); printf(" matches: %u \n ",match); printf(" mismatches: %u \n",mismatch); printf("missing from dbSnpRs: %u \n",missing); printf(" rev compl matches: %u \n",goodrc); printf(" assembly = -: %u \n",assemblyDash); printf(" nib in gap : %u \n",gapNib); printf("\n\n=========================================\n"); totalMatch += match; totalMismatch += mismatch; totalMissing += missing; totalGoodrc += goodrc; totalAssemblyDash += assemblyDash; totalGapNib += gapNib; match = 0; mismatch = 0; missing = 0; goodrc = 0; assemblyDash = 0; gapNib = 0; // debug: cut it to just one or two chrom for testing //if (++bogus > 1) // break; printf("\n"); printf("\n"); } slFreeList(&cns); dbSnpRsFreeList(&dbSnps); if (affy) { affy10KDetailsFreeList(&affy10s); affy120KDetailsFreeList(&affy120s); } axtScoreSchemeFree(&simpleDnaScheme); printf("\n\n\n Grand Totals: \n "); printf(" matches: %u \n ",totalMatch); printf(" mismatches: %u \n",totalMismatch); printf("missing from dbSnpRs: %u \n",totalMissing); printf(" rev compl matches: %u \n",totalGoodrc); printf(" assembly = -: %u \n",totalAssemblyDash); printf(" nib in gap : %u \n",totalGapNib); printf("\n Total rows in snpMap: %u \n ",snpMapRows); printf("\n # no dna found for : %u \n ",noDna); printf("\n\n=========================================\n"); }
void mafPrettyOut(FILE *f, struct mafAli *maf, int lineSize, boolean onlyDiff, int blockNo)
/* Write one MAF alignment block as HTML text, lineSize alignment columns per
 * output row.
 *
 * f        - stream for most of the output.
 * maf      - alignment block; its first component is used as the reference.
 * lineSize - number of alignment columns per output row.
 * onlyDiff - when TRUE, non-reference rows are passed the reference text so
 *            blueCapWrite can compare against it.
 * blockNo  - block index, used only in the "Inserts between block" heading.
 *
 * Each row gets "B" (browser) and "D" (get DNA) links on the first line when
 * the component's database is active / exists, then a titled organism label
 * and either the aligned text or a filler run for unaligned components.
 * When any component has an insert to its right inside the window (global
 * winEnd), a trailing list of those inserts is written.
 *
 * NOTE(review): the "D" links are written with printf() rather than
 * fprintf(f, ...), and linkToOtherBrowser*() take no stream argument, so the
 * output is only coherent when f is stdout - confirm with callers. */
{
int ii, ch;
int srcChars = 0;
struct mafComp *mc;
int lineStart, lineEnd;
char *summaryLine = needMem(lineSize+1);
char *referenceText;
int sizeChars = 0;
boolean haveInserts = FALSE;
struct mafComp *masterMc = maf->components;
/* One scratch buffer for all link/label titles; it is cleared before each
 * use and released at the end instead of allocating a new one per row. */
struct dyString *dy = newDyString(512);

for (mc = maf->components; mc != NULL; mc = mc->next)
    {
    /* Figure out length of source (species) field. */
    /*if (mc->size != 0)*/
        {
        char dbOnly[128];
        int len;
        char *org;

        memset(dbOnly, 0, sizeof(dbOnly));
        safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
        chopPrefix(dbOnly);

        if ((org = hOrganism(dbOnly)) == NULL)
            len = strlen(dbOnly);
        else
            len = strlen(org);
        if (srcChars < len)
            srcChars = len;

        len = digitsBaseTen(mc->size);
        if (sizeChars < len)
            sizeChars = len;

        if (mc->text && (mc->rightStatus == MAF_INSERT_STATUS) && (masterMc->start + masterMc->size < winEnd))
            haveInserts = TRUE;

#ifdef REVERSESTRAND
        /* complement bases if hgTracks is on reverse strand */
        if (mc->size && cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
            complement(mc->text, maf->textSize);
#endif
        }
    }

/* first sequence in the alignment */
referenceText = maf->components->text;

for (lineStart = 0; lineStart < maf->textSize; lineStart = lineEnd)
    {
    int size;
    lineEnd = lineStart + lineSize;
    if (lineEnd >= maf->textSize)
        lineEnd = maf->textSize;
    size = lineEnd - lineStart;
    initSummaryLine(summaryLine, size, '*');
    for (mc = maf->components; mc != NULL; mc = mc->next)
        {
        char dbOnly[128], *chrom;
        int s = mc->start;
        int e = s + mc->size;
        char *org;
        char *revComp = "";
        char strand = mc->strand;

#ifdef REVERSESTRAND
        if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
            strand = (strand == '+') ? '-' : '+';
#endif
        if (strand == '-')
            revComp = "&hgSeq.revComp=on";

        dyStringClear(dy);

        safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
        chrom = chopPrefix(dbOnly);

        /* org falls back to the database name, so it is always printable;
         * it is reused for the link titles below. */
        if ((org = hOrganism(dbOnly)) == NULL)
            org = dbOnly;

        if (mc->strand == '-')
            reverseIntRange(&s, &e, mc->srcSize);

        if (mc->text != NULL)
            {
            if (lineStart == 0)
                {
                if (hDbIsActive(dbOnly))
                    {
                    dyStringPrintf(dy, "%s Browser %s:%d-%d %c %*dbps",org,chrom, s+1, e, mc->strand,sizeChars, mc->size);
                    linkToOtherBrowserTitle(dbOnly, chrom, s, e, dy->string);
                    dyStringClear(dy);
                    fprintf(f, "B</A> ");
                    }
                else
                    fprintf(f, " ");

                if (hDbExists(dbOnly))
                    {
                    dyStringPrintf(dy, "Get %s DNA %s:%d-%d %c %*dbps",org,chrom, s+1, e, mc->strand,sizeChars, mc->size);
                    printf("<A TITLE=\"%s\" TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&db=%s%s\">D</A> ", dy->string,hgcName(),
                       s, cgiEncode(chrom),
                       chrom, s, e, dbOnly, revComp);
                    }
                else
                    fprintf(f, " ");
                }
            else
                {
                fprintf(f, " ");
                }

            dyStringClear(dy);
            dyStringPrintf(dy, "%s:%d-%d %c %*dbps",chrom, s+1, e, mc->strand,sizeChars, mc->size);
            fprintf(f, "<A TITLE=\"%s\"> %*s </A> ", dy->string, srcChars, org);

            updateSummaryLine(summaryLine, referenceText + lineStart, mc->text + lineStart, size);
            blueCapWrite(f, mc->text + lineStart, size,
                         (onlyDiff && mc != maf->components) ? referenceText + lineStart : NULL);
            fprintf(f, "\n");
            }
        else
            {
            if (((mc->leftStatus == MAF_CONTIG_STATUS) && (mc->rightStatus == MAF_CONTIG_STATUS) )
            || ((mc->leftStatus == MAF_TANDEM_STATUS) && (mc->rightStatus == MAF_TANDEM_STATUS) )
            || ((mc->leftStatus == MAF_INSERT_STATUS) && (mc->rightStatus == MAF_INSERT_STATUS) )
            || ((mc->leftStatus == MAF_MISSING_STATUS) && (mc->rightStatus == MAF_MISSING_STATUS) ))
                {
                if (lineStart == 0)
                    {
                    /* The links cover the unaligned stretch (rightLen bases),
                     * not the s/e range used for the row label below. */
                    int gapStart = mc->start;
                    int gapEnd = gapStart + mc->rightLen;

                    if (mc->strand == '-')
                        reverseIntRange(&gapStart, &gapEnd, mc->srcSize);

                    if ( hDbIsActive(dbOnly))
                        {
                        dyStringPrintf(dy, "%s Browser %s:%d-%d %c %d bps Unaligned",org,chrom, gapStart+1, gapEnd, mc->strand, gapEnd-gapStart);
                        linkToOtherBrowserTitle(dbOnly, chrom, gapStart, gapEnd, dy->string);
                        fprintf(f,"B</A> ");
                        dyStringClear(dy);
                        }
                    else
                        fprintf(f," ");

                    if (hDbExists(dbOnly))
                        {
                        dyStringPrintf(dy, "Get %s DNA %s:%d-%d %c %d bps Unaligned",org,chrom, gapStart+1, gapEnd, mc->strand, gapEnd-gapStart);
                        printf("<A TITLE=\"%s\" TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&db=%s%s\">D</A> ", dy->string, hgcName(),
                           gapStart, cgiEncode(chrom),
                           chrom, gapStart, gapEnd, dbOnly,revComp);
                        }
                    else
                        fprintf(f, " ");
                    }
                else
                    fprintf(f, " ");

                initSummaryLine(summaryLine, size, ' ');
                dyStringClear(dy);
                dyStringPrintf(dy, "%s:%d-%d %c %*dbps",chrom, s+1, e, mc->strand,sizeChars, mc->size);
                fprintf(f, "<A TITLE=\"%s\">%*s</A> ", dy->string, srcChars, org);

                /* Filler character depends on why the component is unaligned. */
                ch = '-';
                switch(mc->rightStatus)
                    {
                    case MAF_INSERT_STATUS:
                        ch = '=';
                        break;
                    case MAF_MISSING_STATUS:
                        ch = 'N';
                        break;
                    case MAF_TANDEM_STATUS:
                    case MAF_CONTIG_STATUS:
                        ch = '-';
                        break;
                    }
                for(ii=lineStart; ii < lineEnd ; ii++)
                    fputc(ch,f);
                fprintf(f,"\n");
                }
            }
        }
#ifdef ADDMATCHLINE
    if (lineStart == 0)
        fprintf(f, " %-*s %s\n", srcChars, "", summaryLine);
    else
        fprintf(f, "%-*s %s\n", srcChars, "", summaryLine);
#else
    fprintf(f, "\n");
#endif
    }

if (haveInserts)
    {
    fprintf(f, "<B>Inserts between block %d and %d in window</B>\n",blockNo, blockNo+1);
    for (mc = maf->components; mc != NULL; mc = mc->next)
        {
        char dbOnly[128], *chrom;
        int s = mc->start + mc->size;
        int e = s + mc->rightLen;
        char *org;

        if (mc->text == NULL)
            continue;

        if (mc->strand == '-')
            reverseIntRange(&s, &e, mc->srcSize);

        safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
        chrom = chopPrefix(dbOnly);

        if ((org = hOrganism(dbOnly)) == NULL)
            org = dbOnly;

        if (mc->rightStatus == MAF_INSERT_STATUS)
            {
            char *revComp = "";
            if (hDbIsActive(dbOnly))
                {
                char strand = mc->strand;
#ifdef REVERSESTRAND
                if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
                    strand = (strand == '+') ? '-' : '+';
#endif
                if (strand == '-')
                    revComp = "&hgSeq.revComp=on";

                linkToOtherBrowser(dbOnly, chrom, s, e);
                fprintf(f,"B");
                fprintf(f, "</A>");
                fprintf(f, " ");
                }
            else
                fprintf(f, " ");

            if (hDbExists(dbOnly))
                {
                printf("<A TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d"
                       "&db=%s%s\">D</A> ", hgcName(),
                   s, cgiEncode(chrom),
                   chrom, s, e, dbOnly,revComp);
                }
            else
                fprintf(f, " ");

            fprintf(f, "%*s %dbp\n", srcChars, org,mc->rightLen);
            }
        }
    fprintf(f, "\n");
    }

dyStringFree(&dy);
freeMem(summaryLine);
}