static void checkTagIsInside(struct htmlPage *page, char *outsiders, char *insiders, struct htmlTag *startTag, struct htmlTag *endTag) /* Check that insiders are all bracketed by outsiders. */ { char *outDupe = cloneString(outsiders); char *inDupe = cloneString(insiders); char *line, *word; int depth = 0; struct htmlTag *tag; struct hash *outOpen = newHash(8); struct hash *outClose = newHash(8); struct hash *inHash = newHash(8); char buf[256]; /* Create hashes of all insiders */ line = inDupe; while ((word = nextWord(&line)) != NULL) { touppers(word); hashAdd(inHash, word, NULL); } /* Create hash of open and close outsiders. */ line = outDupe; while ((word = nextWord(&line)) != NULL) { touppers(word); hashAdd(outOpen, word, NULL); safef(buf, sizeof(buf), "/%s", word); hashAdd(outClose, buf, NULL); } /* Stream through tags making sure that insiders are * at least one deep inside of outsiders. */ for (tag = startTag; tag != NULL; tag = tag->next) { char *type = tag->name; if (hashLookup(outOpen, type )) ++depth; else if (hashLookup(outClose, type)) --depth; else if (hashLookup(inHash, type)) { if (depth <= 0) { if (!startsWith("<INPUT TYPE=HIDDEN NAME=", tag->start)) // one exception hardwired tagAbort(page, tag, "%s outside of any of %s", type, outsiders); } } } freeHash(&inHash); freeHash(&outOpen); freeHash(&outClose); freeMem(outDupe); freeMem(inDupe); }
void makeKnownGeneHashes(int knownDbCount, char **knownDbs) /* Create hashes containing info on known genes. */ { int i; knownTextHash = hashNew(18); uniProtToKnown = hashNew(18); refSeqToKnown = hashNew(18); aliasToKnown = hashNew(19); nameToKnown = hashNew(18); for (i=0; i<knownDbCount; i += 1) { char *gdb = knownDbs[i]; struct sqlConnection *conn = sqlConnect(gdb); struct sqlResult *sr; char **row; sr = sqlGetResult(conn, "NOSQLINJ select kgID,geneSymbol,spID,spDisplayID,refseq,description from kgXref"); while ((row = sqlNextRow(sr)) != NULL) { char *kgID = cloneString(row[0]); touppers(kgID); touppers(row[1]); hashAdd(nameToKnown, row[1], kgID); hashAdd(uniProtToKnown, row[2], kgID); hashAdd(uniProtToKnown, row[3], kgID); hashAdd(refSeqToKnown, row[4], kgID); hashAdd(knownTextHash, kgID, cloneString(row[5])); } sqlFreeResult(&sr); sr = sqlGetResult(conn, "NOSQLINJ select kgID,alias from kgAlias"); while ((row = sqlNextRow(sr)) != NULL) { char *upc = cloneString(row[0]); touppers(upc); hashAdd(aliasToKnown, row[1], upc); } sqlFreeResult(&sr); sr = sqlGetResult(conn, "NOSQLINJ select kgID,alias from kgProtAlias"); while ((row = sqlNextRow(sr)) != NULL) { char *upc = cloneString(row[0]); touppers(upc); hashAdd(aliasToKnown, row[1], upc); } sqlFreeResult(&sr); } }
void readNameOmim(char *fileName, struct nameOmim **retList, struct hash **retNameOmimHash, struct hash **retOmimNameHash) /* Read in file into list and hashes. Make hash keyed on transcriptId (txOmimHash) * and hash keyed on omimId (omimNameHash). */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *nameOmimHash = newHash(0); struct hash *omimNameHash = newHash(0); struct nameOmim *list = NULL, *el; char *row[2]; while (lineFileRow(lf, row)) { AllocVar(el); slAddHead(&list, el); touppers(row[0]); hashAddSaveName(nameOmimHash, row[0], el, &el->name); hashAddSaveName(omimNameHash, row[1], el, &el->omimId); } lineFileClose(&lf); slReverse(&list); *retList = list; *retNameOmimHash = nameOmimHash; *retOmimNameHash = omimNameHash; }
void printGeneText(struct gene *gene, FILE *f) /* Print extended text associated with gene. */ { struct hash *uniqHash = hashNew(8); boolean gotSomething = FALSE; gotSomething |= printGeneFromHashOrAlias(gene->refSeq, refSeqToKnown, aliasToKnown, uniqHash, f); gotSomething |= printGeneFromHashOrAlias(gene->uniProt, uniProtToKnown, aliasToKnown, uniqHash, f); gotSomething |= printGeneFromHashOrAlias(gene->genbank, aliasToKnown, aliasToKnown, uniqHash, f); if (gene->name[0] != 0) { char *upcName = cloneString(gene->name); struct hashEl *hel; touppers(upcName); for (hel = hashLookup(nameToKnown, upcName); hel != NULL; hel = hashLookupNext(hel)) { char *kgID = hel->val; gotSomething = TRUE; uniqPrintGene(kgID, uniqHash, f); } if (!gotSomething) { printGeneFromHashOrAlias(upcName, aliasToKnown, aliasToKnown, uniqHash, f); } freeMem(upcName); } hashFree(&uniqHash); }
void tabPepPred(char *database, int fileCount, char *fileNames[], char *table) /* Load a tab separated peptide file. */ { struct hash *uniq = newHash(16); struct lineFile *lf = lineFileOpen(fileNames[0], TRUE); char *words[2]; if (fileCount != 1) errAbort("Only one file allowed for tab separated peptides"); makeCustomTable(database, table, createString); printf("Processing %s\n", fileNames[0]); while (lineFileRow(lf, words)) { char *upperCase; if (hashLookupUpperCase(uniq, words[0]) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", words[0], lf->lineIx, lf->fileName); upperCase = cloneString(words[0]); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); } lineFileClose(&lf); printf("Loading %s\n", fileNames[0]); loadTableFromTabFile(database, table, fileNames[0]); freeHash(&uniq); }
struct annoRow *aggvIntergenicRow(struct annoGratorGpVar *self, struct variant *variant,
				  boolean *retRJFilterFailed, struct lm *callerLm)
/* Build the output row used for an intergenic variant: the genePred columns
 * are all empty strings and the remaining columns come from a gpFx that carries
 * only the allele, soNumber intergenic_variant and detailType none.
 * Working storage comes from self->lm; the returned row is made with callerLm.
 * The row's filter-failed flag is set when retRJFilterFailed is non-NULL and
 * points at a TRUE value. */
{
struct annoGrator *grator = &(self->grator);
struct annoStreamer *streamer = &(grator->streamer);
int genePredCols = grator->mySource->numCols;
char **words;
struct gpFx *fx;
boolean rjFail = FALSE;
int col;

lmAllocArray(self->lm, words, streamer->numCols);
// Leading genePred columns are left blank:
for (col = 0; col < genePredCols; col++)
    words[col] = "";

lmAllocVar(self->lm, fx);
fx->allele = firstAltAllele(variant->alleles);
// NOTE(review): upper-casing happens in place on whatever firstAltAllele returned.
if (isAllNt(fx->allele, strlen(fx->allele)))
    touppers(fx->allele);
fx->soNumber = intergenic_variant;
fx->detailType = none;

// The gpFx columns follow directly after the genePred columns.
aggvStringifyGpFx(&words[genePredCols], fx, self->lm);

if (retRJFilterFailed != NULL && *retRJFilterFailed)
    rjFail = TRUE;
return annoRowFromStringArray(variant->chrom, variant->chromStart, variant->chromEnd,
			      rjFail, words, streamer->numCols, callerLm);
}
static void addPrimaryIdsToHash(struct sqlConnection *conn, struct hash *hash, char *idField, struct slName *tableList, struct lm *lm, char *extraWhere) /* For each table in tableList, query all idField values and add to hash, * id -> uppercased id for case-insensitive matching. */ { struct slName *table; struct sqlResult *sr; char **row; struct dyString *query = dyStringNew(0); for (table = tableList; table != NULL; table = table->next) { dyStringClear(query); sqlDyStringPrintf(query, "select %s from %s", idField, table->name); if (extraWhere != NULL) dyStringPrintf(query, " where %s", extraWhere); sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { if (isNotEmpty(row[0])) { char *origCase = lmCloneString(lm, row[0]); touppers(row[0]); hashAdd(hash, row[0], origCase); } } sqlFreeResult(&sr); } }
static void checkTerm(char *term, char *target, enum dbDbMatchType type, struct dbDb *dbDb, struct hash *matchHash, struct dbDbMatch **pMatchList) /* If target starts with term (case-insensitive), and target is not already in matchHash, * add target to matchHash and add a new match to pMatchList. */ { // Make uppercase version of target for case-insensitive matching. int targetLen = strlen(target); char targetUpcase[targetLen + 1]; safencpy(targetUpcase, sizeof(targetUpcase), target, targetLen); touppers(targetUpcase); int offset = wordMatchOffset(term, targetUpcase); if (offset >= 0) { addIfFirstMatch(dbDb, type, offset, targetUpcase, term, matchHash, pMatchList); } else if (offset < 0 && type == ddmtSciName && term[0] == targetUpcase[0]) { // For scientific names ("Genus species"), see if the user entered the term as 'G. species' // e.g. term 'P. trog' for target 'Pan troglodytes' regmatch_t substrArr[3]; if (regexMatchSubstrNoCase(term, "^[a-z](\\.| ) *([a-z]+)", substrArr, ArraySize(substrArr))) { char *termSpecies = term + substrArr[2].rm_so; char *targetSpecies = skipLeadingSpaces(skipToSpaces(targetUpcase)); if (targetSpecies && startsWithNoCase(termSpecies, targetSpecies)) { // Keep the negative offset since we can't just bold one chunk of target... addIfFirstMatch(dbDb, type, offset, targetUpcase, term, matchHash, pMatchList); } } } }
void readGbZfin (struct lineFile *gzf) /* Read in Genbank IDs and ZFIN IDs */ { char *words[24], *acc = NULL, addAcc[20], *name = NULL, *newAcc = NULL; struct zfin *gz = NULL; while (lineFileChopTab(gzf, words) ) { /* copy name and change to upper case */ name = cloneString(words[1]); touppers(name); if ((gz = hashFindVal(zfinMarkerHash, name) ) != NULL) { acc = gz->acc; /* add new accession */ if (acc != NULL) { safef(addAcc, sizeof(addAcc), ",%s", words[2]); newAcc = addSuffix(acc, addAcc); gz->acc = cloneString(newAcc); } else gz->acc = cloneString(words[2]); /* add structure back to hash */ addHashElUnique(zfinMarkerHash, name, gz); } else fprintf(stderr, "The marker, %s, with ZFIN ID, %s, is not found in the mapping panels \n", words[1], words[0]); } }
void *addExtensionAndSearch(char *name, struct hash *hash, boolean alias)
/* Look for name in hash, trying each of the NUMEXT suffixes in extensions[].
 * For every suffix the candidates are, in order:
 *   - (alias only) name plus suffix as built, before it is upper-cased
 *   - name plus suffix, upper-cased
 *   - the working name with its last '.' suffix chopped off, if that differs
 *     from name
 *   - (alias only) the original-case copy with its suffix chopped off, if that
 *     differs from name.
 * When alias is TRUE the working name is upper-cased before suffixes are added.
 * The chopping is cumulative: each unsuccessful round removes another suffix.
 * Returns the value stored in hash for the first hit, or NULL if nothing matched.
 * All temporary strings made here are freed before returning. */
{
char *addName = cloneString(name);
char *nameLower = NULL;     /* original-case copy of name, used only for aliases */
void *result = NULL;
boolean found = FALSE;
int i;

if (alias)
    {
    nameLower = cloneString(name);
    touppers(addName);
    }
for (i = 0; (i < NUMEXT) && (!found); i++)
    {
    char *newName = addSuffix(addName, extensions[i]);
    /* for alias, check the name as built */
    if (alias && ((result = hashFindVal(hash, newName)) != NULL))
        found = TRUE;
    /* change name to upper case and check in hash */
    touppers(newName);
    if (!found && (result = hashFindVal(hash, newName)) != NULL)
        found = TRUE;
    else if (!found)
        {
        /* remove the suffix after the last '.' and compare */
        chopSuffix(addName);
        if (alias)
            chopSuffix(nameLower);
        if (!sameString(name, addName) && (result = hashFindVal(hash, addName)) != NULL)
            found = TRUE;
        else if (alias)
            {
            /* check also the original-case name */
            if (!sameString(name, nameLower) && (result = hashFindVal(hash, nameLower)) != NULL)
                found = TRUE;
            }
        }
    /* result is a value from the hash, not the key, so the key can be released. */
    freeMem(newName);
    }
freeMem(addName);
freeMem(nameLower);
return found ? result : NULL;
}
struct dnaSeq *getSkinnySeq(char *sequenceFile, char *chromName) /* mark deletions with '-' */ { char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; struct dnaSeq *seq; char *seqPtr = NULL; int pos = 0; int start = 0; int end = 0; int chromSize = 0; int snpCount = 0; char *snpChrom = NULL; char *rsId = NULL; verbose(1, "sequence file = %s\n", sequenceFile); verbose(1, "chrom = %s\n", chromName); chromSize = hChromSize(chromName); verbose(1, "chromSize = %d\n", chromSize); seq = hFetchSeq(sequenceFile, chromName, 0, chromSize); // seq = hLoadChrom(chromName); touppers(seq->dna); seqPtr = seq->dna; sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name from %s", snpTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { snpChrom = cloneString(row[0]); start = sqlUnsigned(row[1]); end = sqlUnsigned(row[2]); rsId = cloneString(row[3]); if (!sameString(snpChrom, chromName)) continue; assert (end < chromSize); assert (end > start); snpCount++; for (pos = start; pos < end; pos++) seqPtr[pos] = '-'; } sqlFreeResult(&sr); hFreeConn(&conn); if (snpCount == 0) verbose(1, "no matching SNPs\n"); return seq; }
struct hashEl *hashLookupUpperCase(struct hash *hash, char *name)
/* Look up the upper-cased form of name in hash and return whatever hashLookup
 * gives for it.  The keys already in hash are expected to be upper case.
 * name itself is left untouched: the upper-casing is done on a copy in a
 * 256 byte local buffer filled with safef. */
{
char upperName[256];
struct hashEl *hel;

safef(upperName, sizeof(upperName), "%s", name);
touppers(upperName);
hel = hashLookup(hash, upperName);
return hel;
}
char *splatAliBasesOnly(char *aligned)
/* Return a new string holding aligned with every '^' and '-' removed and the
 * remaining characters upper-cased.  The input is not modified.
 * FreeMem result when done. */
{
char *result = cloneString(aligned);

stripChar(result, '^');
stripChar(result, '-');
touppers(result);
return result;
}
static char *stripSpacesEtc(char *s)
/* Return an upper-cased copy of s with spaces, periods, and dashes removed.
 * s itself is not changed; the copy comes from cloneString. */
{
static const char unwanted[] = {'-', '.', ' '};
char *copy = cloneString(s);
int i;

for (i = 0; i < (int)sizeof(unwanted); ++i)
    stripChar(copy, unwanted[i]);
touppers(copy);
return copy;
}
struct hash *tableToAliasHash(struct sqlConnection *conn, char *table, char *query) /* Create hash of true name keyed by alias */ { struct sqlResult *sr; char **row; struct hash *hash = hashNew(19); char buf[256]; sqlSafef(buf, sizeof(buf), query, table); sr = sqlGetResult(conn, buf); while ((row = sqlNextRow(sr)) != NULL) { touppers(row[0]); touppers(row[1]); hashAdd(hash, row[0], lmCloneString(hash->lm, row[1])); } sqlFreeResult(&sr); return hash; }
double oligoTm(char *dna, double DNA_nM, double K_mM)
/* Calculate melting point of short DNA sequence given DNA concentration in
 * nanomoles, and salt concentration in millimoles.  This is calculated using eqn
 * (ii) in Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21, page
 * 6410, with tables of nearest-neighbor thermodynamics for DNA bases as
 * provided in Breslauer, Frank, Bloecker, and Markey,
 * Proc. Natl. Acad. Sci. USA, vol 83, page 3748.
 *
 * The input is not modified: the work is done on an upper-cased copy that is
 * freed before returning.  Only A, C, G, T and N are accepted as the first
 * character here; anything else ends in errAbort.  The per-base handling lives
 * in the STATE() macro, which is defined outside this function and presumably
 * supplies the A_STATE ... N_STATE labels and the jumps to DONE and ERROR. */
{
register int dh = 0, ds = 108;   /* running sums updated by the state machine */
register char c;
char *dupe = cloneString(dna);   /* private copy, so touppers can work in place */
char *s = dupe;
double delta_H, delta_S;

touppers(s);
/* Use a finite-state machine (DFA) to calculate dh and ds for s. */
c = *s; s++;
/* Dispatch on the first base to enter the matching state. */
if (c == 'A') goto A_STATE;
else if (c == 'G') goto G_STATE;
else if (c == 'T') goto T_STATE;
else if (c == 'C') goto C_STATE;
else if (c == 'N') goto N_STATE;
else goto ERROR;
STATE(A);
STATE(T);
STATE(G);
STATE(C);
STATE(N);

DONE:  /* dh and ds are now computed for the given sequence. */
delta_H = dh * -100.0;  /*
			 * Nearest-neighbor thermodynamic values for dh
			 * are given in 100 cal/mol of interaction.
			 */
delta_S = ds * -0.1;     /*
			  * Nearest-neighbor thermodynamic values for ds
			  * are in .1 cal/K per mol of interaction.
			  */

/*
 * See Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21,
 * page 6410, eqn (ii).
 */
freeMem(dupe);
return delta_H / (delta_S + 1.987 * log(DNA_nM/4000000000.0)) - 273.15 + 16.6 * log10(K_mM/1000.0);

ERROR:  /*
	  * length of s was less than 2 or there was an illegal character in
	  * s.
	  */
freeMem(dupe);
errAbort("Not a valid oligo in oligoTm.");
return 0;
}
boolean mahoneyNameAgrees(char *mName, char *name) /* Return TRUE if mahoney name agrees with name. */ { if (mName == NULL || mName[0] == 0 || name == NULL || name[0] == 0) return FALSE; else { char *mNameDupe = cloneString(mName); char *nameDupe = cloneString(name); char *s, *e; boolean match = FALSE; touppers(mNameDupe); touppers(nameDupe); stripChar(mNameDupe, ' '); stripChar(mNameDupe, '-'); stripChar(mNameDupe, '.'); stripChar(nameDupe, ' '); stripChar(nameDupe, '-'); stripChar(mNameDupe, '.'); verbose(2, "mahoneyNameAgrees %s (%s) %s (%s)", mName, mNameDupe, name, nameDupe); s = mNameDupe; while (s != NULL && s[0] != 0) { e = strchr(s, '/'); if (e != NULL) *e++ = 0; if (sameString(s, nameDupe)) { match = TRUE; break; } s = e; } verbose(2, " matches %d\n", match); freeMem(mNameDupe); freeMem(nameDupe); return match; } }
void motifLogoAndMatrix(struct dnaSeq **seqs, int count, struct dnaMotif *motif) /* Print out motif sequence logo and text (possibly with multiple occurences) */ { // Detect inconsistent motif/pwm tables and suppress confusing display if (motif != NULL) { if (seqs != NULL && motif->columnCount != seqs[0]->size) { warn("Motif seq length doesn't match PWM\n"); return; } } #define MOTIF_HELP_PAGE "../goldenPath/help/hgRegMotifHelp.html" printf("<PRE>\n"); printf("<table>\n"); if (motif != NULL) { struct tempName pngTn; dnaMotifMakeProbabalistic(motif); makeTempName(&pngTn, "logo", ".png"); dnaMotifToLogoPng(motif, 47, 140, NULL, "../trash", pngTn.forCgi); printf("<tr><td></td><td colspan='%d'align=right><a href=\"%s\" target=_blank>Motif display help</a></td></tr>", motif->columnCount, MOTIF_HELP_PAGE); printf("<tr><td></td><td colspan='%d'>", motif->columnCount); printf("<IMG SRC=\"%s\" BORDER=1>", pngTn.forHtml); printf("</td><td></td></tr>\n"); } if (count > 0) { int i; for (i = 0; i < count; i++) { struct dnaSeq *seq = seqs[i]; printf("<tr><td></td>"); touppers(seq->dna); printDnaCells(seq->dna, seq->size); if (count == 1) printf("<td>this occurrence</td></tr>\n"); else // is there a library routine to get 1st, 2nd ...? printf("<td>occurrence #%d</td></tr>\n", i + 1); } } if (motif != NULL) { printf("<tr><td></td>"); printConsensus(motif); printf("<td>motif consensus</td></tr>\n"); dnaMotifPrintProbTable(motif, stdout); } printf("</table>\n"); printf("</PRE>"); }
static char *getSearchTermUpperCase()
/* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response.
 * If we do, convert it to upper case for case-insensitive matching and return it.
 * The string returned by cgiOptionalString is upper-cased in place.
 * The Bad Request abort handler is only installed for the duration of this call. */
{
pushAbortHandler(htmlVaBadRequestAbort);
char *term = cgiOptionalString(SEARCH_TERM);
/* Check before upper-casing: a missing parameter gives a NULL term, and
 * touppers must not be handed a NULL pointer. */
if (isEmpty(term))
    errAbort("Missing required CGI parameter %s", SEARCH_TERM);
touppers(term);
popAbortHandler();
return term;
}
int main(int argc, char *argv[]) { char *database; char *outFn; struct dnaSeq *seq; struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; if (argc != 4) usage(); database = argv[1]; conn2= hAllocConn(database); outFn = argv[2]; outf = mustOpen(outFn, "w"); tgtChrom = argv[3]; sqlSafef(query2, sizeof query2, "select secStr, name, chrom, chromStart, chromEnd, strand from evofold where chrom='%s'", tgtChrom); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { secStr = row2[0]; id = row2[1]; chrom = row2[2]; chromStart = atoi(row2[3]); chromEnd = atoi(row2[4]); strand = *row2[5]; seq = hChromSeq(database, chrom, chromStart, chromEnd); touppers(seq->dna); if (strand == '-') reverseComplement(seq->dna, seq->size); memSwapChar(seq->dna, seq->size, 'T', 'U'); safef(javaCmd, sizeof(javaCmd), "java -cp VARNAv3-7.jar fr.orsay.lri.varna.applications.VARNAcmd -sequenceDBN %s -structureDBN '%s' -o evoFold/%s/%s.png", seq->dna, secStr, chrom, id); fprintf(outf, "%s\n", javaCmd); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); fclose(outf); hFreeConn(&conn2); return(0); }
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f) /* Process one genie peptide prediction file into known and alt tab files. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *trans; boolean skip = FALSE; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", fileName); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", fileName); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); trans = firstWordInLine(line+1); if (abbr != NULL && startsWith(abbr, trans)) trans += strlen(abbr); if (hashLookupUpperCase(uniq, trans) != NULL) { warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", trans, lf->lineIx, lf->fileName); skip = TRUE; } else { char *upperCase; upperCase = cloneString(trans); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); fprintf(f, "%s\t", trans); skip = FALSE; } } else if (!skip) { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
void htmlPageValidateOrAbort(struct htmlPage *page) /* Do some basic validations. Aborts if there is a problem. */ { struct htmlTag *tag; boolean gotTitle = FALSE; char *contentType = NULL; if (page == NULL) errAbort("Can't validate NULL page"); if (page->header != NULL) contentType = hashFindVal(page->header, "Content-Type:"); if (contentType == NULL || startsWith("text/html", contentType)) { /* To simplify things upper case all tag names. */ for (tag = page->tags; tag != NULL; tag = tag->next) touppers(tag->name); checkExactlyOne(page->tags, "BODY"); /* Validate header, and make a suggestion or two */ if ((tag = page->tags) == NULL) errAbort("No tags"); if (!sameWord(tag->name, "HTML")) errAbort("Doesn't start with <HTML> tag"); tag = tag->next; if (tag == NULL || !sameWord(tag->name, "HEAD")) warn("<HEAD> tag does not follow <HTML> tag"); else { for (;;) { tag = tag->next; if (tag == NULL) errAbort("Missing </HEAD>"); if (sameWord(tag->name, "TITLE")) gotTitle = TRUE; if (sameWord(tag->name, "/HEAD")) break; } if (!gotTitle) warn("No title in <HEAD>"); validateNestingTags(page, page->tags, tag, headNesters, ArraySize(headNesters)); tag = tag->next; } if (tag == NULL || !sameWord(tag->name, "BODY")) errAbort("<BODY> tag does not follow <HTML> tag"); tag = validateBody(page, tag->next); if (tag == NULL || !sameWord(tag->name, "/HTML")) errAbort("Missing </HTML>"); validateCgiUrls(page); } }
void saveEntities(struct dlList *entList, char *dir, char *prefix, char *chrom) /* Write out list of entities to a file. */ { char fileName[512]; FILE *f; struct dlNode *node; struct entity *ent; static int entCount = 0; struct intron *intron; int igStart, igEnd, igCount; char *source = "genieCon"; char upcChrom[16]; strcpy(upcChrom, chrom); touppers(upcChrom); sprintf(fileName, "%s/%s%s.gff", dir, prefix, upcChrom); f = mustOpen(fileName, "w"); for (node = entList->head; node->next != NULL; node = node->next) { ent = node->val; ++entCount; fprintf(f, "%s\t%s\tcdnaCluster\t%d\t%d\t%d\t%c\t.\tgc%d\n", chrom, source, ent->start+1, ent->end, slCount(ent->cdaRefList), ent->strand, entCount); for (intron = ent->intronList; intron != NULL; intron = intron->next) { char *startType, *endType; if (ent->strand == '+') { startType = "splice5"; endType = "splice3"; } else { startType = "splice3"; endType = "splice5"; } fprintf(f, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tgc%d\n", chrom, source, startType, intron->start, intron->start+1, ent->strand, entCount); fprintf(f, "%s\t%s\tintron\t%d\t%d\t.\t%c\t.\tgc%d\n", chrom, source, intron->start+1, intron->end, ent->strand, entCount); fprintf(f, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tgc%d\n", chrom, source, endType, intron->end, intron->end+1, ent->strand, entCount); } if (findIgRegion(ent, &igStart, &igEnd, &igCount)) { fprintf(f, "%s\t%s\tIG\t%d\t%d\t%d\t%c\t.\tafter_gc%d\n", chrom, source, igStart+1, igEnd, igCount, ent->strand, entCount); } } fclose(f); }
bool rnaPair(char a, char b)
/* Returns TRUE if a and b can pair, and false otherwise.
 * The two characters are passed through dna2rna and upper-cased, then compared
 * with the first two characters of each entry of RNA_PAIRS, a list that ends
 * with a zero entry. */
{
char pair[3];
int i = 0;

pair[0] = a;
pair[1] = b;
pair[2] = '\0';
dna2rna(pair);
touppers(pair);

while (RNA_PAIRS[i] != 0)
    {
    if (RNA_PAIRS[i][0] == pair[0] && RNA_PAIRS[i][1] == pair[1])
	return TRUE;
    ++i;
    }
return FALSE;
}
void genericOne(char *fileName, struct hash *uniq, FILE *f) /* Process one ensemble peptide prediction file into tab delimited * output f, using uniq hash to make sure no dupes. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *trans, transBuf[128]; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", fileName); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", fileName); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { char *upperCase; /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); trans = firstWordInLine(line+1); if (abbr != NULL && startsWith(abbr, trans)) trans += strlen(abbr); if (suffix != NULL) { safef(transBuf, sizeof(transBuf), "%s%s", trans, suffix); trans = transBuf; } if (hashLookupUpperCase(uniq, trans) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", trans, lf->lineIx, lf->fileName); upperCase = cloneString(trans); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); fprintf(f, "%s\t", trans); } else { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
struct gpFx *gpFxNew(char *allele, char *transcript, enum soTerm soNumber,
		     enum detailType detailType, struct lm *lm)
/* Allocate a gpFx from lm and set the members every effect has: allele,
 * transcript, soNumber and detailType.  The allele is a copy run through
 * collapseDashes, upper-cased when isAllNt accepts it; the transcript is a
 * plain copy.  Members specific to the soTerm are left for the caller. */
{
struct gpFx *fx;
char *alleleCopy;

lmAllocVar(lm, fx);
alleleCopy = collapseDashes(lmCloneString(lm, allele));
fx->allele = alleleCopy;
if (isAllNt(alleleCopy, strlen(alleleCopy)))
    touppers(alleleCopy);
fx->transcript = lmCloneString(lm, transcript);
fx->soNumber = soNumber;
fx->detailType = detailType;
return fx;
}
struct hash *hashMahoneys(struct mahoney *list)
/* Put list of mahoneys into hash keyed by mahoney id.
 * The key is the mtf field printed as lower case hexadecimal ("%x"); the value
 * is the list element itself.  As a side effect the genbank field of every
 * element is upper-cased in place. */
{
struct hash *hash = hashNew(0);
struct mahoney *el;
for (el = list; el != NULL; el = el->next)
    {
    /* Room for every hex digit of a 32 or 64 bit value plus the terminator,
     * so safef can not abort on large ids. */
    char hex[17];
    touppers(el->genbank);
    safef(hex, sizeof(hex), "%x", el->mtf);
    hashAdd(hash, hex, el);
    }
return hash;
}
void doFetch(char *inputFileName, char *sequenceFileName, char *outputFileName) /* lookup sequence for each line */ { struct lineFile *lf = NULL; char *line; char *row[6]; int elementCount; struct twoBitFile *tbf; char *fileChrom = NULL; int start = 0; int end = 0; char *name = NULL; int score = 0; char *strand = NULL; struct dnaSeq *chunk = NULL; FILE *outputFileHandle = mustOpen(outputFileName, "w"); tbf = twoBitOpen(sequenceFileName); lf = lineFileOpen(inputFileName, TRUE); while (lineFileNext(lf, &line, NULL)) { elementCount = chopString(line, "\t", row, ArraySize(row)); if (elementCount != 6) continue; fileChrom = cloneString(row[0]); start = sqlUnsigned(row[1]); end = sqlUnsigned(row[2]); name = cloneString(row[3]); score = sqlUnsigned(row[4]); strand = cloneString(row[5]); if (start == end) continue; assert (end > start); chunk = twoBitReadSeqFrag(tbf, fileChrom, start, end); touppers(chunk->dna); if (sameString(strand, "-")) reverseComplement(chunk->dna, chunk->size); fprintf(outputFileHandle, "%s\t%d\t%d\t%s\t%d\t%s\t%s\n", fileChrom, start, end, name, score, strand, chunk->dna); dnaSeqFree(&chunk); } lineFileClose(&lf); carefulClose(&outputFileHandle); }
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f) /* Process one ensemble peptide prediction file into tab delimited * output f, using uniq hash to make sure no dupes. */ { struct lineFile *lf = lineFileOpen(ensFile, TRUE); char *line; int lineSize; boolean firstTime = TRUE; char *translation; /* Do cursory sanity check. */ if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", ensFile); if (line[0] != '>') errAbort("%s is badly formatted, doesn't begin with '>'", ensFile); lineFileReuse(lf); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { char *upperCase; char *transcript; /* End last line. */ if (firstTime) firstTime = FALSE; else fputc('\n', f); translation = findEnsTrans(lf, line); if (hashLookupUpperCase(uniq, translation) != NULL) errAbort("Duplicate (case insensitive) '%s' line %d of %s", translation, lf->lineIx, lf->fileName); upperCase = cloneString(translation); touppers(upperCase); hashAdd(uniq, upperCase, NULL); freeMem(upperCase); transcript = hashFindVal(pToT, translation); if (transcript == NULL) errAbort("Can't find transcript for %s", translation); fprintf(f, "%s\t", transcript); } else { mustWrite(f, line, lineSize-1); } } fputc('\n', f); lineFileClose(&lf); }
static struct hash *upcHashWordsInFile(char *fileName, int hashSize)
/* Read fileName and return a new hash (made with newHash(hashSize)) whose keys
 * are the words of the file, upper-cased.  Words are split off each line with
 * nextQuotedWord.  All values are NULL. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *wordHash = newHash(hashSize);
char *line;

while (lineFileNext(lf, &line, NULL))
    {
    char *word;
    for (word = nextQuotedWord(&line); word != NULL; word = nextQuotedWord(&line))
	{
	touppers(word);
	hashAdd(wordHash, word, NULL);
	}
    }
lineFileClose(&lf);
return wordHash;
}