struct blackListRange *genbankBlackListParse(char *blackList) /* parse a black list file into blackListRange data structure */ { struct lineFile *lf = lineFileMayOpen(blackList, TRUE); if (lf == NULL) errAbort("Could not open black list file %s. ", blackList); struct blackListRange *ranges = NULL; char *words[2]; while(lineFileRow(lf, words)) { char *prefix1 = cloneString(words[0]); genbankDropVer(prefix1, prefix1); char *number1 = skipToNumeric(prefix1); int begin = atoi(number1); *number1 = 0; // null so now prefix1 points to only the prefix char *prefix2 = cloneString(words[1]); genbankDropVer(prefix2, prefix2); char *number2 = skipToNumeric(prefix2); int end = atoi(number2); *number2 = 0; // null so now prefix2 points to only the prefix if (differentString(prefix1, prefix2)) errAbort("blackList file %s has accesions with different prefixes on line %d\n", lf->fileName, lf->lineIx); if (begin > end) errAbort("blackList file %s has end before begin on line %d\n", lf->fileName, lf->lineIx); struct blackListRange *range; AllocVar(range); range->prefix = prefix1; range->begin = begin; range->end = end; slAddHead(&ranges, range); } return ranges; }
static char *getCcdsRefSeqSummary(struct sqlConnection *conn, struct ccdsInfo *rsCcds)
/* Walk the ccdsInfo list starting at rsCcds and return the first non-NULL
 * result of getRefSeqSummary() for an entry's mrnaAcc (after passing it
 * through genbankDropVer()).  Returns NULL if no entry yields a summary. */
{
char unversioned[GENBANK_ACC_BUFSZ];
struct ccdsInfo *cur = rsCcds;

while (cur != NULL)
    {
    char *acc = genbankDropVer(unversioned, cur->mrnaAcc);
    char *found = getRefSeqSummary(conn, acc);
    if (found != NULL)
        return found;
    cur = cur->next;
    }
return NULL;
}
static char *getCcdsGeneSymbol(struct sqlConnection *conn, struct ccdsInfo *rsCcds)
/* Look up the gene name for a CCDS.  For each entry in the rsCcds list,
 * query refLinkTable for the name whose mrnaAcc equals the entry's
 * accession (after genbankDropVer()), and return the first non-NULL
 * result of sqlQuickString().  Returns NULL if no entry matches. */
{
char sql[256];
char unversioned[GENBANK_ACC_BUFSZ];
struct ccdsInfo *cur;

for (cur = rsCcds; cur != NULL; cur = cur->next)
    {
    char *acc = genbankDropVer(unversioned, cur->mrnaAcc);
    sqlSafef(sql, sizeof(sql), "select name from %s where mrnaAcc='%s'",
             refLinkTable, acc);
    char *symbol = sqlQuickString(conn, sql);
    if (symbol != NULL)
        return symbol;
    }
return NULL;
}
static char *findRefSeqSummary(struct sqlConnection *conn, struct geneSimilarities *refSeqs, char **sumAccv)
/* Scan the similar RefSeq genes in refSeqs->genes and return the first
 * non-NULL summary from getRefSeqSummary(), looked up by the gene name
 * after genbankDropVer().  On success *sumAccv is set to a clone of the
 * matching gene's name; if nothing is found, *sumAccv is set to NULL and
 * NULL is returned. */
{
char accBuf[GENBANK_ACC_BUFSZ];
char *summary = NULL;
char *matchedAccv = NULL;
struct geneSim *sim;

for (sim = refSeqs->genes; sim != NULL; sim = sim->next)
    {
    char *acc = genbankDropVer(accBuf, sim->gene->name);
    summary = getRefSeqSummary(conn, acc);
    if (summary != NULL)
        {
        /* Keep the gene's original name for the caller. */
        matchedAccv = cloneString(sim->gene->name);
        break;
        }
    }
*sumAccv = matchedAccv;
return summary;
}