static bool checkForAccTypeChange(struct sqlConnection *conn, struct gbSelect* select, struct gbStatus* status) /* Check if a sequence that appears new has really had it's type has changed. * Returns true if type changed (or other error), false if nothing detected. */ { char query[128]; struct sqlResult* sr; char **row; bool changed = FALSE; sqlSafef(query, sizeof(query), "SELECT type FROM gbSeq WHERE acc = '%s'", status->acc); sr = sqlGetResult(conn, query); if ((sr != NULL) && ((row = sqlNextRow(sr)) != NULL)) { unsigned type = gbParseType(row[0]); if (type != status->type) fprintf(stderr, "Error: %s %s type has changed from %s to %s; add to ignore file\n", status->acc, gbFormatDate(status->modDate), gbFmtSelect(type), gbFmtSelect(status->type)); else fprintf(stderr, "Error: %s %s is in the seq table, but shouldn't be, don't know why\n", status->acc, gbFormatDate(status->modDate)); changed = TRUE; gErrorCnt++; } sqlFreeResult(&sr); return changed; }
int main(int argc, char* argv[]) { char *relName, *updateName, *typeAccPrefix, *database, *sep; struct gbIndex* index; struct gbSelect select; struct gbSelect* prevSelect = NULL; struct gbAlignInfo alignInfo; boolean noMigrate; ZeroVar(&select); optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); maxFaSize = optionInt("fasize", -1); workDir = optionVal("workdir", "work/align"); noMigrate = optionExists("noMigrate"); createPolyASizes = optionExists("polyASizes"); gbVerbInit(optionInt("verbose", 0)); relName = argv[1]; updateName = argv[2]; typeAccPrefix = argv[3]; database = argv[4]; /* parse typeAccPrefix */ sep = strchr(typeAccPrefix, '.'); if (sep != NULL) *sep = '\0'; select.type = gbParseType(typeAccPrefix); if (sep != NULL) { select.accPrefix = sep+1; *sep = '.'; } select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno")); index = gbIndexNew(database, NULL); select.release = gbIndexMustFindRelease(index, relName); select.update = gbReleaseMustFindUpdate(select.release, updateName); gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix); /* Get the release to migrate, if applicable */ if (!noMigrate) prevSelect = gbAlignGetMigrateRel(&select); alignInfo = gbAlignGet(&select, prevSelect); /* always print stats */ fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix, alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); gbIndexFree(&index); /* print alignment and migrate count, which is read by the driver program */ printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt); return 0; }
int main(int argc, char* argv[]) { char *relName, *updateName, *typeAccPrefix, *database, *sep; struct gbIndex* index; struct gbSelect select; struct gbSelect* prevSelect = NULL; boolean noMigrate; ZeroVar(&select); optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); gWorkDir = optionVal("workdir", "work/align"); gSortTmp = optionVal("sortTmp", NULL); noMigrate = optionExists("noMigrate"); gbVerbInit(optionInt("verbose", 0)); relName = argv[1]; updateName = argv[2]; typeAccPrefix = argv[3]; database = argv[4]; /* parse typeAccPrefix */ sep = strchr(typeAccPrefix, '.'); if (sep != NULL) *sep = '\0'; select.type = gbParseType(typeAccPrefix); if (sep != NULL) { select.accPrefix = sep+1; *sep = '.'; } index = gbIndexNew(database, NULL); select.release = gbIndexMustFindRelease(index, relName); select.update = gbReleaseMustFindUpdate(select.release, updateName); select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno")); gbVerbMsg(0, "gbAlignInstall: %s/%s/%s/%s", select.release->name, select.release->genome->database, select.update->name, typeAccPrefix); /* Get the release to migrate, if applicable */ if (!noMigrate) prevSelect = gbAlignGetMigrateRel(&select); gbAlignInstall(&select, prevSelect); /* must go to stderr to be logged */ gbVerbMsg(0, "gbAlignInstall: complete"); gbIndexFree(&index); return 0; }
int main(int argc, char *argv[]) /* Check parameters, set up, loop through each GenBank file. */ { char *gbName; int argi = 1; struct hash *estAuthorHash = NULL; char *pepFa; optionInit(&argc, argv, optionSpecs); if (argc < 4) usage(); gByAccPrefixSize = optionInt("byAccPrefix", 0); gbIdxName = optionVal("gbidx", NULL); pepFa = optionVal("pepFa", NULL); gbType = gbParseType(optionVal("type", "mrna,est")); gbOrg = optionVal("org", NULL); inclXMs = optionExists("inclXMs"); if (gByAccPrefixSize > 4) /* keep small to avoid tons of open files */ errAbort("max value of -byAccPrefix is 4"); gCurAccPrefix[0] = '\0'; faName = argv[argi++]; raName = argv[argi++]; estAuthorHash = newHash(23); kvt = newKvt(5*1024); gbfInit(); if (pepFa != NULL) gPepFa = gbFaOpen(pepFa,"w"); char *blackList = optionVal("blackList", NULL); if (blackList != NULL) blackListRanges = genbankBlackListParse(blackList); while (argi < argc) { gbName = argv[argi++]; printf("Processing %s into %s and %s\n", gbName, faName, raName); procOneGbFile(gbName, estAuthorHash); } gbFaClose(&faFile); gbFaClose(&gPepFa); carefulClose(&raFile); carefulClose(&gbIdxFile); return 0; }
static void loadGbCdnaInfoRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row) /* load one row from the gbCdnaInfo table */ { struct metaData* md; int len, numNonZero, iRow = 0; char *acc, *dir; boolean gotError, isOk; /* columns: acc,id,moddate,version,moddate,type */ acc = row[iRow++]; md = metaDataTblsGet(metaDataTbls, acc); if (md->inGbCdnaInfo) { gbError("%s: acc occurs multiple times in the mrna table", acc); return; } md->inGbCdnaInfo = TRUE; md->gbCdnaInfoId = strToUnsigned(row[iRow++], acc, "gbCdnaInfo.id", NULL); len = strlen(acc); md->gbCdnaInfoVersion = strToUnsigned(row[iRow++], "gbCdnaInfo.version", acc, &gotError); if (!gotError && (md->gbCdnaInfoVersion <= 0)) gbError("%s: gbCdnaInfo.version invalid: \"%d\"", acc, md->gbCdnaInfoVersion); isOk = TRUE; md->gbCdnaInfoModdate = gbParseChkDate(row[iRow++], &isOk); if (!isOk) gbError("%s: invalid gbCdnaInfo.moddate value: \"%s\"", acc, row[iRow-1]); md->gbCdnaInfoType = gbParseType(row[iRow++]); md->typeFlags |= md->gbCdnaInfoType; dir = row[iRow++]; if ((strlen(dir) > 1) || (strchr("053", *dir) == NULL)) gbError("%s: invalid gbCdnaInfo.direction value: \"%s\"", acc, dir); /* Make sure that at least a few of the id fields have data */ numNonZero = 0; while (iRow < 20) { int id = strToUnsigned(row[iRow++], md->acc, "gbCdnaInfo.?", NULL); if (id > 0) numNonZero++; /* remember if we have a description */ if (iRow-1 == 16) md->haveDesc = (id != 0); } if (numNonZero == 0) gbError("%s: none of gbCdnaInfo string ids have non-zero values", dir); else if (numNonZero < 4) gbError("%s: only %d of gbCdnaInfo string ids have non-zero values", dir, numNonZero); }
static void parseGbSeqRow(char **row, struct seqFields *seq)
/* Parse a row from gbSeq into seq.  No dynamic memory is allocated; seq->acc
 * points into the row.  When the type column is "PEP", seq->type is left
 * untouched, since peptide types are not supported by gbParseType. */
{
/* positions of the gbSeq columns within the row */
enum
    {
    colId, colAcc, colVersion, colSize, colGbExtFile,
    colFileOffset, colFileSize, colType, colSrcDb
    };
char *acc = row[colAcc];

seq->id = strToUnsigned(row[colId], acc, "gbSeq.id", NULL);
seq->acc = row[colAcc];
seq->version = strToUnsigned(row[colVersion], acc, "gbSeq.version", NULL);
seq->size = strToUnsigned(row[colSize], acc, "gbSeq.size", NULL);
seq->gbExtFile = strToUnsigned(row[colGbExtFile], acc, "gbSeq.gbExtFile", NULL);
seq->file_offset = strToOffset(row[colFileOffset], acc, "gbSeq.file_offset");
seq->file_size = strToOffset(row[colFileSize], acc, "gbSeq.file_size");

if (!sameWord(row[colType], "PEP"))
    seq->type = gbParseType(row[colType]);

seq->srcDb = gbParseSrcDb(row[colSrcDb]);
}
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls, struct sqlConnection* conn, char** row, unsigned descOrgCats) /* load a row of the gbStatus table */ { struct metaData* md; int iRow = 0; boolean isOk; HGID seqId; /* columns: acc,version,modDate,type,srcDb,gbSeq,numAligns */ md = metaDataTblsGet(metaDataTbls, row[iRow++]); if (md->inGbStatus) gbError("%s: occurs multiple times in the gbStatus table", md->acc); md->inGbStatus = TRUE; md->gbsVersion = strToUnsigned(row[iRow++], md->acc, "gbStatus.version", NULL); isOk = TRUE; md->gbsModDate = gbParseChkDate(row[iRow++], &isOk); if (!isOk) gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc, row[iRow-1]); md->gbsType = gbParseType(row[iRow++]); md->gbsSrcDb = gbParseSrcDb(row[iRow++]); md->gbsOrgCat = gbParseOrgCat(row[iRow++]); seqId = strToUnsigned(row[iRow++], md->acc, "gbStatus.gbSeq", NULL); md->gbsNumAligns = strToUnsigned(row[iRow++], md->acc, "gbStatus.numAligns", NULL); md->typeFlags |= md->gbsType; if (md->inGbCdnaInfo) { if (seqId != md->gbCdnaInfoId) gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, seqId, md->gbCdnaInfoId); if (md->gbsType != md->gbCdnaInfoType) gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc, gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType)); if (md->gbsSrcDb != (md->typeFlags & GB_SRC_DB_MASK)) gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc, gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags)); if (md->gbsVersion != md->gbCdnaInfoVersion) gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc, md->gbsVersion, md->gbCdnaInfoVersion); if ((md->gbsModDate != md->gbCdnaInfoModdate)) gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc, gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate)); /* verify either have or don't have a description */ if (descOrgCats & md->gbsOrgCat) { if (!md->haveDesc) gbError("%s: should have 
gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } else { if (md->haveDesc) gbError("%s: should not have gbCdnaInfo.description: %s", md->acc, gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb)); } } }