Beispiel #1
0
static bool checkForAccTypeChange(struct sqlConnection *conn, 
                                  struct gbSelect* select,
                                  struct gbStatus* status)
/* Check whether a sequence that appears to be new is in fact already in the
 * gbSeq table, possibly with a different type.  Looks up status->acc in gbSeq;
 * if a row is found, an error is written to stderr (distinguishing a type
 * change from an unexplained entry) and gErrorCnt is incremented.
 * Returns true if a row was found, false if nothing was detected.
 * The select argument is not used here.
 */
{
char sql[128];
bool changed = FALSE;
struct sqlResult *result;
char **cols = NULL;

sqlSafef(sql, sizeof(sql),
      "SELECT type FROM gbSeq WHERE acc = '%s'", status->acc);
result = sqlGetResult(conn, sql);

/* only the first matching row is examined */
if (result != NULL)
    cols = sqlNextRow(result);

if (cols != NULL)
    {
    unsigned seqType = gbParseType(cols[0]);
    if (seqType == status->type)
        {
        /* same type as the incoming entry, so presence is unexplained */
        fprintf(stderr,
                "Error: %s %s is in the seq table, but shouldn't be, don't know why\n",
                status->acc, gbFormatDate(status->modDate));
        }
    else
        {
        fprintf(stderr,
                "Error: %s %s type has changed from %s to %s; add to ignore file\n",
                status->acc, gbFormatDate(status->modDate),
                gbFmtSelect(seqType), gbFmtSelect(status->type));
        }
    changed = TRUE;
    gErrorCnt++;
    }

sqlFreeResult(&result);
return changed;
}
int main(int argc, char* argv[])
{
char *relName, *updateName, *typeAccPrefix, *database, *sep;
struct gbIndex* index;
struct gbSelect select;
struct gbSelect* prevSelect = NULL;
struct gbAlignInfo alignInfo;
boolean noMigrate;
ZeroVar(&select);

optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();
maxFaSize = optionInt("fasize", -1);
workDir = optionVal("workdir", "work/align");
noMigrate = optionExists("noMigrate");
createPolyASizes = optionExists("polyASizes");
gbVerbInit(optionInt("verbose", 0));
relName = argv[1];
updateName = argv[2];
typeAccPrefix = argv[3];
database = argv[4];

/* parse typeAccPrefix */
sep = strchr(typeAccPrefix, '.');
if (sep != NULL)
    *sep = '\0';
select.type = gbParseType(typeAccPrefix);
if (sep != NULL)
    {
    select.accPrefix = sep+1;
    *sep = '.';
    }
select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno"));

index = gbIndexNew(database, NULL);
select.release = gbIndexMustFindRelease(index, relName);
select.update = gbReleaseMustFindUpdate(select.release, updateName);
gbVerbMsg(0, "gbAlignGet: %s/%s/%s/%s", select.release->name,
          select.release->genome->database, select.update->name,
          typeAccPrefix);

/* Get the release to migrate, if applicable */
if (!noMigrate)
    prevSelect = gbAlignGetMigrateRel(&select);

alignInfo = gbAlignGet(&select, prevSelect);

/* always print stats */
fprintf(stderr, "gbAlignGet: %s/%s/%s/%s: align=%d, migrate=%d\n",
        select.release->name, select.release->genome->database,
        select.update->name, typeAccPrefix,
        alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
gbIndexFree(&index);

/* print alignment and migrate count, which is read by the driver program */
printf("alignCnt: %d %d\n", alignInfo.align.accTotalCnt, alignInfo.migrate.accTotalCnt);
return 0;
}
Beispiel #3
0
int main(int argc, char* argv[])
/* Entry point for gbAlignInstall.  Expects four positional arguments after
 * option parsing: release name, update name, type[.accPrefix] and database.
 * Runs gbAlignInstall for the selected partition, optionally with a
 * previous release to migrate from. */
{
struct gbSelect select;
struct gbSelect* prevSelect = NULL;
ZeroVar(&select);

optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();

/* options */
gWorkDir = optionVal("workdir", "work/align");
gSortTmp = optionVal("sortTmp", NULL);
boolean noMigrate = optionExists("noMigrate");
gbVerbInit(optionInt("verbose", 0));

/* positional arguments */
char *relName = argv[1];
char *updateName = argv[2];
char *typeAccPrefix = argv[3];
char *database = argv[4];

/* typeAccPrefix is "type" or "type.accPrefix"; the string is split at the
 * dot only while the type is being parsed, then restored so that it can
 * be printed whole in the message below */
char *dot = strchr(typeAccPrefix, '.');
if (dot == NULL)
    select.type = gbParseType(typeAccPrefix);
else
    {
    *dot = '\0';
    select.type = gbParseType(typeAccPrefix);
    select.accPrefix = dot+1;
    *dot = '.';
    }

struct gbIndex* index = gbIndexNew(database, NULL);
select.release = gbIndexMustFindRelease(index, relName);
select.update = gbReleaseMustFindUpdate(select.release, updateName);
select.orgCats = gbParseOrgCat(optionVal("orgCats", "native,xeno"));

gbVerbMsg(0, "gbAlignInstall: %s/%s/%s/%s", select.release->name,
          select.release->genome->database, select.update->name,
          typeAccPrefix);

/* a previous release is only looked up when migration is enabled */
if (!noMigrate)
    prevSelect = gbAlignGetMigrateRel(&select);

gbAlignInstall(&select, prevSelect);

/* completion message is emitted through gbVerbMsg so that it is logged */
gbVerbMsg(0, "gbAlignInstall: complete");

gbIndexFree(&index);
return 0;
}
Beispiel #4
0
int main(int argc, char *argv[])
/* Check parameters, set up, loop through each GenBank file. */
{
char *gbName;
int argi = 1;
struct hash *estAuthorHash = NULL;
char *pepFa;

optionInit(&argc, argv, optionSpecs);
if (argc < 4)
    usage();

gByAccPrefixSize = optionInt("byAccPrefix", 0);
gbIdxName = optionVal("gbidx", NULL);
pepFa = optionVal("pepFa", NULL);
gbType = gbParseType(optionVal("type", "mrna,est"));
gbOrg  = optionVal("org", NULL);
inclXMs = optionExists("inclXMs");

if (gByAccPrefixSize > 4)  /* keep small to avoid tons of open files */
    errAbort("max value of -byAccPrefix is 4");

gCurAccPrefix[0] = '\0';

faName = argv[argi++];
raName = argv[argi++];

estAuthorHash = newHash(23);
kvt = newKvt(5*1024);
gbfInit();

if (pepFa != NULL)
    gPepFa = gbFaOpen(pepFa,"w");

char *blackList = optionVal("blackList", NULL);
if (blackList != NULL)
    blackListRanges = genbankBlackListParse(blackList);

while (argi < argc)
    {
    gbName = argv[argi++];
    printf("Processing %s into %s and %s\n", gbName, faName, raName);
    procOneGbFile(gbName, estAuthorHash);
    }

gbFaClose(&faFile);
gbFaClose(&gPepFa);
carefulClose(&raFile);
carefulClose(&gbIdxFile);

return 0;
}
static void loadGbCdnaInfoRow(struct metaDataTbls* metaDataTbls,
                              struct sqlConnection* conn, char** row)
/* load one row from the gbCdnaInfo table */
{
struct metaData* md;
int len, numNonZero, iRow = 0;
char *acc, *dir;
boolean gotError, isOk;

/* columns: acc,id,moddate,version,moddate,type */
acc = row[iRow++];
md = metaDataTblsGet(metaDataTbls, acc);
if (md->inGbCdnaInfo)
    {
    gbError("%s: acc occurs multiple times in the mrna table", acc);
    return;
    }
md->inGbCdnaInfo = TRUE;
md->gbCdnaInfoId = strToUnsigned(row[iRow++], acc, "gbCdnaInfo.id", NULL);
len = strlen(acc);
md->gbCdnaInfoVersion = strToUnsigned(row[iRow++], "gbCdnaInfo.version", acc, &gotError);
if (!gotError && (md->gbCdnaInfoVersion <= 0))
     gbError("%s: gbCdnaInfo.version invalid: \"%d\"", acc, md->gbCdnaInfoVersion);
isOk = TRUE;
md->gbCdnaInfoModdate = gbParseChkDate(row[iRow++], &isOk);
if (!isOk)
    gbError("%s: invalid gbCdnaInfo.moddate value: \"%s\"", acc, row[iRow-1]);
md->gbCdnaInfoType = gbParseType(row[iRow++]);
md->typeFlags |= md->gbCdnaInfoType;

dir = row[iRow++];
if ((strlen(dir) > 1) || (strchr("053", *dir) == NULL))
    gbError("%s: invalid gbCdnaInfo.direction value: \"%s\"", acc, dir);

/* Make sure that at least a few of the id fields have data  */
numNonZero = 0;
while (iRow < 20)
    {
    int id = strToUnsigned(row[iRow++], md->acc, "gbCdnaInfo.?", NULL);
    if (id > 0)
        numNonZero++;
    /* remember if we have a description */
    if (iRow-1 == 16)
        md->haveDesc = (id != 0);
    }
if (numNonZero == 0)
    gbError("%s: none of gbCdnaInfo string ids have non-zero values", dir);
else if (numNonZero < 4)
    gbError("%s: only %d of gbCdnaInfo string ids have non-zero values",
            dir, numNonZero);
}
static void parseGbSeqRow(char **row, struct seqFields *seq)
/* Parse a row from gbSeq into seq.  No dynamic memory is allocated; seq->acc
 * points into the row.  Columns are read in the order: id, acc, version,
 * size, gbExtFile, file_offset, file_size, type, srcDb.  When the type
 * column is "PEP", seq->type is left unmodified. */
{
char *acc = row[1];
char *typeStr = row[7];

seq->id = strToUnsigned(row[0], acc, "gbSeq.id", NULL);
seq->acc = acc;
seq->version = strToUnsigned(row[2], acc, "gbSeq.version", NULL);
seq->size = strToUnsigned(row[3], acc, "gbSeq.size", NULL);
seq->gbExtFile = strToUnsigned(row[4], acc, "gbSeq.gbExtFile", NULL);
seq->file_offset = strToOffset(row[5], acc, "gbSeq.file_offset");
seq->file_size = strToOffset(row[6], acc, "gbSeq.file_size");

/* type for peptides not supported by gbParseType, so it is skipped */
if (!sameWord(typeStr, "PEP"))
    seq->type = gbParseType(typeStr);
seq->srcDb = gbParseSrcDb(row[8]);
}
static void loadGbStatusRow(struct metaDataTbls* metaDataTbls,
                            struct sqlConnection* conn, char** row,
                            unsigned descOrgCats)
/* Load a row of the gbStatus table into the metaData entry for its
 * accession.  If the accession was already loaded from gbCdnaInfo, the
 * two records are cross-checked and each mismatch is reported through
 * gbError.  descOrgCats is a mask of the organism categories for which a
 * gbCdnaInfo description is expected.  The conn argument is not used here. */
{
/* columns, in the order read: acc,version,modDate,type,srcDb,orgCat,gbSeq,numAligns */
char **col = row;
struct metaData* md = metaDataTblsGet(metaDataTbls, *col++);

if (md->inGbStatus)
    gbError("%s: occurs multiple times in the gbStatus table", md->acc);
md->inGbStatus = TRUE;
md->gbsVersion = strToUnsigned(*col++, md->acc, "gbStatus.version", NULL);

char *modDateStr = *col++;
boolean isOk = TRUE;
md->gbsModDate = gbParseChkDate(modDateStr, &isOk);
if (!isOk)
    gbError("%s: invalid gbStatus.moddate value: \"%s\"", md->acc, modDateStr);

md->gbsType = gbParseType(*col++);
md->gbsSrcDb = gbParseSrcDb(*col++);
md->gbsOrgCat = gbParseOrgCat(*col++);
HGID seqId = strToUnsigned(*col++, md->acc, "gbStatus.gbSeq", NULL);
md->gbsNumAligns = strToUnsigned(*col++, md->acc, "gbStatus.numAligns",
                                 NULL);

md->typeFlags |= md->gbsType;

/* nothing to cross-check unless the gbCdnaInfo row has been loaded */
if (!md->inGbCdnaInfo)
    return;

if (seqId != md->gbCdnaInfoId)
    gbError("%s: gbStatus.gbSeq (%d) not same gbCdnaInfo.id (%d)", md->acc, seqId,
            md->gbCdnaInfoId);
if (md->gbsType != md->gbCdnaInfoType)
    gbError("%s: gbStatus.type (%s) not same as gbCdnaInfo.type (%s)", md->acc,
            gbFmtSelect(md->gbsType), gbFmtSelect(md->gbCdnaInfoType));
if (md->gbsSrcDb != (md->typeFlags & GB_SRC_DB_MASK))
    gbError("%s: gbStatus.srcDb (%s) not same gbCdnaInfo.srcDb (%s)", md->acc,
            gbFmtSelect(md->gbsSrcDb), gbFmtSelect(md->typeFlags));
if (md->gbsVersion != md->gbCdnaInfoVersion)
    gbError("%s: gbStatus.version (%d) not same gbCdnaInfo.version (%d)", md->acc,
            md->gbsVersion, md->gbCdnaInfoVersion);
if (md->gbsModDate != md->gbCdnaInfoModdate)
    gbError("%s: gbStatus.modDate (%s) not same gbCdnaInfo.moddate (%s)", md->acc,
            gbFormatDate(md->gbsModDate), gbFormatDate(md->gbCdnaInfoModdate));

/* a description must be present exactly when the entry's organism
 * category is one of those selected by descOrgCats */
if (descOrgCats & md->gbsOrgCat)
    {
    if (!md->haveDesc)
        gbError("%s: should have gbCdnaInfo.description: %s", md->acc,
                gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
    }
else if (md->haveDesc)
    gbError("%s: should not have gbCdnaInfo.description: %s", md->acc,
            gbFmtSelect(md->gbsType|md->gbsOrgCat|md->gbsSrcDb));
}