Пример #1
0
static void checkTblOrder(struct sqlConnection *conn,
                          char *tbl, char *chromFld, char *startFld)
/* check sorting of a positional table */
{
verbose(2, "checking %s.%s\n", sqlGetDatabase(conn), tbl);
char prevChrom[256], *chrom;
prevChrom[0] = '\0';
int prevStart = 0, start;
off_t irow = 0;

char query[512];
sqlSafef(query, sizeof(query), "SELECT %s,%s FROM %s", chromFld, startFld, tbl);
struct sqlResult *sr = sqlGetResult(conn, query);
while (nextRow(sr, &chrom, &start))
    {
    if (!sameString(chrom, prevChrom))
        {
        safecpy(prevChrom, sizeof(prevChrom), chrom);
        prevStart = start;
        }
    else
        {
        if (start < prevStart)
            errAbort("table %s.%s not sorted starting at row %lld: %s:%d",
                     sqlGetDatabase(conn), tbl, (long long)irow, chrom, start);
        prevStart = start;
        }
    irow++;
    }
sqlFreeResult(&sr);
}
Пример #2
0
struct hash *getAncientRepeats(struct sqlConnection *tConn,
			       struct sqlConnection *qConn)
/* Get hash of ancient repeats.  This keyed by name.family.class. */
{
struct sqlConnection *conn = NULL;
struct sqlResult *sr;
char **row;
char key[512];
struct hash *hash = newHash(10);

if (sqlTableExists(tConn, "ancientRepeat"))
    conn = tConn;
else if (sqlTableExists(qConn, "ancientRepeat"))
    conn = qConn;
else
    errAbort("Can't find ancientRepeat table in %s or %s",
	     sqlGetDatabase(tConn), sqlGetDatabase(qConn));
sr = sqlGetResult(conn, "NOSQLINJ select name,family,class from ancientRepeat");
while ((row = sqlNextRow(sr)) != NULL)
    {
    sprintf(key, "%s.%s.%s", row[0], row[1], row[2]);
    hashAdd(hash, key, NULL);
    }
sqlFreeResult(&sr);
return hash;
}
Пример #3
0
void refPepRepair(char *db,
                  char *accFile,
                  boolean dryRun)
/* fix dangling repPep gbSeq entries. */
{
struct sqlConnection *conn = sqlConnect(db);
struct brokenRefPepTbl *brpTbl;
struct extFileTbl* extFileTbl;
struct slName *accs = (accFile == NULL) ? NULL : slNameLoadReal(accFile);
if (!checkForRefLink(conn))
    {
    sqlDisconnect(&conn);
    return;
    }

gbVerbMsg(1, "%s: repairing refseq protein gbExtFile entries%s",
          sqlGetDatabase(conn), (dryRun? " (dry run)" : ""));

extFileTbl = extFileTblLoad(conn);
brpTbl = brokenRefPepTblNew(conn, accs);
brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl);
brokenRefPepGetMrnas(conn, brpTbl);

fillInFastaOffsets(brpTbl, conn, extFileTbl);
if (brpTbl->numToRepair > 0)
    makeRepairs(brpTbl, conn, extFileTbl, dryRun);
else
    gbVerbMsg(1, "%s: no refseq proteins to repair", sqlGetDatabase(conn));
brokenRefPepTblFree(&brpTbl);
extFileTblFree(&extFileTbl);
sqlDisconnect(&conn);
slFreeList(&accs);
}
static void showMrnaFromGenePred(struct sqlConnection *conn, 
	char *geneId, char *geneName)
/* Get mRNA sequence for gene from gene prediction. */
{
char *table = genomeSetting("knownGene");
struct sqlResult *sr;
char **row;
char query[256];
boolean hasBin = hIsBinned(sqlGetDatabase(conn), table);

hPrintf("<TT><PRE>");
safef(query, sizeof(query), 
    "select * from %s where name='%s'"
    " and chrom='%s' and txStart=%d and txEnd=%d", 
    table, geneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gene = genePredLoad(row+hasBin);
    struct bed *bed = bedFromGenePred(gene);
    struct dnaSeq *seq = hSeqForBed(sqlGetDatabase(conn), bed);
    hPrintf(">%s (%s predicted mRNA)\n", geneId, geneName);
    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
    dnaSeqFree(&seq);
    bedFree(&bed);
    genePredFree(&gene);
    }
else
    errAbort("Couldn't find %s at %s:%d-%d", geneId, 
    	curGeneChrom, curGeneStart, curGeneEnd);
sqlFreeResult(&sr);
hPrintf("</TT></PRE>");
}
Пример #5
0
void refPepList(char *db,
                FILE* outFh)
/* list of sequences needing repair */
{
struct sqlConnection *conn = sqlConnect(db);
struct brokenRefPepTbl *brpTbl;
struct hashCookie cookie;
struct hashEl *hel;
struct extFileTbl* extFileTbl = NULL;

if (!checkForRefLink(conn))
    {
    sqlDisconnect(&conn);
    return;
    }

extFileTbl = extFileTblLoad(conn);
brpTbl = brokenRefPepTblNew(conn, NULL);
brokenRefPepGetSeqScan(conn, extFileTbl, brpTbl);
brokenRefPepGetMrnas(conn, brpTbl);
extFileTblFree(&extFileTbl);

cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    fprintf(outFh, "%s\t%s\t%s\n", sqlGetDatabase(conn), brp->protAcc, (brp->mrnaAcc != NULL)? "repair" : "drop");
    }
gbVerbMsg(1, "%s: need to repair %d refseq protein gbExtFile entries",
          sqlGetDatabase(conn), brpTbl->numToRepair);
gbVerbMsg(1, "%s: need to drop %d refseq protein gbExtFile entries",
          sqlGetDatabase(conn), brpTbl->numToDrop);
}
void setupTable(struct sqlConnection *conn) 
/* Set up the autoTest table for testing. Call dropTable() later
 to remove table. */
{
struct lineFile *lf = lineFileOpen("output/newTest.sql", TRUE);
char *update = NULL;
char *line = NULL;
struct dyString *ds = newDyString(256);
if(sqlTableExists(conn, testTableName))
    errAbort("dbLinkTest.c::setupTable() - Table %s.%s already exists. Can't create another.",
             sqlGetDatabase(conn), testTableName);
while(lineFileNextReal(lf, &line)) 
    {
    char *tmp = strstr(line, "not null");
    if(tmp != NULL)
	{
	*tmp = ',';
	tmp++;
	*tmp = '\0';
	}
    subChar(line, '\t', ' ');
    dyStringPrintf(ds, "%s", line);
    }
update = replaceChars(ds->string, "PRIMARY KEY", "UNIQUE");
sqlUpdate(conn, update);
freez(&update);
dyStringFree(&ds);
lineFileClose(&lf);
}
static boolean mrnaDescriptionsExists(struct section *section, 
	struct sqlConnection *conn, char *geneId)
/* Return TRUE if mrna  on this one. */
{
struct psl *list = NULL;
if (hTableExists(sqlGetDatabase(conn), "all_mrna"))
    {
    struct sqlResult *sr;
    char **row;
    struct psl *psl;
    int rowOffset;
    char extra[64];
    safef(extra, sizeof(extra), "strand='%c'", curGenePred->strand[0]);
    sr = hRangeQuery(conn, "all_mrna", curGeneChrom, curGeneStart, curGeneEnd,
    	extra, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
         {
	 psl = pslLoad(row+rowOffset);
	 slAddHead(&list, psl);
	 }
    slReverse(&list);
    section->items = list;
    }
return slCount(list) > 0;
}
Пример #8
0
static void altSplicePrint(struct section *section,
	struct sqlConnection *conn, char *geneId)
/* Print out altSplicing info. */
{
char *altId = section->items;
char query[256];
struct sqlResult *sr;
char **row;
struct altGraphX *ag;
char table[64];
boolean hasBin;

hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, "altGraphX", table, &hasBin);
sqlSafef(query, sizeof(query), "select * from %s where name='%s'", table, altId);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    ag = altGraphXLoad(row+hasBin);
    hPrintf("<TABLE><TR><TD BGCOLOR='#888888'>\n");
    altGraphXMakeImage(ag);
    hPrintf("</TD></TR></TABLE><BR>");
    }
sqlFreeResult(&sr);
hPrintf("This graph shows alternative splicing observed in mRNAs and "
        "ESTs that is either conserved in mouse, present in full length "
	"mRNAs, or observed at least three times in ESTs.");
}
Пример #9
0
static void fillInFastaOffsets(struct brokenRefPepTbl *brpTbl,
                               struct sqlConnection *conn,
                               struct extFileTbl* extFileTbl)
/* get offsets of proteins in fasta files */
{
struct hashCookie cookie = hashFirst(brpTbl->protFaHash);
struct hashEl *hel;

/* fill in */
while ((hel = hashNext(&cookie)) != NULL)
    getFastaOffsets(brpTbl, conn, extFileTbl, hel->name);

/* check if any missing */
cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    if (strlen(brp->mrnaAcc) && (brp->newFaOff < 0))
        {
        /* in one case, this was a pseudoGene mistakenly left in as an
         * mRNA, so make it a warning */
        fprintf(stderr, "Warning: %s: refPep %s (for %s) not found in %s\n",
                sqlGetDatabase(conn), brp->protAcc, brp->mrnaAcc,
                brp->newFaPath);
        }
    }
}
Пример #10
0
static struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return current gene in genePred. */
{
char *track = genomeSetting("knownGene");
char table[HDB_MAX_TABLE_STRING];
boolean hasBin;
char query[256];
struct sqlResult *sr;
char **row;
struct genePred *gp = NULL;
if (!hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, sizeof table, &hasBin))
    errAbort("track %s not found", track);
bool hasAttrId = sqlColumnExists(conn, table, "alignId");
sqlSafef(query, sizeof(query),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    gp = genePredLoad(row + hasBin);

#define  ALIGNIDFIELD      11  // Gencode Id
    if (hasAttrId)
	curAlignId = cloneString(row[ALIGNIDFIELD]);
    else
	curAlignId = gp->name;
    }
sqlFreeResult(&sr);
if (gp == NULL)
    errAbort("getCurGenePred: Can't find %s", query);
return gp;
}
static char *otherOrgPositionFromDb(struct otherOrg *otherOrg, char *id)
/* Get position of id from other organism database, if possible. */
{
struct hTableInfo *hti = hFindTableInfo(otherOrg->db, NULL,
                                        otherOrg->geneTable);
if (hti == NULL)
    return NULL;  // table  not found

struct sqlConnection *conn = hAllocConn(otherOrg->db);
char query[512];
safef(query, sizeof(query),
      "select concat(%s, ':', %s+1, '-', %s) from %s "
      "where %s = '%s'",
      hti->chromField, hti->startField, hti->endField,
      otherOrg->geneTable, hti->nameField, id);
char *pos = sqlQuickString(conn, query);
if (pos != NULL)
    {
    char posPlus[2048];
    safef(posPlus, sizeof(posPlus), "%s&%s=%s&hgFind.matches=%s",
          pos,
          otherOrg->geneTable, hTrackOpenVis(sqlGetDatabase(conn), otherOrg->geneTable),
          id);
    hFreeConn(&conn);
    freez(&pos);
    return cloneString(posPlus);
    }
else
    {
    hFreeConn(&conn);
    return NULL;
    }
}
Пример #12
0
static void makeRepairs(struct brokenRefPepTbl *brpTbl,
                        struct sqlConnection *conn,
                        struct extFileTbl* extFileTbl,
                        boolean dryRun)
/* make repairs once data is collected */
{
static char *tmpDir = "/var/tmp";
struct hashCookie cookie;
struct hashEl *hel;
int repairCnt = 0;
int dropCnt = 0;
struct seqTbl* seqTbl = seqTblNew(conn, tmpDir, (gbVerbose > 3));
struct sqlDeleter* seqTblDeleter = sqlDeleterNew(tmpDir, (gbVerbose > 3));

cookie = hashFirst(brpTbl->protAccHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct brokenRefPep *brp = hel->val;
    if ((brp->mrnaAcc != NULL) && (brp->newFaOff >= 0))
        {
        refPepRepairOne(conn, brp, seqTbl, extFileTbl, dryRun);
        repairCnt++;
        }
    else
        {
        refPepDropOne(conn, brp, seqTblDeleter, dryRun);
        dropCnt++;
        }
    }
if (dryRun)
    {
    gbVerbMsg(1, "%s: would have repaired %d refseq protein gbExtFile entries", 
              sqlGetDatabase(conn), repairCnt);
    gbVerbMsg(1, "%s: would have dropped %d refseq protein gbExtFile entries", 
              sqlGetDatabase(conn), dropCnt);
    }
else
    {
    seqTblCommit(seqTbl, conn);
    gbVerbMsg(1, "%s: repaired %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), repairCnt);
    sqlDeleterDel(seqTblDeleter, conn, SEQ_TBL, "acc");
    gbVerbMsg(1, "%s: dropped %d refseq protein gbExtFile entries",
              sqlGetDatabase(conn), dropCnt);
    }
}
Пример #13
0
void palOptions(struct cart *cart,
	struct sqlConnection *conn, void (*addButtons)(),
	char *extraVar)
/* output the options dialog (select MAF file, output options */
{
char *database = sqlGetDatabase(conn);

hPrintf("<FORM ACTION=\"%s\" NAME=\"mainForm\" METHOD=POST>\n", cgiScriptName());
cartSaveSession(cart);

char *mafTable = outMafTableDrop(cart, conn);
char *numColumns = cartUsualString(cart, hgtaCGINumColumns, "");

printf("<BR><BR><B>Formatting options:</B><BR>\n");
jsMakeTrackingCheckBox(cart, hgtaCGIGeneExons, hgtaJSGeneExons, FALSE);
printf("Separate into exons<BR>");
jsMakeTrackingCheckBox(cart, hgtaCGIGeneNoTrans, hgtaJSGeneNoTrans, FALSE);
printf("Show nucleotides<BR>");
jsMakeTrackingCheckBox(cart, hgtaCGIGeneOutBlank, hgtaJSGeneOutBlank, FALSE);
printf("Output lines with just dashes<BR>");
jsMakeTrackingCheckBox(cart, hgtaCGIOutTable, hgtaJSOutTable, FALSE);
printf("Format output as table ");
jsMakeTrackingCheckBox(cart, hgtaCGITruncHeader, hgtaJSTruncHeader, FALSE);
printf("Truncate headers at ");
cgiMakeTextVar(hgtaCGINumColumns, numColumns, 2);
printf("characters (enter zero for no headers)<BR>");

printf("<BR>");
struct trackDb *maftdb = hTrackDbForTrack(database, mafTable);

/* get maf parent (if any) */
maftdb->parent = hCompositeTrackDbForSubtrack(database,maftdb);

wigMafSpeciesTable(cart, maftdb, mafTable, database);

addButtons();

cartSaveSession(cart);

hPrintf("</FORM>\n");

/* Hidden form - for benefit of javascript. */
    {
    static char *saveVars[32];
    int varCount = ArraySize(curVars);

    assert(varCount  < (sizeof saveVars / sizeof(char *)));
    memcpy(saveVars, curVars, varCount * sizeof(saveVars[0]));
    if (extraVar != NULL)
	{
	assert(varCount + 1 < (sizeof saveVars / sizeof(char *)));
	saveVars[varCount] = extraVar;
	varCount++;
	}
    jsCreateHiddenForm(cart, cgiScriptName(), saveVars, varCount);
    }

}
struct mafAli *mafLoadInRegion(struct sqlConnection *conn, char *table,
	char *chrom, int start, int end)
{
struct sqlConnection *conn2 = hAllocConn(sqlGetDatabase(conn));
struct mafAli *ret = mafLoadInRegion2(conn, conn2, table, chrom,
    start, end,NULL);
hFreeConn(&conn2);
return ret;
}
Пример #15
0
static void processGenomic(struct sqlConnection *conn, struct customPp *cpp, 
	int colCount, char *formatType, boolean firstLineLabels,
	struct labeledFile *fileList, boolean report)
/* Process three column file into chromGraph.  Abort if
 * there's a problem. */
{
char **row;
struct chromGraph *cg;
struct hash *chromHash = chromInfoHash(conn);
struct labeledFile *fileEl;
struct chromInfo *ci;
int i;
int rowCount = 0;
Chopper chopper = getChopper(formatType);
char *line;

AllocArray(row, colCount);
if (firstLineLabels)
    readLabels(cpp, 2, chopper, row, colCount, fileList);
while ((line = customFactoryNextRealTilTrack(cpp)) != NULL)
    {
    chopper(line, row, colCount);
    char *chrom = cloneString(row[0]);
    int start = cppNeedNum(cpp, row, 1);
    ci = hashFindVal(chromHash, chrom);
    if (ci == NULL)
	cppAbort(cpp, "Chromosome %s not found in this assembly (%s).", 
		 chrom, sqlGetDatabase(conn));
    if (start >= ci->size)
	{
	cppAbort(cpp, "Chromosome %s is %d bases long, but got coordinate %d.",
		 chrom, ci->size, start);
	}
    else if (start < 0)
        {
	cppAbort(cpp, "Negative base position %d on chromosome %s.",
		 start, chrom);
	}
    for (i=2, fileEl = fileList; i<colCount; ++i, fileEl = fileEl->next)
	{
	char *val = row[i];
	if (val[0] != 0)
	    {
	    AllocVar(cg);
	    cg->chrom = chrom;
	    cg->chromStart = start;
	    cg->val = cppNeedDouble(cpp, row, i);
	    slAddHead(&fileEl->cgList, cg);
	    }
	}
    ++rowCount;
    }
if (report)
    printf("Read in %d markers and values in <i>chromosome base</i> format.<BR>\n", 
	rowCount);
}
Пример #16
0
void loadOneSplitTable(struct hashEl *hel)
/* Load the table for the given chrom. */
{
    char tempName[512];
    char tableName[256];
    char *chrom = hel->name;
    safef(tempName, sizeof(tempName), "%s_%s", chrom, defaultTempName);
    safef(tableName, sizeof(tableName), "%s_%s", chrom, suffix);
    loadOneTable(sqlGetDatabase(theConn), theConn, tempName, tableName);
}
Пример #17
0
static void createGeneTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* create a genePred table */
{
char *sql = genePredGetCreateSql(ggt->tbl,
                                 (ggt->hasExtCols ? genePredAllFlds : 0),
                                 (ggt->hasBin ? genePredWithBin : 0),
                                 hGetMinIndexLength(sqlGetDatabase(conn)));
sqlRemakeTable(conn, ggt->tbl, sql);
freeMem(sql);
}
Пример #18
0
static void refPepDropOne(struct sqlConnection *conn,
                          struct brokenRefPep *brp,
                          struct sqlDeleter* seqTblDeleter,
                          boolean dryRun)
/* drop a refPep */
{
gbVerbPr(2, "%s\t%s\tdrop", sqlGetDatabase(conn), brp->protAcc);
if (!dryRun)
    sqlDeleterAddAcc(seqTblDeleter, brp->protAcc);
}
Пример #19
0
struct slName *getTableNames(struct sqlConnection *conn)
/* Return a list of names of tables that have not been excluded by 
 * command line options. */
{
char *query = hoursOld ? "NOSQLINJ show table status" : "NOSQLINJ show tables";
struct sqlResult *sr = sqlGetResult(conn, query);
struct slName *tableList = NULL;
char **row = NULL;
int startTime = clock1();
int ageThresh = hoursOld * 3600;

while((row = sqlNextRow(sr)) != NULL)
    {
    struct slName *tableName = NULL;
    struct slName *pat = NULL;
    boolean gotMatch = FALSE;
    if (hoursOld)
	{
	if (row[11] != NULL)
	    {
	    int tableUpdateTime = sqlDateToUnixTime(row[11]);
	    int ageInSeconds = startTime - tableUpdateTime;
	    if (ageInSeconds > ageThresh)
		continue;
	    }
	else
	    {
	    verbose(2,
		    "Got NULL update time for table %s.%s with hoursOld=%d\n",
		    sqlGetDatabase(conn), row[0], hoursOld);
	    }
	}
    for (pat = excludePatterns;  pat != NULL;  pat=pat->next)
	{
	if (wildMatch(pat->name, row[0]))
	    {
	    gotMatch = TRUE;
	    break;
	    }
	}
    if (gotMatch)
	continue;
    if (verboseLevel() >= 3 || justList)
	fprintf(stderr, "Adding %s\n", row[0]);
    tableName = newSlName(row[0]);
    slAddHead(&tableList, tableName);
    }
sqlFreeResult(&sr);
if (justList)
    exit(0);
slReverse(&tableList);
return tableList;
}
Пример #20
0
static void getMetaData(struct sqlConnection *conn, struct refSeqVerInfo *refSeqVerInfoLst, char *outFile)
/* get request prot sequences from database */
{
struct sqlConnection *conn2 = sqlConnect(sqlGetDatabase(conn));
static char *hdr = "#mrnaAcc\t" "protAcc\t" "geneName\t" "ncbiGeneId\t" "cds\t" "product\t" "status\n";
FILE *fh = mustOpen(outFile, "w");
fputs(hdr, fh);
struct refSeqVerInfo *rsvi;
for (rsvi = refSeqVerInfoLst; rsvi != NULL; rsvi = rsvi->next)
    processMetaData(fh, conn, conn2, rsvi);
carefulClose(&fh);
sqlDisconnect(&conn2);
}
Пример #21
0
static boolean checkForRefLink(struct sqlConnection *conn)
/* check if there is a refLink table, if none, issue message and return
 * false */
{
if (!sqlTableExists(conn, "refLink"))
    {
    fprintf(stderr, "Note: %s: no RefSeqs loaded, skipping",
            sqlGetDatabase(conn));
    return FALSE;
    }
else
    return TRUE;
}
static void getMetaData(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* get request prot sequences from database */
{
struct sqlConnection *conn2 = sqlConnect(sqlGetDatabase(conn));
static char *hdr = "#mrnaAcc\t" "protAcc\t" "geneName\t" "ncbiGeneId\t" "cds\t" "product\n";
FILE *fh = mustOpen(outFile, "w");
fputs(hdr, fh);
struct hashCookie cookie = hashFirst(refSeqVerInfoTbl);
struct hashEl *hel;
while ((hel = hashNext(&cookie)) != NULL)
    {
    processMetaData(fh, conn, conn2, hel->val);
    }
carefulClose(&fh);
sqlDisconnect(&conn2);
}
static void getAligns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* get request alignments from database */
{
int off = hOffsetPastBin(sqlGetDatabase(conn), NULL, "refSeqAli");
struct sqlResult *sr = sqlGetResult(conn, "SELECT * FROM refSeqAli");
FILE *fh = mustOpen(outFile, "w");
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct psl *psl = pslLoad(row+off);
    processPsl(fh, refSeqVerInfoTbl, psl);
    pslFree(&psl);
    }
carefulClose(&fh);
sqlFreeResult(&sr);
}
Пример #24
0
static void createFlatTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* create a genePred flat table */
{
/* edit generated SQL to add geneName column and index */
char *tmpDef = genePredGetCreateSql(ggt->flatTbl, 0, 0, hGetMinIndexLength(sqlGetDatabase(conn)));
char *part2 = strchr(tmpDef, '(');
*(part2++) = '\0';
char *p = strrchr(part2, ')');
*p = '\0';
char editDef[1024];
safef(editDef, sizeof(editDef),
      "%s(geneName varchar(255) not null, %s, INDEX(geneName(10)))",
      tmpDef, part2);
freeMem(tmpDef);
sqlRemakeTable(conn, ggt->flatTbl, editDef);
}
Пример #25
0
static void refPepRepairOne(struct sqlConnection *conn,
                            struct brokenRefPep *brp,
                            struct seqTbl* seqTbl,
                            struct extFileTbl* extFileTbl,
                            boolean dryRun)
/* repair a refPep */
{
gbVerbPrStart(2, "%s\t%s\trepair", sqlGetDatabase(conn), brp->protAcc);
gbVerbPrMore(3, "\tsz: %d\toff: %lld\trecSz: %d\tfid: %d\t%s",
             brp->newSeqSize, (long long)brp->newFaOff, brp->newRecSize,
             brp->newFaId, brp->newFaPath);
gbVerbPrMore(2, "\n");
if (!dryRun)
    seqTblMod(seqTbl, brp->protSeqId, -1, brp->newFaId,
              brp->newSeqSize, brp->newFaOff, brp->newRecSize);
}
Пример #26
0
void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Return a tree of ranges for sequence gaps in all chromosomes, 
 * assuming an unsplit gap table -- when the table is unsplit, it's 
 * probably for a scaffold assembly where we *really* don't want 
 * to do one query per scaffold! */
{
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
int rowOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap");
struct sqlResult *sr;
char **row;
char *prevChrom = NULL;

sr = sqlGetResult(conn, "NOSQLINJ select * from gap order by chrom");
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct agpGap gap;
    struct simpleRange *range;
    agpGapStaticLoad(row+rowOffset, &gap);
    if (prevChrom == NULL)
	prevChrom = cloneString(gap.chrom);
    else if (! sameString(prevChrom, gap.chrom))
	{
	setNGap(prevChrom, chromHash, tree);
	freeMem(prevChrom);
	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	prevChrom = cloneString(gap.chrom);
	}
    lmAllocVar(tree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(tree, range);
    }
if (prevChrom != NULL)
    {
    setNGap(prevChrom, chromHash, tree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}
Пример #27
0
struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return current gene in genePred. */
{
char *track = genomeSetting("knownGene");
char table[64];
boolean hasBin;
char query[256];
struct sqlResult *sr;
char **row;
struct genePred *gp = NULL;
hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, track, table, &hasBin);
sqlSafef(query, sizeof(query),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, table, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    gp = genePredLoad(row + hasBin);
sqlFreeResult(&sr);
if (gp == NULL)
    errAbort("getCurGenePred: Can't find %s", query);
return gp;
}
struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table,
	char *chrom, int start, int end,
	char *tPrefix, char *qPrefix, int tSize,  struct hash *qSizeHash)
/* Return list of alignments in region from axt external file as a maf. */
{
char **row;
unsigned int extFileId = 0;
struct lineFile *lf = NULL;
struct mafAli *maf, *mafList = NULL;
struct axt *axt;
int rowOffset;
struct sqlResult *sr = hRangeQuery(conn, table, chrom,
    start, end, NULL, &rowOffset);

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct scoredRef ref;
    scoredRefStaticLoad(row + rowOffset, &ref);
    if (ref.extFile != extFileId)
	{
	char *path = hExtFileName(sqlGetDatabase(conn),"extFile", ref.extFile);
	lf = lineFileOpen(path, TRUE);
	extFileId = ref.extFile;
	}
    lineFileSeek(lf, ref.offset, SEEK_SET);
    axt = axtRead(lf);
    if (axt == NULL)
        internalErr();
    maf = mafFromAxt(axt, tSize, tPrefix, hashIntVal(qSizeHash, axt->qName), qPrefix);
    axtFree(&axt);
    slAddHead(&mafList, maf);
    }
sqlFreeResult(&sr);
lineFileClose(&lf);
slReverse(&mafList);
return mafList;
}
Пример #29
0
static void getRepeats(struct sqlConnection *conn, struct hash *arHash,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Return a tree of ranges for sequence gaps in chromosome */
{
char *db = sqlGetDatabase(conn);
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char tableName[64];
char query[256];
boolean splitRmsk = TRUE;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

safef(tableName, sizeof(tableName), "%s_rmsk", chrom);
if (! sqlTableExists(conn, tableName))
    {
    safef(tableName, sizeof(tableName), "rmsk");
    if (! sqlTableExists(conn, tableName))
	errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n",
		 chrom, db, chrom, db);
    splitRmsk = FALSE;
    }
if (splitRmsk)
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s",
	    tableName);
else
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s "
	    "where genoName = \"%s\"",
	    tableName, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    char arKey[512];
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    sprintf(arKey, "%s.%s.%s", row[2], row[3], row[4]);
    if (arHash != NULL && hashLookup(arHash, arKey))
        {
	lmAllocVar(newTree->lm, range);
	range->start = sqlUnsigned(row[0]);
	range->end = sqlUnsigned(row[1]);
	if (prevNewRange == NULL)
	    prevNewRange = range;
	else if (overlap(range, prevNewRange))
	    {
	    /* merge r into prevR & discard; prevR gets passed forward. */
	    if (range->end > prevNewRange->end)
		prevNewRange->end = range->end;
	    if (range->start < prevNewRange->start)
		prevNewRange->start = range->start;
	    }
	else
	    {
	    rbTreeAdd(allTree, prevNewRange);
	    prevNewRange = range;
	    }
	}
    }
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
if (prevNewRange != NULL)
    rbTreeAdd(newTree, prevNewRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}
Пример #30
0
int palOutPredList(struct sqlConnection *conn, struct cart *cart,
    struct genePred *list)
/* output a list of genePreds in pal format */
{
if (list == NULL)
    return 0;

char *mafTable = cartString(cart, hgtaCGIGeneMafTable);
char *database = sqlGetDatabase(conn);
struct trackDb *maftdb = hTrackDbForTrack(database, mafTable);
struct wigMafSpecies *wmSpecies;
int groupCnt;

/* get maf parent (if any) */
maftdb->parent = hCompositeTrackDbForSubtrack(database,maftdb);

/* this queries the state of the getSpecies dialog */
wigMafGetSpecies(cart, maftdb, maftdb->track, database, &wmSpecies, &groupCnt);

/* since the species selection dialog doesn't list
 * the reference species, we just automatically include
 * it */
struct slName *includeList = slNameNew(database);

/* now make a list of all species that are on */
for(; wmSpecies; wmSpecies = wmSpecies->next)
    {
    if (wmSpecies->on)
	{
	struct slName *newName = slNameNew(wmSpecies->name);
	slAddHead(&includeList, newName);
	}
    }
slReverse(&includeList);

boolean inExons = cartUsualBoolean(cart, hgtaCGIGeneExons , FALSE);
boolean noTrans = cartUsualBoolean(cart, hgtaCGIGeneNoTrans, FALSE);
boolean outBlank = cartUsualBoolean(cart, hgtaCGIGeneOutBlank, FALSE);
boolean outTable = cartUsualBoolean(cart, hgtaCGIOutTable, FALSE);
boolean truncHeader = cartUsualBoolean(cart, hgtaCGITruncHeader, FALSE);
int numCols = cartUsualInt(cart, hgtaCGINumColumns, 20);
unsigned options = 0;

if (inExons)  options |= MAFGENE_EXONS;
if (noTrans)  options |= MAFGENE_NOTRANS;
if (outBlank) options |= MAFGENE_OUTBLANK;
if (outTable) options |= MAFGENE_OUTTABLE;

if (!truncHeader)
    numCols = -1;

/* send out the alignments */
int outCount = 0;
for( ; list ; list = list->next)
    {
    if (list->cdsStart != list->cdsEnd)
	{
	outCount++;
	mafGeneOutPred(stdout, list, database, mafTable,
	    includeList, options, numCols);
	}
    }

slNameFreeList(&includeList);
return outCount;
}