void saveClonePos(struct clonePos *cloneList, char *database) /* Save sorted clone position list to database. */ { struct sqlConnection *conn = sqlConnect(database); struct clonePos *clone; struct tempName tn; FILE *f; struct dyString *ds = newDyString(2048); /* Create tab file from clone list. */ printf("Creating tab file\n"); makeTempName(&tn, "hgCP", ".tab"); f = mustOpen(tn.forCgi, "w"); for (clone = cloneList; clone != NULL; clone = clone->next) clonePosTabOut(clone, f); fclose(f); /* Create table if it doesn't exist, delete whatever is * already in it, and fill it up from tab file. */ printf("Loading clonePos table\n"); sqlMaybeMakeTable(conn, "clonePos", createClonePos); sqlUpdate(conn, "NOSQLINJ DELETE from clonePos"); sqlDyStringPrintf(ds, "LOAD data local infile '%s' into table clonePos", tn.forCgi); sqlUpdate(conn, ds->string); /* Clean up. */ remove(tn.forCgi); sqlDisconnect(&conn); }
void hgSoftberryHom(char *database, int fileCount, char *files[]) /* hgSoftberryHom - Make table storing Softberry protein homology information. */ { int i; char *fileName; char *table = "softberryHom"; char *tabFileName = "softberryHom.tab"; FILE *f = mustOpen(tabFileName, "w"); struct sqlConnection *conn = NULL; struct dyString *ds = newDyString(2048); for (i=0; i<fileCount; ++i) { fileName = files[i]; printf("Processing %s\n", fileName); makeTabLines(fileName, f); } carefulClose(&f); /* Create table if it doesn't exist, delete whatever is * already in it, and fill it up from tab file. */ conn = sqlConnect(database); printf("Loading %s table\n", table); sqlMaybeMakeTable(conn, table, createTable); sqlDyStringPrintf(ds, "DELETE from %s", table); sqlUpdate(conn, ds->string); dyStringClear(ds); sqlDyStringPrintf(ds, "LOAD data local infile '%s' into table %s", tabFileName, table); sqlUpdate(conn, ds->string); sqlDisconnect(&conn); }
void saveCtgPos(struct ctgPos *ctgList, char *database) /* Save ctgList to database. */ { struct sqlConnection *conn = sqlConnect(database); struct ctgPos *ctg; char *tabFileName = "ctgPos.tab"; FILE *f; struct dyString *ds = newDyString(2048); /* Create tab file from ctg list. */ printf("Creating tab file\n"); f = mustOpen(tabFileName, "w"); for (ctg = ctgList; ctg != NULL; ctg = ctg->next) ctgPosTabOut(ctg, f); fclose(f); /* Create table if it doesn't exist, delete whatever is * already in it, and fill it up from tab file. */ printf("Loading ctgPos table\n"); sqlMaybeMakeTable(conn, "ctgPos", createCtgPos); sqlUpdate(conn, "NOSQLINJ DELETE from ctgPos"); sqlDyStringPrintf(ds, "LOAD data local infile '%s' into table ctgPos", tabFileName); sqlUpdate(conn, ds->string); /* Clean up. */ remove(tabFileName); sqlDisconnect(&conn); }
void makeTable(struct sqlConnection *conn, char *table)
/* Set up the database table.  The create statement is the createSql
 * format filled in with the table name.  When gAppend is set the table
 * goes through sqlMaybeMakeTable; otherwise it goes through
 * sqlRemakeTable. */
{
char createStatement[1024];

safef(createStatement, sizeof(createStatement), createSql, table);
if (!gAppend)
    {
    sqlRemakeTable(conn, table, createStatement);
    return;
    }
sqlMaybeMakeTable(conn, table, createStatement);
}
void loadDatabase(char *database, char *fileName)
/* Fill the softPromoter table of database from the tab-separated file
 * fileName.  The table is created when absent and emptied before the
 * load. */
{
struct sqlConnection *conn = sqlConnect(database);
char loadCmd[256];

printf("Loading %s from %s\n", database, fileName);

/* Make sure there is an empty table to load into. */
sqlMaybeMakeTable(conn, "softPromoter", createString);
sqlUpdate(conn, "NOSQLINJ delete from softPromoter");

/* Bulk load the rows. */
sqlSafef(loadCmd, sizeof(loadCmd),
    "load data local infile '%s' into table softPromoter", fileName);
sqlUpdate(conn, loadCmd);

sqlDisconnect(&conn);
}
void hgGoldGap(char *database, char *agpFile) /* hgGoldGap - Put chromosome .agp file into browser database.. */ { struct dyString *ds = dyStringNew(0); struct sqlConnection *conn = NULL; if (! noLoad) conn = sqlConnect(database); verbose(2,"#\tsimple gold gap, no .gl files produced, from agp file: %s\n", agpFile); splitAgp(agpFile, goldTabName, gapTabName); /* Create gold table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGold, "gold"); dyStringPrintf(ds, goldIndex, maxChromNameSize, maxChromNameSize, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlRemakeTable(conn, "gold", ds->string); dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", goldTabName, "gold"); if (! noLoad) { sqlUpdate(conn, ds->string); remove(goldTabName); } /* Create gap table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGap, "gap"); dyStringPrintf(ds, gapIndex, maxChromNameSize, maxChromNameSize); verbose(2, "%s", ds->string); if (! noLoad) { sqlRemakeTable(conn, "gap", ds->string); sqlMaybeMakeTable(conn, "gap", ds->string); } dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", gapTabName, "gap"); if (! noLoad) { sqlUpdate(conn, ds->string); remove(gapTabName); sqlDisconnect(&conn); } dyStringFree(&ds); }
void setupTable(char *database, struct sqlConnection *conn, char* table) /* create a psl table as needed */ { int minLength = hGetMinIndexLength(database); char *sqlCmd = pslGetCreateSql(table, pslCreateOpts, (pslCreateOpts & PSL_TNAMEIX) ? minLength : 0); if (append) { checkBinConsistent(conn, table); sqlMaybeMakeTable(conn, table, sqlCmd); } else sqlRemakeTable(conn, table, sqlCmd); freez(&sqlCmd); }
void hgFiberglass(char *database, char *fileName) /* hgFiberglass - Turn Fiberglass Annotations into a BED and load into database. */ { struct sqlConnection *conn = sqlConnect(database); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *tabName = "fiberMouse.tab"; FILE *f = mustOpen(tabName, "w"); char *row[3]; struct bed *bedList = NULL, *bed; char *ti; char query[256]; while (lineFileRow(lf, row)) { AllocVar(bed); bed->chrom = "chr22"; bed->chromStart = atoi(row[1])-1; bed->chromEnd = atoi(row[2]); if (bed->chromEnd <= bed->chromStart) errAbort("End before begin line %d of %s", lf->lineIx, lf->fileName); ti = row[0]; if (ti[0] == '|') ti += 1; if (!startsWith("ti|", ti)) errAbort("Trace doesn't start with ti| line %d of %s", lf->lineIx, lf->fileName); bed->name = cloneString(ti); slAddHead(&bedList, bed); } lineFileClose(&lf); printf("Loaded %d ecores from %s\n", slCount(bedList), fileName); slSort(&bedList, bedCmp); /* Write out tab-separated file. */ for (bed = bedList; bed != NULL; bed = bed->next) fprintf(f, "%s\t%d\t%d\t%s\n", bed->chrom, bed->chromStart, bed->chromEnd, bed->name); carefulClose(&f); printf("Loading database\n"); sqlMaybeMakeTable(conn, "fiberMouse", createString); sprintf(query, "LOAD data local infile '%s' into table %s", tabName, "fiberMouse"); sqlUpdate(conn, query); sqlDisconnect(&conn); }
void makeGl(struct sqlConnection *conn, char *chromDir, struct hash *cloneVerHash) /* Read in .gl files in chromDir and use them to create the * gl tables for the corresponding chromosome(s). */ { struct dyString *ds = newDyString(2048); struct fileInfo *fiList, *fi; char dir[256], chrom[128], ext[64]; char *glFileName; char glTable[128]; char *tab = "gl.tab"; fiList = listDirX(chromDir, "*.gl", TRUE); for (fi = fiList; fi != NULL; fi = fi->next) { glFileName = fi->name; printf("Processing %s\n", glFileName); splitPath(glFileName, dir, chrom, ext); sprintf(glTable, "%s_gl", chrom); if ( (! noLoad) && sqlTableExists(conn, glTable)) { dyStringClear(ds); dyStringPrintf(ds, "DROP table %s", glTable); sqlUpdate(conn, ds->string); } dyStringClear(ds); dyStringPrintf(ds, createGl, glTable, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlMaybeMakeTable(conn, glTable, ds->string); dyStringClear(ds); addGlBin(glFileName, tab); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", tab, glTable); if (! noLoad) sqlUpdate(conn, ds->string); } freeDyString(&ds); }
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile, char *loc2refFile, char *pepFile, char *mim2locFile) /* hgRefSeqMrna - Load refSeq mRNA alignments and other info into * refSeqGene table. */ { struct lineFile *lf; struct hash *raHash, *rsiHash = newHash(0); struct hash *loc2mimHash = newHash(0); struct refSeqInfo *rsiList = NULL, *rsi; char *s, *line, *row[5]; int wordCount, dotMod = 0; int noLocCount = 0; int rsiCount = 0; int noProtCount = 0; struct psl *psl; struct sqlConnection *conn = hgStartUpdate(database); struct hash *productHash = loadNameTable(conn, "productName", 16); struct hash *geneHash = loadNameTable(conn, "geneName", 16); char *kgName = "refGene"; FILE *kgTab = hgCreateTabFile(".", kgName); FILE *productTab = hgCreateTabFile(".", "productName"); FILE *geneTab = hgCreateTabFile(".", "geneName"); FILE *refLinkTab = hgCreateTabFile(".", "refLink"); FILE *refPepTab = hgCreateTabFile(".", "refPep"); FILE *refMrnaTab = hgCreateTabFile(".", "refMrna"); struct exon *exonList = NULL, *exon; char *answer; char cond_str[200]; /* Make refLink and other tables table if they don't exist already. */ sqlMaybeMakeTable(conn, "refLink", refLinkTableDef); sqlUpdate(conn, "NOSQLINJ delete from refLink"); sqlMaybeMakeTable(conn, "refGene", refGeneTableDef); sqlUpdate(conn, "NOSQLINJ delete from refGene"); sqlMaybeMakeTable(conn, "refPep", refPepTableDef); sqlUpdate(conn, "NOSQLINJ delete from refPep"); sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef); sqlUpdate(conn, "NOSQLINJ delete from refMrna"); /* Scan through locus link to omim ID file and put in hash. */ { char *row[2]; printf("Scanning %s\n", mim2locFile); lf = lineFileOpen(mim2locFile, TRUE); while (lineFileRow(lf, row)) { hashAdd(loc2mimHash, row[1], intToPt(atoi(row[0]))); } lineFileClose(&lf); } /* Scan through .ra file and make up start of refSeqInfo * objects in hash and list. 
*/ printf("Scanning %s\n", raFile); lf = lineFileOpen(raFile, TRUE); while ((raHash = hashNextRa(lf)) != NULL) { if (clDots > 0 && ++dotMod == clDots ) { dotMod = 0; dotOut(); } AllocVar(rsi); slAddHead(&rsiList, rsi); if ((s = hashFindVal(raHash, "acc")) == NULL) errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName); rsi->mrnaAcc = cloneString(s); if ((s = hashFindVal(raHash, "siz")) == NULL) errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName); rsi->size = atoi(s); if ((s = hashFindVal(raHash, "gen")) != NULL) rsi->geneName = cloneString(s); //!!!else //!!! warn("No gene name for %s", rsi->mrnaAcc); if ((s = hashFindVal(raHash, "cds")) != NULL) parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd); else rsi->cdsEnd = rsi->size; if ((s = hashFindVal(raHash, "ngi")) != NULL) rsi->ngi = atoi(s); rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName); s = hashFindVal(raHash, "pro"); if (s != NULL) rsi->productName = cloneString(s); rsi->productNameId = putInNameTable(productHash, productTab, s); hashAdd(rsiHash, rsi->mrnaAcc, rsi); freeHashAndVals(&raHash); } lineFileClose(&lf); if (clDots) printf("\n"); /* Scan through loc2ref filling in some gaps in rsi. */ printf("Scanning %s\n", loc2refFile); lf = lineFileOpen(loc2refFile, TRUE); while (lineFileNext(lf, &line, NULL)) { char *mrnaAcc; if (line[0] == '#') continue; wordCount = chopTabs(line, row); if (wordCount < 5) errAbort("Expecting at least 5 tab-separated words line %d of %s", lf->lineIx, lf->fileName); mrnaAcc = row[1]; mrnaAcc = accWithoutSuffix(mrnaAcc); if (mrnaAcc[2] != '_') warn("%s is and odd name %d of %s", mrnaAcc, lf->lineIx, lf->fileName); if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL) { rsi->locusLinkId = lineFileNeedNum(lf, row, 0); rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0])); rsi->proteinAcc = cloneString(accWithoutSuffix(row[4])); } } lineFileClose(&lf); /* Report how many seem to be missing from loc2ref file. * Write out knownInfo file. 
*/ printf("Writing %s\n", "refLink.tab"); for (rsi = rsiList; rsi != NULL; rsi = rsi->next) { ++rsiCount; if (rsi->locusLinkId == 0) ++noLocCount; if (rsi->proteinAcc == NULL) ++noProtCount; fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n", emptyForNull(rsi->geneName), emptyForNull(rsi->productName), emptyForNull(rsi->mrnaAcc), emptyForNull(rsi->proteinAcc), rsi->geneNameId, rsi->productNameId, rsi->locusLinkId, rsi->omimId); } if (noLocCount) printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount); if (noProtCount) printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount); /* Process alignments and write them out as genes. */ lf = pslFileOpen(pslFile); dotMod = 0; while ((psl = pslNext(lf)) != NULL) { if (hashFindVal(rsiHash, psl->qName) != NULL) { if (clDots > 0 && ++dotMod == clDots ) { dotMod = 0; dotOut(); } sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName); answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str); if (answer == NULL) { fprintf(stderr, "%s NOT FOUND.\n", psl->qName); fflush(stderr); } if (answer != NULL) { struct genePred *gp = NULL; exonList = pslToExonList(psl); fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t", psl->qName, psl->tName, psl->strand[0], psl->tStart, psl->tEnd); rsi = hashMustFindVal(rsiHash, psl->qName); gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize); if (!gp) errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName); fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd); fprintf(kgTab, "%d\t", slCount(exonList)); fflush(kgTab); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(kgTab, "%d,", exon->start); fprintf(kgTab, "\t"); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(kgTab, "%d,", exon->end); fprintf(kgTab, "\n"); slFreeList(&exonList); } } else { fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName); fflush(stderr); } } if (clDots) printf("\n"); if (!clTest) { 
writeSeqTable(pepFile, refPepTab, FALSE, TRUE); writeSeqTable(faFile, refMrnaTab, FALSE, FALSE); } carefulClose(&kgTab); carefulClose(&productTab); carefulClose(&geneTab); carefulClose(&refLinkTab); carefulClose(&refPepTab); carefulClose(&refMrnaTab); if (!clTest) { printf("Loading database with %s\n", kgName); fflush(stdout); hgLoadTabFile(conn, ".", kgName, NULL); printf("Loading database with %s\n", "productName"); fflush(stdout); hgLoadTabFile(conn, ".", "productName", NULL); printf("Loading database with %s\n", "geneName"); fflush(stdout); hgLoadTabFile(conn, ".", "geneName", NULL); printf("Loading database with %s\n", "refLink"); fflush(stdout); hgLoadTabFile(conn, ".", "refLink", NULL); printf("Loading database with %s\n", "refPep"); fflush(stdout); hgLoadTabFile(conn, ".", "refPep", NULL); printf("Loading database with %s\n", "refMrna"); fflush(stdout); hgLoadTabFile(conn, ".", "refMrna", NULL); } }
void hgWaba(char *database, char *species, char *chromosome, int chromOffset, int wabaFileCount, char *wabaFile[]) /* hgWaba - load Waba alignments into database. */ { struct sqlConnection *conn = sqlConnect(database); FILE *fullTab, *chromTab; FILE *in; struct xaAli *xa, *xaList = NULL; char fullTabName[512], chromTabName[512]; char fullTable[128], chromTable[128]; char *inFile; int i; struct dyString *query = newDyString(2048); /* Loop through each waba file grabbing sequence into * memory, then sort. */ for (i = 0; i < wabaFileCount; ++i) { inFile = wabaFile[i]; printf("Processing %s\n", inFile); in = xaOpenVerify(inFile); while ((xa = xaReadNext(in, FALSE)) != NULL) { xa->tStart += chromOffset; xa->tEnd += chromOffset; slAddHead(&xaList, xa); } carefulClose(&in); } printf("Sorting %d alignments by chromosome position\n", slCount(xaList)); slSort(&xaList, xaAliCmpTstart); /* Create names of tables and the tables themselves. * Clear anything in the chrom table. */ sprintf(fullTable, "waba%s", species); sprintf(chromTable, "%s_waba%s", chromosome, species); dyStringClear(query); sqlDyStringPrintf(query, wabaFullCreate, fullTable); sqlMaybeMakeTable(conn, fullTable, query->string); dyStringClear(query); sqlDyStringPrintf(query, wabaChromCreate, chromTable); sqlMaybeMakeTable(conn, chromTable, query->string); if (chromOffset == 0) { dyStringClear(query); sqlDyStringPrintf(query, "DELETE from %s", chromTable); sqlUpdate(conn, query->string); } /* Make a temp file for each table we'll update. */ strcpy(fullTabName, "full_waba.tab"); fullTab = mustOpen(fullTabName, "w"); strcpy(chromTabName, "chrom_waba.tab"); chromTab = mustOpen(chromTabName, "w"); /* Write out tab-delimited files. 
*/ printf("Writing tab-delimited files\n"); for (xa = xaList; xa != NULL; xa = xa->next) { int squeezedSize; squeezedSize = squeezeSym(xa->tSym, xa->hSym, xa->symCount, xa->hSym); if( squeezedSize != xa->tEnd - xa->tStart ) { printf("%s squeezedSize: %d, tEnd, tStart: %d, %d, diff: %d\n", xa->query, squeezedSize, xa->tEnd, xa->tStart, xa->tEnd - xa->tStart ); } else { fprintf(fullTab, "%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n", /*xa->query, xa->qStart, xa->qEnd, xa->qStrand,*/ xa->name, xa->qStart, xa->qEnd, xa->qStrand, chromosome, xa->tStart, xa->tEnd, xa->milliScore, xa->symCount, xa->qSym, xa->tSym, xa->hSym); assert(squeezedSize == xa->tEnd - xa->tStart); fprintf(chromTab, "%s\t%d\t%d\t%c\t%d\t%s\n", /*xa->query, xa->tStart, xa->tEnd, xa->qStrand,*/ xa->name, xa->tStart, xa->tEnd, xa->qStrand, xa->milliScore, xa->hSym); } } fclose(fullTab); fclose(chromTab); printf("Loading %s table in %s\n", chromTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", chromTabName, chromTable); sqlUpdate(conn, query->string); printf("Loading %s table in %s\n", fullTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", fullTabName, fullTable); sqlUpdate(conn, query->string); printf("Done!\n"); // remove(fullTabName); // remove(chromTabName); sqlDisconnect(&conn); freeDyString(&query); }
void makeGoldAndGap(struct sqlConnection *conn, char *chromDir) /* Read in .agp files in chromDir and use them to create the * gold and gap tables for the corresponding chromosome(s). */ { struct dyString *ds = newDyString(2048); struct fileInfo *fiList, *fi; char dir[256], chrom[128], ext[64]; char goldName[128], gapName[128]; char *agpName; char *ptr; char goldFileName[128]; char gapFileName[128]; if (! noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s", goldTabName); safef(gapFileName, ArraySize(gapFileName), "%s", gapTabName); } fiList = listDirX(chromDir, "*.agp", TRUE); for (fi = fiList; fi != NULL; fi = fi->next) { /* Get full path name of .agp file and process it * into table names. */ agpName = fi->name; printf("Processing %s\n", agpName); splitPath(agpName, dir, chrom, ext); while ((ptr = strchr(chrom, '.')) != NULL) *ptr = '_'; sprintf(goldName, "%s_gold", chrom); sprintf(gapName, "%s_gap", chrom); if (noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s_gold.tab", chrom); safef(gapFileName, ArraySize(gapFileName), "%s_gap.tab", chrom); } /* Create gold & gap tab separated files. */ splitAgp(fi->name, goldFileName, gapFileName); /* Create gold table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGold, goldName); dyStringPrintf(ds, goldSplitIndex, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlRemakeTable(conn, goldName, ds->string); dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", goldFileName, goldName); if (! noLoad) { sqlUpdate(conn, ds->string); remove(goldFileName); } /* Create gap table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGap, gapName); dyStringAppend(ds, gapSplitIndex); verbose(2, "%s", ds->string); if (! noLoad) { sqlRemakeTable(conn, gapName, ds->string); sqlMaybeMakeTable(conn, gapName, ds->string); } dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", gapFileName, gapName); if (! 
noLoad) { sqlUpdate(conn, ds->string); remove(gapFileName); } } freeDyString(&ds); }