void outputPartialAs(struct asObject *as, char *newTable, struct slPair *newFieldList, char *newDescription, char *outDir) /* Create outdir/newTable.as based on a subset of as. */ { /* Create output file. */ char outPath[PATH_LEN]; safef(outPath, sizeof(outPath), "%s/%s.as", outDir, newTable); FILE *f = mustOpen(outPath, "w"); /* Print table header. */ fprintf(f, "table %s\n", newTable); fprintf(f, "\"%s\"\n", newDescription); char *indent = " "; fprintf(f, "%s(\n", indent); /* Print selected columns. */ struct slPair *fieldPair; for (fieldPair = newFieldList; fieldPair != NULL; fieldPair = fieldPair->next) { char *newField = fieldPair->name; char *oldField = fieldPair->val; struct asColumn *col = asColumnFind(as, oldField); fprintf(f, "%s%s %s; \"%s\"\n", indent, col->lowType->name, newField, col->comment); } /* Close out table and file. */ fprintf(f, "%s)\n", indent); carefulClose(&f); }
struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as) /* Return an index maker corresponding to extraIndexList. Checks that all fields * mentioned are in autoSql definition, and for now that they are all text fields. */ { /* Fill in scalar fields and return quickly if no extra indexes. */ struct bbExIndexMaker *eim; AllocVar(eim); eim->indexCount = slCount(extraIndexList); if (eim->indexCount == 0) return eim; // Not much to do in this case /* Allocate arrays according field count. */ AllocArray(eim->indexFields, eim->indexCount); AllocArray(eim->maxFieldSize, eim->indexCount); AllocArray(eim->chunkArrayArray, eim->indexCount); AllocArray(eim->fileOffsets, eim->indexCount); /* Loop through each field checking that it is indeed something we can index * and if so saving information about it */ int indexIx = 0; struct slName *name; for (name = extraIndexList; name != NULL; name = name->next) { struct asColumn *col = asColumnFind(as, name->name); if (col == NULL) errAbort("extraIndex field %s not a standard bed field or found in 'as' file.", name->name); if (!sameString(col->lowType->name, "string")) errAbort("Sorry for now can only index string fields."); eim->indexFields[indexIx] = slIxFromElement(as->columnList, col); ++indexIx; } return eim; }
static void showTableFieldsOnList(char *db, char *rootTable, struct asObject *asObj,
    struct slName *fieldList, boolean showItemRgb, boolean withGetButton)
/* Emit an HTML table with one row per entry of fieldList: a check box, the
 * field name and, when asObj is not NULL, the field's autoSql comment.
 * If showItemRgb is set, a field matching "reserved" is labelled "itemRgb".
 * The table is followed by the output of showTableButtons(). */
{
    hTableStart();

    struct slName *el = fieldList;
    while (el != NULL)
    {
        char *field = el->name;
        char *var = checkVarName(db, rootTable, field);

        /* Check box cell. */
        hPrintf("<TR>");
        hPrintf("<TD>");
        cgiMakeCheckBox(var, varOn(var));
        hPrintf("</TD>");

        /* Name cell. */
        hPrintf("<TD>");
        boolean relabel = showItemRgb && sameWord(field,"reserved");
        if (!relabel)
            hPrintf("%s", field);
        else
            hPrintf("itemRgb");
        hPrintf("</TD>");

        /* Description cell, present only when there is an autoSql object to consult. */
        if (asObj != NULL)
        {
            struct asColumn *asCol = asColumnFind(asObj, field);
            if (asCol == NULL)
                hPrintf("<TD> </TD>");
            else
                hPrintf("<TD>%s</TD>", asCol->comment);
        }

        hPrintf("</TR>");
        el = el->next;
    }

    hTableEnd();
    showTableButtons(db, rootTable, withGetButton);
}
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx) /* Return index associated with fieldName. Aborts if no such index. Optionally return * index in a row of this field. */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct asObject *as = bigBedAsOrDefault(bbi); struct asColumn *col = asColumnFind(as, fieldName); if (col == NULL) errAbort("No field %s in %s", fieldName, bbi->fileName); int colIx = slIxFromElement(as->columnList, col); if (retFieldIx != NULL) *retFieldIx = colIx; asObjectFree(&as); /* See if we have any extra indexes, and if so seek to there. */ bits64 offset = bbi->extraIndexListOffset; if (offset == 0) errAbort("%s has no indexes", bbi->fileName); udcSeek(udc, offset); /* Go through each extra index and see if it's a match */ int i; for (i=0; i<bbi->extraIndexCount; ++i) { bits16 type = udcReadBits16(udc, isSwapped); bits16 fieldCount = udcReadBits16(udc, isSwapped); bits64 fileOffset = udcReadBits64(udc, isSwapped); udcSeekCur(udc, 4); // skip over reserved bits if (type != 0) { warn("Don't understand type %d", type); internalErr(); } if (fieldCount == 1) { bits16 fieldId = udcReadBits16(udc, isSwapped); udcSeekCur(udc, 2); // skip over reserved bits if (fieldId == colIx) { udcSeek(udc, fileOffset); struct bptFile *bpt = bptFileAttach(bbi->fileName, udc); return bpt; } } else { warn("Not yet understanding indexes on multiple fields at once."); internalErr(); } } errAbort("%s is not indexed in %s", fieldName, bbi->fileName); return NULL; }
void checkFieldsInAs(struct slPair *list, char *listFile, struct asObject *as, char *asFile)
/* Abort unless the val of every pair in list names a column of as.
 * listFile and asFile are used only in the error message. */
{
    struct slPair *pair = list;
    while (pair != NULL)
    {
        char *wanted = pair->val;
        struct asColumn *found = asColumnFind(as, wanted);
        if (found == NULL)
            errAbort("Field %s is in %s but not %s\n", wanted, listFile, asFile);
        pair = pair->next;
    }
}
int *makeNewToOldArray(struct asObject *as, struct slPair *fieldList) /* Return an array where we can lookup old index given new index. */ { int oldFieldCount = slCount(as->columnList); int newFieldCount = slCount(fieldList); int *oldIx; AllocArray(oldIx, newFieldCount); int i; struct slPair *fieldPair; for (i=0, fieldPair = fieldList; i<newFieldCount; ++i, fieldPair = fieldPair->next) { char *oldName = fieldPair->val; struct asColumn *col = asColumnFind(as, oldName); assert(col != NULL); /* We checked earlier but... */ int ix = slIxFromElement(as->columnList, col); assert(ix >= 0 && ix <= oldFieldCount); oldIx[i] = ix; } return oldIx; }
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir) /* verticalSplitSqlTable - Split a database table into two new related tables that share a field. */ { struct asObject *as = asParseFile(oldAs); if (as->next != NULL) errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs); uglyf("Read %s from %s\n", as->name, oldAs); /* Read fields from splitSpec, and make sure there are no extra. */ struct hash *ra = raReadSingle(splitSpec); char *table1 = mustFindInSplitSpec("table1", ra, splitSpec); char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec); char *description1 = mustFindInSplitSpec("description1", ra, splitSpec); char *table2 = mustFindInSplitSpec("table2", ra, splitSpec); char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec); char *description2 = mustFindInSplitSpec("description2", ra, splitSpec); char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec); if (ra->elCount > 7) errAbort("Extra fields in %s", splitSpec); /* Convert this=that strings to lists of pairs. */ struct slPair *fieldList1 = slPairFromString(fields1); struct slPair *fieldList2 = slPairFromString(fields2); /* Do some more checks */ if (sameString(table1, table2)) errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec); checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1); checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2); struct asColumn *keyCol = asColumnFind(as, sharedKey); if (keyCol == NULL) errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs); /* Make sure that all fields in splitSpec are actually in the oldAs file. 
*/ checkFieldsInAs(fieldList1, splitSpec, as, oldAs); checkFieldsInAs(fieldList2, splitSpec, as, oldAs); /* Make sure that all old table fields are covered */ if (!partialOk) { struct hash *covered = hashNew(0); struct slPair *field; for (field = fieldList1; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); for (field = fieldList2; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { if (!hashLookup(covered, col->name)) errAbort("Field %s in %s not output, use -partialOk flag if this is intentional", col->name, oldAs); } } /* Ok, input is checked, start on output.. */ if (lastChar(outDir) == '/') trimLastChar(outDir); makeDirsOnPath(outDir); /* Output .as files. */ outputPartialAs(as, table1, fieldList1, description1, outDir); outputPartialAs(as, table2, fieldList2, description2, outDir); /* Output first split file - a straight up subset of columns. */ char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table1); outputPartialTab(oldTab, as, fieldList1, path); /* Output second split file */ char errPath[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table2); safef(errPath, sizeof(path), "%s/mergeErrs.txt", outDir); outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath); }
void describeFields(char *db, char *table, struct asObject *asObj, struct sqlConnection *conn) /* Print out an HTML table showing table fields and types, and optionally * offering histograms for the text/enum fields. */ { struct sqlResult *sr; char **row; #define TOO_BIG_FOR_HISTO 500000 boolean tooBig = (sqlTableSize(conn, table) > TOO_BIG_FOR_HISTO); char query[256]; struct slName *exampleList, *example; boolean showItemRgb = FALSE; showItemRgb=bedItemRgb(findTdbForTable(db, curTrack, table, ctLookupName)); // should we expect itemRgb instead of "reserved" sqlSafef(query, sizeof(query), "select * from %s limit 1", table); exampleList = storeRow(conn, query); sqlSafef(query, sizeof(query), "describe %s", table); sr = sqlGetResult(conn, query); hTableStart(); hPrintf("<TR><TH>field</TH>"); if (exampleList != NULL) hPrintf("<TH>example</TH>"); hPrintf("<TH>SQL type</TH> "); if (!tooBig) hPrintf("<TH>info</TH> "); if (asObj != NULL) hPrintf("<TH>description</TH> "); puts("</TR>\n"); example = exampleList; while ((row = sqlNextRow(sr)) != NULL) { if (showItemRgb && (sameWord(row[0],"reserved"))) hPrintf("<TR><TD><TT>itemRgb</TT></TD> "); else hPrintf("<TR><TD><TT>%s</TT></TD> ", row[0]); if (exampleList != NULL) { hPrintf("<TD>"); if (example != NULL) hPrintf("%s", cleanExample(example->name)); else hPrintf("n/a"); hPrintf("</TD>"); } // enums/sets with many items can make for painfully wide rows in the table -- // add spaces between quoted list values: if (stringIn("','", row[1])) { struct dyString *spaced = dyStringSub(row[1], "','", "', '"); hPrintf("<TD><TT>%s</TT></TD>", spaced->string); } else hPrintf("<TD><TT>%s</TT></TD>", row[1]); if (!tooBig) { hPrintf(" <TD>"); if ((isSqlStringType(row[1]) && !sameString(row[1], "longblob")) || isSqlEnumType(row[1]) || isSqlSetType(row[1])) { hPrintf("<A HREF=\"%s", getScriptName()); hPrintf("?%s", cartSidUrlString(cart)); hPrintf("&%s=%s", hgtaDatabase, db); hPrintf("&%s=%s", hgtaHistoTable, table); hPrintf("&%s=%s", 
hgtaDoValueHistogram, row[0]); hPrintf("\">"); hPrintf("values"); hPrintf("</A>"); } else if (isSqlNumType(row[1])) { hPrintf("<A HREF=\"%s", getScriptName()); hPrintf("?%s", cartSidUrlString(cart)); hPrintf("&%s=%s", hgtaDatabase, db); hPrintf("&%s=%s", hgtaHistoTable, table); hPrintf("&%s=%s", hgtaDoValueRange, row[0]); hPrintf("\">"); hPrintf("range"); hPrintf("</A>"); } else { hPrintf(" "); } hPrintf("</TD>"); } if (asObj != NULL) { struct asColumn *asCol = asColumnFind(asObj, row[0]); hPrintf(" <TD>"); if (asCol != NULL) hPrintf("%s", asCol->comment); else { if (sameString("bin", row[0])) hPrintf("Indexing field to speed chromosome range queries."); else hPrintf(" "); } hPrintf("</TD>"); } puts("</TR>"); if (example != NULL) example = example->next; } hTableEnd(); sqlFreeResult(&sr); }