static void getPathsFromFile(struct pfCompile *pfc, char *fileName)
/* Read fileName with raReadSingle() and fill in the cfgHash, cIncludeDir,
 * runtimeLib, jkwebLib and paraLibPath fields of pfc from it.  Every
 * setting is fetched with mustFindSetting(), which is given fileName as
 * well as the setting name. */
{
struct hash *settings = raReadSingle(fileName);

/* The hash itself is kept on pfc as well as the individual values. */
pfc->cfgHash = settings;
pfc->cIncludeDir = mustFindSetting(settings, "cIncludeDir", fileName);
pfc->runtimeLib = mustFindSetting(settings, "runtimeLib", fileName);
pfc->jkwebLib = mustFindSetting(settings, "jkwebLib", fileName);

/* paraLibPath is stored as the result of parsePath(), not as the raw string. */
pfc->paraLibPath = parsePath(mustFindSetting(settings, "paraLibPath", fileName));
}
void vgPatchJax(char *database, char *dir) /* vgPatchJax - Patch Jackson labs part of visiGene database. */ { struct sqlConnection *conn = sqlConnect(database); struct fileInfo *raList, *ra; struct dyString *query = dyStringNew(0); raList = listDirX(dir, "*.ra", TRUE); for (ra = raList; ra != NULL; ra = ra->next) { struct hash *hash = raReadSingle(ra->name); char *submitSet = hashMustFindVal(hash, "submitSet"); char *year = hashMustFindVal(hash, "year"); dyStringClear(query); dyStringPrintf(query, "update submissionSet set year=%s " "where name = '%s'" , year, submitSet); sqlUpdate(conn, query->string); } sqlDisconnect(&conn); }
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir) /* verticalSplitSqlTable - Split a database table into two new related tables that share a field. */ { struct asObject *as = asParseFile(oldAs); if (as->next != NULL) errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs); uglyf("Read %s from %s\n", as->name, oldAs); /* Read fields from splitSpec, and make sure there are no extra. */ struct hash *ra = raReadSingle(splitSpec); char *table1 = mustFindInSplitSpec("table1", ra, splitSpec); char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec); char *description1 = mustFindInSplitSpec("description1", ra, splitSpec); char *table2 = mustFindInSplitSpec("table2", ra, splitSpec); char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec); char *description2 = mustFindInSplitSpec("description2", ra, splitSpec); char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec); if (ra->elCount > 7) errAbort("Extra fields in %s", splitSpec); /* Convert this=that strings to lists of pairs. */ struct slPair *fieldList1 = slPairFromString(fields1); struct slPair *fieldList2 = slPairFromString(fields2); /* Do some more checks */ if (sameString(table1, table2)) errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec); checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1); checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2); struct asColumn *keyCol = asColumnFind(as, sharedKey); if (keyCol == NULL) errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs); /* Make sure that all fields in splitSpec are actually in the oldAs file. 
*/ checkFieldsInAs(fieldList1, splitSpec, as, oldAs); checkFieldsInAs(fieldList2, splitSpec, as, oldAs); /* Make sure that all old table fields are covered */ if (!partialOk) { struct hash *covered = hashNew(0); struct slPair *field; for (field = fieldList1; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); for (field = fieldList2; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { if (!hashLookup(covered, col->name)) errAbort("Field %s in %s not output, use -partialOk flag if this is intentional", col->name, oldAs); } } /* Ok, input is checked, start on output.. */ if (lastChar(outDir) == '/') trimLastChar(outDir); makeDirsOnPath(outDir); /* Output .as files. */ outputPartialAs(as, table1, fieldList1, description1, outDir); outputPartialAs(as, table2, fieldList2, description2, outDir); /* Output first split file - a straight up subset of columns. */ char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table1); outputPartialTab(oldTab, as, fieldList1, path); /* Output second split file */ char errPath[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table2); safef(errPath, sizeof(path), "%s/mergeErrs.txt", outDir); outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath); }
int main(int argc, char *argv[]) { if(argc < 2) usage(); char * configFile = argv[1]; struct hash * config = raReadSingle(configFile); matrix * data = NULL; if(sameString("bioInt", hashMustFindVal(config, "inputType"))) { char * profile = hashMustFindVal(config, "profile"); //TODO: make this optional so if it's not set it defaults char * db = hashMustFindVal(config, "db"); struct sqlConnection *conn = hAllocConnProfile(profile, db); char *tableName = hashMustFindVal(config, "tableName"); data = bioInt_fill_matrix(conn, tableName); hFreeConn(&conn); } else if(sameString("flatfiles", hashMustFindVal(config, "inputType"))) { char * dataFilepath = hashMustFindVal(config, "dataFilepath"); FILE * dataFile; dataFile = fopen(dataFilepath, "r"); if(dataFile == NULL) errAbort("ERROR: Couldn't open the file \"%s\"\n", dataFilepath); data = f_fill_matrix(dataFile, 1); fclose(dataFile); } else errAbort("Unsupported input type"); int itemsInFold = -1; int folds = foldsCountFromConfig(config); char * cv = hashMustFindVal(config, "crossValidation"); if(sameString("k-fold", cv)) itemsInFold = floor(data->cols/folds); else if(sameString("loo", cv)) itemsInFold = 1; if(folds == -1 || itemsInFold == -1) errAbort("Couldn't assign folds or itemsInFold\n"); if(hashFindVal(config, "excludeList")) { matrix * trimmedData = filterColumnsByExcludeList(config, data); free_matrix(data); data = trimmedData; } struct slInt *list = list_indices(data->cols); int split, splits = splitsCountFromConfig(config); matrix * foldReports = NULL; for(split = 1; split <= splits; split++) { struct slInt *shuffledList = seeded_shuffle_indices(list, split); matrix * tmp = reportFolds(config, data, folds,itemsInFold, shuffledList); if(split == 1) { foldReports = copy_matrix(tmp); } else { matrix * tmp2 = append_matrices(foldReports, tmp, 2); free_matrix(foldReports); foldReports = copy_matrix(tmp2); } slFreeList(&shuffledList); } fprint_discreteMatrix(stdout, foldReports); free_matrix(foldReports); 
freeHash(&config); slFreeList(&list); return 0; }
void bioImageLoad(char *setRaFile, char *itemTabFile) /* bioImageLoad - Load data into bioImage database. */ { struct hash *raHash = raReadSingle(setRaFile); struct hash *rowHash; struct lineFile *lf = lineFileOpen(itemTabFile, TRUE); char *line, *words[256]; struct sqlConnection *conn = sqlConnect(database); int rowSize; int submissionSetId; struct hash *fullDirHash = newHash(0); struct hash *screenDirHash = newHash(0); struct hash *thumbDirHash = newHash(0); struct hash *treatmentHash = newHash(0); struct hash *bodyPartHash = newHash(0); struct hash *sliceTypeHash = newHash(0); struct hash *imageTypeHash = newHash(0); struct hash *sectionSetHash = newHash(0); struct dyString *dy = dyStringNew(0); /* Read first line of tab file, and from it get all the field names. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s appears to be empty", lf->fileName); if (line[0] != '#') errAbort("First line of %s needs to start with #, and then contain field names", lf->fileName); rowHash = hashRowOffsets(line+1); rowSize = rowHash->elCount; if (rowSize >= ArraySize(words)) errAbort("Too many fields in %s", lf->fileName); /* Check that have all required fields */ { char *fieldName; int i; for (i=0; i<ArraySize(requiredSetFields); ++i) { fieldName = requiredSetFields[i]; if (!hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s", fieldName, setRaFile); } for (i=0; i<ArraySize(requiredItemFields); ++i) { fieldName = requiredItemFields[i]; if (!hashLookup(rowHash, fieldName)) errAbort("Field %s is not in %s", fieldName, itemTabFile); } for (i=0; i<ArraySize(requiredFields); ++i) { fieldName = requiredFields[i]; if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile); } } /* Create/find submission record. */ submissionSetId = saveSubmissionSet(conn, raHash); /* Process rest of tab file. 
*/ while (lineFileNextRowTab(lf, words, rowSize)) { int fullDir = cachedId(conn, "location", "name", fullDirHash, "fullDir", raHash, rowHash, words); int screenDir = cachedId(conn, "location", "name", screenDirHash, "screenDir", raHash, rowHash, words); int thumbDir = cachedId(conn, "location", "name", thumbDirHash, "thumbDir", raHash, rowHash, words); int bodyPart = cachedId(conn, "bodyPart", "name", bodyPartHash, "bodyPart", raHash, rowHash, words); int sliceType = cachedId(conn, "sliceType", "name", sliceTypeHash, "sliceType", raHash, rowHash, words); int imageType = cachedId(conn, "imageType", "name", imageTypeHash, "imageType", raHash, rowHash, words); int treatment = cachedId(conn, "treatment", "conditions", treatmentHash, "treatment", raHash, rowHash, words); char *fileName = getVal("fileName", raHash, rowHash, words, NULL); char *submitId = getVal("submitId", raHash, rowHash, words, NULL); char *taxon = getVal("taxon", raHash, rowHash, words, NULL); char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL); char *age = getVal("age", raHash, rowHash, words, NULL); char *sectionSet = getVal("sectionSet", raHash, rowHash, words, ""); char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0"); char *gene = getVal("gene", raHash, rowHash, words, ""); char *locusLink = getVal("locusLink", raHash, rowHash, words, ""); char *refSeq = getVal("refSeq", raHash, rowHash, words, ""); char *genbank = getVal("genbank", raHash, rowHash, words, ""); char *priority = getVal("priority", raHash, rowHash, words, "200"); int sectionId = 0; int oldId; // char *xzy = getVal("xzy", raHash, rowHash, words, xzy); if (sectionSet[0] != 0 && !sameString(sectionSet, "0")) { struct hashEl *hel = hashLookup(sectionSetHash, sectionSet); if (hel != NULL) sectionId = ptToInt(hel->val); else { sqlUpdate(conn, "insert into sectionSet values(default)"); sectionId = sqlLastAutoId(conn); hashAdd(sectionSetHash, sectionSet, intToPt(sectionId)); } } dyStringClear(dy); 
dyStringAppend(dy, "select id from image "); dyStringPrintf(dy, "where fileName = '%s' ", fileName); dyStringPrintf(dy, "and fullLocation = %d", fullDir); oldId = sqlQuickNum(conn, dy->string); if (oldId != 0) { if (replace) { dyStringClear(dy); dyStringPrintf(dy, "delete from image where id = %d", oldId); sqlUpdate(conn, dy->string); } else errAbort("%s is already in database line %d of %s", fileName, lf->lineIx, lf->fileName); } dyStringClear(dy); dyStringAppend(dy, "insert into image set\n"); dyStringPrintf(dy, " id = default,\n"); dyStringPrintf(dy, " fileName = '%s',\n", fileName); dyStringPrintf(dy, " fullLocation = %d,\n", fullDir); dyStringPrintf(dy, " screenLocation = %d,\n", screenDir); dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir); dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId); dyStringPrintf(dy, " sectionSet = %d,\n", sectionId); dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx); dyStringPrintf(dy, " submitId = '%s',\n", submitId); dyStringPrintf(dy, " gene = '%s',\n", gene); dyStringPrintf(dy, " locusLink = '%s',\n", locusLink); dyStringPrintf(dy, " refSeq = '%s',\n", refSeq); dyStringPrintf(dy, " genbank = '%s',\n", genbank); dyStringPrintf(dy, " priority = %s,\n", priority); dyStringPrintf(dy, " taxon = %s,\n", taxon); dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo); dyStringPrintf(dy, " age = %s,\n", age); dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart); dyStringPrintf(dy, " sliceType = %d,\n", sliceType); dyStringPrintf(dy, " imageType = %d,\n", imageType); dyStringPrintf(dy, " treatment = %d\n", treatment); sqlUpdate(conn, dy->string); } }