Beispiel #1
0
static void getPathsFromFile(struct pfCompile *pfc, char *fileName)
/* Read in several fields of pfc from file. */
{
struct hash *hash = raReadSingle(fileName);
char *libPath;
pfc->cfgHash = hash;
pfc->cIncludeDir = mustFindSetting(hash, "cIncludeDir", fileName);
pfc->runtimeLib = mustFindSetting(hash, "runtimeLib", fileName);
pfc->jkwebLib = mustFindSetting(hash, "jkwebLib", fileName);
libPath = mustFindSetting(hash, "paraLibPath", fileName);
pfc->paraLibPath = parsePath(libPath);
}
void vgPatchJax(char *database, char *dir)
/* vgPatchJax - Patch Jackson labs part of visiGene database. */
{
struct sqlConnection *conn = sqlConnect(database);
struct fileInfo *raList, *ra;
struct dyString *query = dyStringNew(0);

raList = listDirX(dir, "*.ra", TRUE);
for (ra = raList; ra != NULL; ra = ra->next)
    {
    struct hash *hash = raReadSingle(ra->name);
    char *submitSet = hashMustFindVal(hash, "submitSet");
    char *year = hashMustFindVal(hash, "year");
    dyStringClear(query);
    dyStringPrintf(query,
    	"update submissionSet set year=%s "
	"where name = '%s'"
	, year, submitSet);
    sqlUpdate(conn, query->string);
    }

sqlDisconnect(&conn);
}
Beispiel #3
0
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir)
/* verticalSplitSqlTable - Split a database table into two new related tables that share a field. */
{
struct asObject *as = asParseFile(oldAs);
if (as->next != NULL)
    errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs);
uglyf("Read %s from %s\n", as->name, oldAs);

/* Read fields from splitSpec, and make sure there are no extra. */
struct hash *ra = raReadSingle(splitSpec);
char *table1 = mustFindInSplitSpec("table1", ra, splitSpec);
char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec);
char *description1 = mustFindInSplitSpec("description1", ra, splitSpec);
char *table2 = mustFindInSplitSpec("table2", ra, splitSpec);
char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec);
char *description2 = mustFindInSplitSpec("description2", ra, splitSpec);
char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec);
if (ra->elCount > 7)
    errAbort("Extra fields in %s", splitSpec);

/* Convert this=that strings to lists of pairs. */
struct slPair *fieldList1 = slPairFromString(fields1);
struct slPair *fieldList2 = slPairFromString(fields2);

/* Do some more checks */
if (sameString(table1, table2))
    errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec);
checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1);
checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2);
struct asColumn *keyCol = asColumnFind(as, sharedKey);
if (keyCol == NULL)
    errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs);

/* Make sure that all fields in splitSpec are actually in the oldAs file. */
checkFieldsInAs(fieldList1, splitSpec, as, oldAs);
checkFieldsInAs(fieldList2, splitSpec, as, oldAs);

/* Make sure that all old table fields are covered */
if (!partialOk)
    {
    struct hash *covered = hashNew(0);
    struct slPair *field;
    for (field = fieldList1; field != NULL; field = field->next)
        hashAdd(covered, field->val, NULL);
    for (field = fieldList2; field != NULL; field = field->next)
        hashAdd(covered, field->val, NULL);
    struct asColumn *col;
    for (col = as->columnList; col != NULL; col = col->next)
        {
	if (!hashLookup(covered, col->name))
	    errAbort("Field %s in %s not output, use -partialOk flag if this is intentional",
		col->name, oldAs);
	}
    }

/* Ok, input is checked, start on output.. */
if (lastChar(outDir) == '/')
    trimLastChar(outDir);
makeDirsOnPath(outDir);

/* Output .as files. */
outputPartialAs(as, table1, fieldList1, description1, outDir);
outputPartialAs(as, table2, fieldList2, description2, outDir);

/* Output first split file - a straight up subset of columns. */
char path[PATH_LEN];
safef(path, sizeof(path), "%s/%s.tab", outDir, table1);
outputPartialTab(oldTab, as, fieldList1, path);


/* Output second split file */
char errPath[PATH_LEN];
safef(path, sizeof(path), "%s/%s.tab", outDir, table2);
safef(errPath, sizeof(path), "%s/mergeErrs.txt", outDir);
outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath);
}
int main(int argc, char *argv[])
{
if(argc < 2) 
	usage();
char * configFile = argv[1];
struct hash * config = raReadSingle(configFile);

matrix * data = NULL;
if(sameString("bioInt", hashMustFindVal(config, "inputType")))
	{
	char * profile = hashMustFindVal(config, "profile"); //TODO: make this optional so if it's not set it defaults
	char * db = hashMustFindVal(config, "db");
	struct sqlConnection *conn = hAllocConnProfile(profile, db);
	char *tableName = hashMustFindVal(config, "tableName");
	data = bioInt_fill_matrix(conn, tableName);
	hFreeConn(&conn);
	}
else if(sameString("flatfiles", hashMustFindVal(config, "inputType")))
	{
	char * dataFilepath = hashMustFindVal(config, "dataFilepath");
	FILE * dataFile;
	dataFile = fopen(dataFilepath, "r");
	if(dataFile == NULL)
   		errAbort("ERROR: Couldn't open the file \"%s\"\n", dataFilepath);
	data = f_fill_matrix(dataFile, 1);
	fclose(dataFile);
	}
else
	errAbort("Unsupported input type");

int itemsInFold = -1;
int folds = foldsCountFromConfig(config);
char * cv = hashMustFindVal(config, "crossValidation");
if(sameString("k-fold", cv))
    itemsInFold = floor(data->cols/folds);
else if(sameString("loo", cv))
    itemsInFold = 1;
if(folds == -1 || itemsInFold == -1)
    errAbort("Couldn't assign folds or itemsInFold\n");

if(hashFindVal(config, "excludeList"))
    {
    matrix * trimmedData = filterColumnsByExcludeList(config, data);
    free_matrix(data);
    data = trimmedData;
    }

struct slInt *list = list_indices(data->cols); 

int split, splits = splitsCountFromConfig(config);
matrix * foldReports = NULL;
for(split = 1; split <= splits; split++)
	{
	struct slInt *shuffledList = seeded_shuffle_indices(list, split);

	matrix * tmp = reportFolds(config, data, folds,itemsInFold, shuffledList);
	if(split == 1)
		{
		foldReports = copy_matrix(tmp);
		}
	else
		{
		matrix * tmp2 = append_matrices(foldReports, tmp, 2);
		free_matrix(foldReports);
		foldReports = copy_matrix(tmp2);
		}
	slFreeList(&shuffledList);
	}
fprint_discreteMatrix(stdout, foldReports);

free_matrix(foldReports);
freeHash(&config);
slFreeList(&list);
return 0;
}
void bioImageLoad(char *setRaFile, char *itemTabFile)
/* bioImageLoad - Load data into bioImage database. */
{
struct hash *raHash = raReadSingle(setRaFile);
struct hash *rowHash;
struct lineFile *lf = lineFileOpen(itemTabFile, TRUE);
char *line, *words[256];
struct sqlConnection *conn = sqlConnect(database);
int rowSize;
int submissionSetId;
struct hash *fullDirHash = newHash(0);
struct hash *screenDirHash = newHash(0);
struct hash *thumbDirHash = newHash(0);
struct hash *treatmentHash = newHash(0);
struct hash *bodyPartHash = newHash(0);
struct hash *sliceTypeHash = newHash(0);
struct hash *imageTypeHash = newHash(0);
struct hash *sectionSetHash = newHash(0);
struct dyString *dy = dyStringNew(0);

/* Read first line of tab file, and from it get all the field names. */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s appears to be empty", lf->fileName);
if (line[0] != '#')
    errAbort("First line of %s needs to start with #, and then contain field names",
    	lf->fileName);
rowHash = hashRowOffsets(line+1);
rowSize = rowHash->elCount;
if (rowSize >= ArraySize(words))
    errAbort("Too many fields in %s", lf->fileName);

/* Check that have all required fields */
    {
    char *fieldName;
    int i;

    for (i=0; i<ArraySize(requiredSetFields); ++i)
        {
	fieldName = requiredSetFields[i];
	if (!hashLookup(raHash, fieldName))
	    errAbort("Field %s is not in %s", fieldName, setRaFile);
	}

    for (i=0; i<ArraySize(requiredItemFields); ++i)
        {
	fieldName = requiredItemFields[i];
	if (!hashLookup(rowHash, fieldName))
	    errAbort("Field %s is not in %s", fieldName, itemTabFile);
	}

    for (i=0; i<ArraySize(requiredFields); ++i)
        {
	fieldName = requiredFields[i];
	if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName))
	    errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile);
	}
    }

/* Create/find submission record. */
submissionSetId = saveSubmissionSet(conn, raHash);

/* Process rest of tab file. */
while (lineFileNextRowTab(lf, words, rowSize))
    {
    int fullDir = cachedId(conn, "location", "name", 
    	fullDirHash, "fullDir", raHash, rowHash, words);
    int screenDir = cachedId(conn, "location", "name", 
    	screenDirHash, "screenDir", raHash, rowHash, words);
    int thumbDir = cachedId(conn, "location", 
    	"name", thumbDirHash, "thumbDir", raHash, rowHash, words);
    int bodyPart = cachedId(conn, "bodyPart", 
    	"name", bodyPartHash, "bodyPart", raHash, rowHash, words);
    int sliceType = cachedId(conn, "sliceType", 
    	"name", sliceTypeHash, "sliceType", raHash, rowHash, words);
    int imageType = cachedId(conn, "imageType", 
    	"name", imageTypeHash, "imageType", raHash, rowHash, words);
    int treatment = cachedId(conn, "treatment", 
    	"conditions", treatmentHash, "treatment", raHash, rowHash, words);
    char *fileName = getVal("fileName", raHash, rowHash, words, NULL);
    char *submitId = getVal("submitId", raHash, rowHash, words, NULL);
    char *taxon = getVal("taxon", raHash, rowHash, words, NULL);
    char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL);
    char *age = getVal("age", raHash, rowHash, words, NULL);
    char *sectionSet = getVal("sectionSet", raHash, rowHash, words, "");
    char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0");
    char *gene = getVal("gene", raHash, rowHash, words, "");
    char *locusLink = getVal("locusLink", raHash, rowHash, words, "");
    char *refSeq = getVal("refSeq", raHash, rowHash, words, "");
    char *genbank = getVal("genbank", raHash, rowHash, words, "");
    char *priority = getVal("priority", raHash, rowHash, words, "200");
    int sectionId = 0;
    int oldId;
    // char *xzy = getVal("xzy", raHash, rowHash, words, xzy);

    if (sectionSet[0] != 0 && !sameString(sectionSet, "0"))
        {
	struct hashEl *hel = hashLookup(sectionSetHash, sectionSet);
	if (hel != NULL)
	    sectionId = ptToInt(hel->val);
	else
	    {
	    sqlUpdate(conn, "insert into sectionSet values(default)");
	    sectionId = sqlLastAutoId(conn);
	    hashAdd(sectionSetHash, sectionSet, intToPt(sectionId));
	    }
	}

    dyStringClear(dy);
    dyStringAppend(dy, "select id from image ");
    dyStringPrintf(dy, "where fileName = '%s' ", fileName);
    dyStringPrintf(dy, "and fullLocation = %d",  fullDir);
    oldId = sqlQuickNum(conn, dy->string);
    if (oldId != 0)
        {
	if (replace)
	    {
	    dyStringClear(dy);
	    dyStringPrintf(dy, "delete from image where id = %d", oldId);
	    sqlUpdate(conn, dy->string);
	    }
	else
	    errAbort("%s is already in database line %d of %s", 
	    	fileName, lf->lineIx, lf->fileName);
	}

    dyStringClear(dy);
    dyStringAppend(dy, "insert into image set\n");
    dyStringPrintf(dy, " id = default,\n");
    dyStringPrintf(dy, " fileName = '%s',\n", fileName);
    dyStringPrintf(dy, " fullLocation = %d,\n", fullDir);
    dyStringPrintf(dy, " screenLocation = %d,\n", screenDir);
    dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir);
    dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId);
    dyStringPrintf(dy, " sectionSet = %d,\n", sectionId);
    dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx);
    dyStringPrintf(dy, " submitId = '%s',\n", submitId);
    dyStringPrintf(dy, " gene = '%s',\n", gene);
    dyStringPrintf(dy, " locusLink = '%s',\n", locusLink);
    dyStringPrintf(dy, " refSeq = '%s',\n", refSeq);
    dyStringPrintf(dy, " genbank = '%s',\n", genbank);
    dyStringPrintf(dy, " priority = %s,\n", priority);
    dyStringPrintf(dy, " taxon = %s,\n", taxon);
    dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo);
    dyStringPrintf(dy, " age = %s,\n", age);
    dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart);
    dyStringPrintf(dy, " sliceType = %d,\n", sliceType);
    dyStringPrintf(dy, " imageType = %d,\n", imageType);
    dyStringPrintf(dy, " treatment = %d\n", treatment);

    sqlUpdate(conn, dy->string);
    }
}