void edwCorrectFileTags(char *tabFileName)
/* edwCorrectFileTags - Use this to correct tags in the edwFile table and corresponding fields 
 * in the edwValidFile table without forcing a validateManifest rerun or a reupload.. */
{
struct sqlConnection *conn = edwConnectReadWrite();
char *requiredFields[] = {"accession",};
char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"};
struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName,
	requiredFields, ArraySize(requiredFields));
checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields));
int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount);

struct fieldedRow *fr;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char *acc = fr->row[accessionIx];
    long long id = edwNeedFileIdForLicensePlate(conn, acc);
    struct edwFile *ef = edwFileFromId(conn, id);
    int i;
    char *tags = ef->tags;
    for (i=0; i<table->fieldCount; ++i)
        {
	if (i != accessionIx)
	    tags = cgiStringNewValForVar(tags, table->fields[i], fr->row[i]);
	}
    edwFileResetTags(conn, ef, tags);
    edwFileFree(&ef);
    }
}
Exemple #2
0
void changeFormat(struct sqlConnection *conn, struct edwValidFile *vf, char *format)
/* Set up vf to change format. */
{
struct edwFile *ef = edwFileFromId(conn, vf->fileId);
char *newTags = cgiStringNewValForVar(ef->tags, tagToChange, format);
edwFileResetTags(conn, ef, newTags);
edwFileFree(&ef);
}
Exemple #3
0
static int handleOldFileTags(struct sqlConnection *conn, struct submitFileRow *sfrList,
    boolean update)
/* Check metadata on files mentioned in manifest that by MD5 sum we already have in
 * warehouse.   We may want to update metadata on these. This returns the number
 * of files with tags updated. */
{
struct submitFileRow *sfr;
int updateCount = 0;
for (sfr = sfrList; sfr != NULL; sfr = sfr->next)
    {
    struct edwFile *newFile = sfr->file;
    struct edwFile *oldFile = edwFileFromId(conn, sfr->md5MatchFileId);
    verbose(2, "looking at old file %s (%s)\n", oldFile->submitFileName, newFile->submitFileName);
    struct cgiDictionary *newTags = cgiDictionaryFromEncodedString(newFile->tags);
    struct cgiDictionary *oldTags = cgiDictionaryFromEncodedString(oldFile->tags);
    boolean updateName = !sameString(oldFile->submitFileName, newFile->submitFileName);
    boolean updateTags = !cgiDictionarySame(oldTags, newTags);
    if (updateName)
	{
	if (!update)
	    errAbort("%s already uploaded with name %s.  Please use the 'update' option if you "
	             "want to give it a new name.",  
		     newFile->submitFileName, oldFile->submitFileName);
        updateSubmitName(conn, oldFile->id,  newFile->submitFileName);
	}
    if (updateTags)
	{
	if (!update)
	    {
	    char *name="", *oldVal="", *newVal="";
	    cgiDictionaryFirstDiff(oldTags, newTags, &name, &oldVal, &newVal);
	    errAbort("%s is duplicate of %s in warehouse, but %s column went from %s to %s.\n"
	             "Please use the 'update' option if you are meaning to update the information\n"
		     "associated with this file and try again if this is intentional.",
		     newFile->submitFileName, oldFile->edwFileName,
		     name, oldVal, newVal);
	    }
	edwFileResetTags(conn, oldFile, newFile->tags, TRUE);
	}
    if (updateTags || updateName)
	++updateCount;
    cgiDictionaryFree(&oldTags);
    cgiDictionaryFree(&newTags);
    }
return updateCount;
}
Exemple #4
0
void redoOne(struct sqlConnection *conn, struct edwFile *redoEf)
/* Redo one file. */
{
/* Figure out submit file name of the gtf file. */
char gtfFileName[PATH_LEN];
strcpy(gtfFileName, redoEf->submitFileName);
chopSuffix(gtfFileName);
strcat(gtfFileName, ".gz");

/* Get edwFile record for gtf file. */
char query[PATH_LEN+64];
safef(query, sizeof(query), "select * from edwFile where submitFileName='%s'", gtfFileName);
struct edwFile *sourceEf = edwFileLoadByQuery(conn, query);
assert(slCount(sourceEf) == 1);

/* Get UCSC database */
safef(query, sizeof(query), "select ucscDb from edwValidFile where fileId=%u", sourceEf->id);
char ucscDb[64] = "";
sqlQuickQuery(conn, query, ucscDb, sizeof(ucscDb));
assert(ucscDb[0] != 0);

/* Remake the big bed file. */
char sourceFileName[PATH_LEN], destFileName[PATH_LEN];
safef(sourceFileName, sizeof(sourceFileName), "%s%s", edwRootDir, sourceEf->edwFileName);
safef(destFileName, sizeof(destFileName), "%s%s", edwRootDir, redoEf->edwFileName);
makeGtfBigBed(ucscDb, sourceFileName, destFileName);

/* Recalculate size and md5 sum and validation key. */
char *md5 = md5HexForFile(destFileName);
long long size = fileSize(destFileName);
char *validKey = encode3CalcValidationKey(md5, size);

/* Issue command to update md5 in database. */
char command[2*PATH_LEN];
safef(command, sizeof(command),
    "hgsql -e 'update edwFile set md5=\"%s\" where id=%u' encodeDataWarehouse", md5, redoEf->id);
doSystem(command);

/* Issue command to update tags in database. */
char *newTags = cgiStringNewValForVar(redoEf->tags, "valid_key", validKey); 
if (doReal)
    {
    edwFileResetTags(conn, redoEf, newTags);
    }
}