void updateSumAndAll(struct encode2Manifest *mi, char *md5, char *rootDir)
/* Store a new md5 sum in mi and refresh everything that depends on the file
 * contents: the size and modification time are re-read from the file at
 * rootDir/mi->fileName, and the validation key is recalculated from the new
 * md5 and size.  Aborts if fileSize reports -1 for that path.
 * Note that mi->md5sum is set to the md5 pointer itself; no copy is made. */
{
/* Full path of the file on disk. */
char fullPath[PATH_LEN];
safef(fullPath, sizeof(fullPath), "%s/%s", rootDir, mi->fileName);

/* A size of -1 is treated as "file is missing". */
off_t byteCount = fileSize(fullPath);
if (byteCount == -1)
    {
    errAbort("%s doesn't exist", fullPath);
    }

/* File is there, so bring every dependent field in the manifest up to date. */
mi->size = byteCount;
mi->modifiedTime = fileModTime(fullPath);
mi->md5sum = md5;
mi->validKey = encode3CalcValidationKey(md5, byteCount);
}
Esempio n. 2
0
void redoOne(struct sqlConnection *conn, struct edwFile *redoEf)
/* Redo one file.  Rebuilds the big bed file stored at redoEf->edwFileName from
 * the gtf file it came from, then recalculates the md5 sum and validation key
 * of the rebuilt file and issues the database updates for them.
 * The gtf file is found by taking redoEf->submitFileName, chopping off its
 * suffix and adding ".gz"; exactly one edwFile row is expected to match. */
{
/* Figure out submit file name of the gtf file.  The name is built with the
 * size-aware safef instead of strcpy/strcat so that an over-long
 * submitFileName can never be written past the end of a PATH_LEN buffer. */
char baseName[PATH_LEN];
safef(baseName, sizeof(baseName), "%s", redoEf->submitFileName);
chopSuffix(baseName);
char gtfFileName[PATH_LEN];
safef(gtfFileName, sizeof(gtfFileName), "%s.gz", baseName);

/* Get edwFile record for gtf file.
 * NOTE(review): gtfFileName is placed in the query unescaped; confirm that
 * submit file names can never contain a single quote. */
char query[PATH_LEN+64];
safef(query, sizeof(query), "select * from edwFile where submitFileName='%s'", gtfFileName);
struct edwFile *sourceEf = edwFileLoadByQuery(conn, query);
/* NOTE(review): this assert is the only guard before sourceEf is dereferenced
 * below, so it must stay active in the build that is actually run. */
assert(slCount(sourceEf) == 1);

/* Get UCSC database */
safef(query, sizeof(query), "select ucscDb from edwValidFile where fileId=%u", sourceEf->id);
char ucscDb[64] = "";
sqlQuickQuery(conn, query, ucscDb, sizeof(ucscDb));
assert(ucscDb[0] != 0);

/* Remake the big bed file. */
char sourceFileName[PATH_LEN], destFileName[PATH_LEN];
safef(sourceFileName, sizeof(sourceFileName), "%s%s", edwRootDir, sourceEf->edwFileName);
safef(destFileName, sizeof(destFileName), "%s%s", edwRootDir, redoEf->edwFileName);
makeGtfBigBed(ucscDb, sourceFileName, destFileName);

/* Recalculate size and md5 sum and validation key. */
char *md5 = md5HexForFile(destFileName);
long long size = fileSize(destFileName);
char *validKey = encode3CalcValidationKey(md5, size);

/* Issue command to update md5 in database. */
char command[2*PATH_LEN];
safef(command, sizeof(command),
    "hgsql -e 'update edwFile set md5=\"%s\" where id=%u' encodeDataWarehouse", md5, redoEf->id);
doSystem(command);

/* Issue command to update tags in database.  The tag string is always built,
 * but it is only written back when doReal is set. */
char *newTags = cgiStringNewValForVar(redoEf->tags, "valid_key", validKey); 
if (doReal)
    {
    edwFileResetTags(conn, redoEf, newTags);
    }
}
Esempio n. 3
0
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl, 
    struct submitFileRow **retSubmitList)
/* Read the submit file at submitLocalPath as a fielded table, check that the
 * required columns are present, validate the contents of every row, and
 * return the rows converted to a submitFileRow list in *retSubmitList.  Only
 * what the submit file itself supplies gets filled in at this stage.
 * Any invalid value aborts with an error message.  submitUrl is only used for
 * error reporting; for a local file pass the same string as submitLocalPath. */
{
/* Columns that every submit file has to provide. */
char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate", 
    "enriched_in", "md5_sum", "size",  "modified", "valid_key"};
struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl,
    requiredFields, ArraySize(requiredFields));

/* Position of each column we are going to read; a lookup yields -1 when the
 * column is absent, which is only expected for the two optional ones. */
#define COLUMN_OF(name) stringArrayIx((name), table->fields, table->fieldCount)
int fileNameCol = COLUMN_OF("file_name");
int formatCol = COLUMN_OF("format");
int outputCol = COLUMN_OF("output_type");
int experimentCol = COLUMN_OF("experiment");
int replicateCol = COLUMN_OF("replicate");
int enrichedCol = COLUMN_OF("enriched_in");
int md5Col = COLUMN_OF("md5_sum");
int sizeCol = COLUMN_OF("size");
int modifiedCol = COLUMN_OF("modified");
int validKeyCol = COLUMN_OF("valid_key");

/* Optional replacement columns: the "replaces" column switches replacement
 * checking on, and it must then be accompanied by the reason column. */
int replacesCol = COLUMN_OF(replacesTag);
int reasonCol = COLUMN_OF(replaceReasonTag);
#undef COLUMN_OF
boolean wantReplace = (replacesCol != -1);
if (wantReplace && reasonCol == -1)
    errAbort("Error: got \"%s\" column without \"%s\" column in %s.", 
        replacesTag, replaceReasonTag, submitUrl);

/* Walk the rows, validating one field after another.  The checks run in a
 * fixed order, so the first bad field in the first bad row is what gets
 * reported. */
struct fieldedRow *rowEl;
for (rowEl = table->rowList; rowEl != NULL; rowEl = rowEl->next)
    {
    char **cells = rowEl->row;

    char *fileName = cells[fileNameCol];
    allGoodFileNameChars(fileName);

    char *format = cells[formatCol];
    if (!isSupportedFormat(format))
        errAbort("Format %s is not supported", format);

    allGoodSymbolChars(cells[outputCol]);

    char *experiment = cells[experimentCol];
    if (!isExperimentId(experiment))
        errAbort("%s in experiment field does not seem to be an encode experiment", experiment);

    /* Replicate is either one of two keywords or all digits. */
    char *replicate = cells[replicateCol];
    if (!sameString(replicate, "pooled") && !sameString(replicate, "n/a") 
        && !isAllNum(replicate))
        errAbort("%s is not a good value for the replicate column", replicate);

    char *enriched = cells[enrichedCol];
    if (!encode3CheckEnrichedIn(enriched))
        errAbort("Enriched_in %s is not supported", enriched);

    char *md5 = cells[md5Col];
    if (!(strlen(md5) == 32 && isAllHexLower(md5)))
        errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5);

    char *sizeText = cells[sizeCol];
    if (!isAllNum(sizeText))
        errAbort("Invalid size '%s'", sizeText);

    char *modifiedText = cells[modifiedCol];
    if (!isAllNum(modifiedText))
        errAbort("Invalid modification time '%s'", modifiedText);

    /* The submitted key has to equal the one recomputed from md5 and size. */
    char *submittedKey = cells[validKeyCol];
    char *expectedKey = encode3CalcValidationKey(md5, sqlLongLong(sizeText));
    if (differentString(submittedKey, expectedKey))
        errAbort("The valid_key %s for %s doesn't fit", submittedKey, fileName);
    freez(&expectedKey);

    /* A row that really names a file to replace needs a plausible accession
     * and a stated reason. */
    if (wantReplace && !isEmptyOrNa(cells[replacesCol]))
        {
        char *replaces = cells[replacesCol];
        char *accessionHead = edwLicensePlateHead(conn);
        if (!startsWith(accessionHead, replaces))
            errAbort("%s in replaces column is not an ENCODE file accession", replaces);
        if (isEmptyOrNa(cells[reasonCol]))
            errAbort("Replacing %s without a reason\n", replaces);
        }
    }

/* Everything checked out, so convert the table into the list for the caller. */
*retSubmitList = submitFileRowFromFieldedTable(conn, table, 
    fileNameCol, md5Col, sizeCol, modifiedCol, replacesCol, reasonCol);
}