void updateSumAndAll(struct encode2Manifest *mi, char *md5, char *rootDir)
/* Record a new md5 sum in mi.  A new sum means the file's contents changed, so the
 * file is located under rootDir and mi's size, modification time and validation
 * key are refreshed along with the sum.  Aborts if the file can't be found.
 * Note that mi ends up holding the md5 pointer itself, not a copy of the string. */
{
/* The manifest stores names relative to rootDir. */
char fullPath[PATH_LEN];
safef(fullPath, sizeof(fullPath), "%s/%s", rootDir, mi->fileName);

/* A size of -1 is treated as the file being missing. */
off_t byteCount = fileSize(fullPath);
if (byteCount == -1)
    errAbort("%s doesn't exist", fullPath);

/* Refresh everything that depends on the file's contents. */
mi->size = byteCount;
mi->modifiedTime = fileModTime(fullPath);
mi->md5sum = md5;
mi->validKey = encode3CalcValidationKey(md5, byteCount);
}
void redoOne(struct sqlConnection *conn, struct edwFile *redoEf) /* Redo one file. */ { /* Figure out submit file name of the gtf file. */ char gtfFileName[PATH_LEN]; strcpy(gtfFileName, redoEf->submitFileName); chopSuffix(gtfFileName); strcat(gtfFileName, ".gz"); /* Get edwFile record for gtf file. */ char query[PATH_LEN+64]; safef(query, sizeof(query), "select * from edwFile where submitFileName='%s'", gtfFileName); struct edwFile *sourceEf = edwFileLoadByQuery(conn, query); assert(slCount(sourceEf) == 1); /* Get UCSC database */ safef(query, sizeof(query), "select ucscDb from edwValidFile where fileId=%u", sourceEf->id); char ucscDb[64] = ""; sqlQuickQuery(conn, query, ucscDb, sizeof(ucscDb)); assert(ucscDb[0] != 0); /* Remake the big bed file. */ char sourceFileName[PATH_LEN], destFileName[PATH_LEN]; safef(sourceFileName, sizeof(sourceFileName), "%s%s", edwRootDir, sourceEf->edwFileName); safef(destFileName, sizeof(destFileName), "%s%s", edwRootDir, redoEf->edwFileName); makeGtfBigBed(ucscDb, sourceFileName, destFileName); /* Recalculate size and md5 sum and validation key. */ char *md5 = md5HexForFile(destFileName); long long size = fileSize(destFileName); char *validKey = encode3CalcValidationKey(md5, size); /* Issue command to update md5 in database. */ char command[2*PATH_LEN]; safef(command, sizeof(command), "hgsql -e 'update edwFile set md5=\"%s\" where id=%u' encodeDataWarehouse", md5, redoEf->id); doSystem(command); /* Issue command to update tags in database. */ char *newTags = cgiStringNewValForVar(redoEf->tags, "valid_key", validKey); if (doReal) { edwFileResetTags(conn, redoEf, newTags); } }
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl, struct submitFileRow **retSubmitList) /* Load and parse up this file as fielded table, make sure all required fields are there, * and calculate indexes of required fields. This produces an edwFile list, but with * still quite a few fields missing - just what can be filled in from submit filled in. * The submitUrl is just used for error reporting. If it's local, just make it the * same as submitLocalPath. */ { char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate", "enriched_in", "md5_sum", "size", "modified", "valid_key"}; struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl, requiredFields, ArraySize(requiredFields)); /* Get offsets of all required fields */ int fileIx = stringArrayIx("file_name", table->fields, table->fieldCount); int formatIx = stringArrayIx("format", table->fields, table->fieldCount); int outputIx = stringArrayIx("output_type", table->fields, table->fieldCount); int experimentIx = stringArrayIx("experiment", table->fields, table->fieldCount); int replicateIx = stringArrayIx("replicate", table->fields, table->fieldCount); int enrichedIx = stringArrayIx("enriched_in", table->fields, table->fieldCount); int md5Ix = stringArrayIx("md5_sum", table->fields, table->fieldCount); int sizeIx = stringArrayIx("size", table->fields, table->fieldCount); int modifiedIx = stringArrayIx("modified", table->fields, table->fieldCount); int validIx = stringArrayIx("valid_key", table->fields, table->fieldCount); /* See if we're doing replacement and check have all columns needed if so. 
*/ int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount); int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount); boolean doReplace = (replacesIx != -1); if (doReplace) if (replaceReasonIx == -1) errAbort("Error: got \"%s\" column without \"%s\" column in %s.", replacesTag, replaceReasonTag, submitUrl); /* Loop through and make sure all field values are ok */ struct fieldedRow *fr; for (fr = table->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; char *fileName = row[fileIx]; allGoodFileNameChars(fileName); char *format = row[formatIx]; if (!isSupportedFormat(format)) errAbort("Format %s is not supported", format); allGoodSymbolChars(row[outputIx]); char *experiment = row[experimentIx]; if (!isExperimentId(experiment)) errAbort("%s in experiment field does not seem to be an encode experiment", experiment); char *replicate = row[replicateIx]; if (differentString(replicate, "pooled") && differentString(replicate, "n/a") ) if (!isAllNum(replicate)) errAbort("%s is not a good value for the replicate column", replicate); char *enriched = row[enrichedIx]; if (!encode3CheckEnrichedIn(enriched)) errAbort("Enriched_in %s is not supported", enriched); char *md5 = row[md5Ix]; if (strlen(md5) != 32 || !isAllHexLower(md5)) errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5); char *size = row[sizeIx]; if (!isAllNum(size)) errAbort("Invalid size '%s'", size); char *modified = row[modifiedIx]; if (!isAllNum(modified)) errAbort("Invalid modification time '%s'", modified); char *validIn = row[validIx]; char *realValid = encode3CalcValidationKey(md5, sqlLongLong(size)); if (!sameString(validIn, realValid)) errAbort("The valid_key %s for %s doesn't fit", validIn, fileName); freez(&realValid); if (doReplace) { char *replaces = row[replacesIx]; char *reason = row[replaceReasonIx]; if (!isEmptyOrNa(replaces)) { char *prefix = edwLicensePlateHead(conn); if (!startsWith(prefix, replaces)) 
errAbort("%s in replaces column is not an ENCODE file accession", replaces); if (isEmptyOrNa(reason)) errAbort("Replacing %s without a reason\n", replaces); } } } *retSubmitList = submitFileRowFromFieldedTable(conn, table, fileIx, md5Ix, sizeIx, modifiedIx, replacesIx, replaceReasonIx); }