void edwCorrectFileTags(char *tabFileName) /* edwCorrectFileTags - Use this to correct tags in the edwFile table and corresponding fields * in the edwValidFile table without forcing a validateManifest rerun or a reupload.. */ { struct sqlConnection *conn = edwConnectReadWrite(); char *requiredFields[] = {"accession",}; char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"}; struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName, requiredFields, ArraySize(requiredFields)); checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields)); int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount); struct fieldedRow *fr; for (fr = table->rowList; fr != NULL; fr = fr->next) { char *acc = fr->row[accessionIx]; long long id = edwNeedFileIdForLicensePlate(conn, acc); struct edwFile *ef = edwFileFromId(conn, id); int i; char *tags = ef->tags; for (i=0; i<table->fieldCount; ++i) { if (i != accessionIx) tags = cgiStringNewValForVar(tags, table->fields[i], fr->row[i]); } edwFileResetTags(conn, ef, tags); edwFileFree(&ef); } }
void changeFormat(struct sqlConnection *conn, struct edwValidFile *vf, char *format)
/* Change the format of the file that vf refers to: load the edwFile record for
 * vf->fileId, set the tag named by tagToChange to format in its tag string, and
 * hand the new tag string to edwFileResetTags. */
{
struct edwFile *file = edwFileFromId(conn, vf->fileId);
char *updatedTags = cgiStringNewValForVar(file->tags, tagToChange, format);
edwFileResetTags(conn, file, updatedTags);
edwFileFree(&file);
}
static int handleOldFileTags(struct sqlConnection *conn, struct submitFileRow *sfrList,
    boolean update)
/* Check metadata on files mentioned in manifest that by MD5 sum we already have in
 * warehouse.  We may want to update metadata on these.
 *   conn    - open connection to the warehouse database
 *   sfrList - manifest rows; each row's md5MatchFileId names the file already in the warehouse
 *   update  - if FALSE, any difference in submit name or tags is an errAbort instead
 *             of an update
 * Returns the number of files whose submit name and/or tags were updated. */
{
struct submitFileRow *sfr;
int updateCount = 0;
for (sfr = sfrList; sfr != NULL; sfr = sfr->next)
    {
    struct edwFile *newFile = sfr->file;
    struct edwFile *oldFile = edwFileFromId(conn, sfr->md5MatchFileId);
    verbose(2, "looking at old file %s (%s)\n", oldFile->submitFileName, newFile->submitFileName);

    /* Compare tags as parsed dictionaries rather than as raw encoded strings. */
    struct cgiDictionary *newTags = cgiDictionaryFromEncodedString(newFile->tags);
    struct cgiDictionary *oldTags = cgiDictionaryFromEncodedString(oldFile->tags);
    boolean updateName = !sameString(oldFile->submitFileName, newFile->submitFileName);
    boolean updateTags = !cgiDictionarySame(oldTags, newTags);

    if (updateName)
        {
        if (!update)
            errAbort("%s already uploaded with name %s. Please use the 'update' option if you "
                     "want to give it a new name.",
                     newFile->submitFileName, oldFile->submitFileName);
        updateSubmitName(conn, oldFile->id, newFile->submitFileName);
        }

    if (updateTags)
        {
        if (!update)
            {
            /* Report the first differing tag so the submitter can see what changed. */
            char *name="", *oldVal="", *newVal="";
            cgiDictionaryFirstDiff(oldTags, newTags, &name, &oldVal, &newVal);
            errAbort("%s is duplicate of %s in warehouse, but %s column went from %s to %s.\n"
                     "Please use the 'update' option if you are meaning to update the information\n"
                     "associated with this file and try again if this is intentional.",
                     newFile->submitFileName, oldFile->edwFileName, name, oldVal, newVal);
            }
        edwFileResetTags(conn, oldFile, newFile->tags, TRUE);
        }

    if (updateTags || updateName)
        ++updateCount;

    cgiDictionaryFree(&oldTags);
    cgiDictionaryFree(&newTags);
    /* oldFile was loaded just for this iteration; release it so the loop does not
     * leak one record per manifest row.  newFile belongs to sfr and is left alone. */
    edwFileFree(&oldFile);
    }
return updateCount;
}
void redoOne(struct sqlConnection *conn, struct edwFile *redoEf) /* Redo one file. */ { /* Figure out submit file name of the gtf file. */ char gtfFileName[PATH_LEN]; strcpy(gtfFileName, redoEf->submitFileName); chopSuffix(gtfFileName); strcat(gtfFileName, ".gz"); /* Get edwFile record for gtf file. */ char query[PATH_LEN+64]; safef(query, sizeof(query), "select * from edwFile where submitFileName='%s'", gtfFileName); struct edwFile *sourceEf = edwFileLoadByQuery(conn, query); assert(slCount(sourceEf) == 1); /* Get UCSC database */ safef(query, sizeof(query), "select ucscDb from edwValidFile where fileId=%u", sourceEf->id); char ucscDb[64] = ""; sqlQuickQuery(conn, query, ucscDb, sizeof(ucscDb)); assert(ucscDb[0] != 0); /* Remake the big bed file. */ char sourceFileName[PATH_LEN], destFileName[PATH_LEN]; safef(sourceFileName, sizeof(sourceFileName), "%s%s", edwRootDir, sourceEf->edwFileName); safef(destFileName, sizeof(destFileName), "%s%s", edwRootDir, redoEf->edwFileName); makeGtfBigBed(ucscDb, sourceFileName, destFileName); /* Recalculate size and md5 sum and validation key. */ char *md5 = md5HexForFile(destFileName); long long size = fileSize(destFileName); char *validKey = encode3CalcValidationKey(md5, size); /* Issue command to update md5 in database. */ char command[2*PATH_LEN]; safef(command, sizeof(command), "hgsql -e 'update edwFile set md5=\"%s\" where id=%u' encodeDataWarehouse", md5, redoEf->id); doSystem(command); /* Issue command to update tags in database. */ char *newTags = cgiStringNewValForVar(redoEf->tags, "valid_key", validKey); if (doReal) { edwFileResetTags(conn, redoEf, newTags); } }