Ejemplo n.º 1
void edwCorrectFileTags(char *tabFileName)
/* edwCorrectFileTags - Use this to correct tags in the edwFile table and corresponding fields 
 * in the edwValidFile table without forcing a validateManifest rerun or a reupload.. */
struct sqlConnection *conn = edwConnectReadWrite();
char *requiredFields[] = {"accession",};
char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"};
struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName,
	requiredFields, ArraySize(requiredFields));
checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields));
int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount);

struct fieldedRow *fr;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    char *acc = fr->row[accessionIx];
    long long id = edwNeedFileIdForLicensePlate(conn, acc);
    struct edwFile *ef = edwFileFromId(conn, id);
    int i;
    char *tags = ef->tags;
    for (i=0; i<table->fieldCount; ++i)
	if (i != accessionIx)
	    tags = cgiStringNewValForVar(tags, table->fields[i], fr->row[i]);
    edwFileResetTags(conn, ef, tags);
Ejemplo n.º 2
void edwMakeContaminationQa(int startId, int endId)
/* edwMakeContaminationQa - Screen for contaminants by aligning against contaminant genomes.. */
/* Make list with all files in ID range */
struct sqlConnection *conn = edwConnectReadWrite();
struct edwFile *ef, *efList = edwFileLoadIdRange(conn, startId, endId);

for (ef = efList; ef != NULL; ef = ef->next)
    doContaminationQa(conn, ef);
Ejemplo n.º 3
void edwAddQaContamTarget(char *assemblyName)
/* edwAddQaContamTarget - Add a new contamination target to warehouse.
 * Looks up assemblyName in edwAssembly, aborts if the lookup yields id 0, and otherwise
 * inserts that assembly id into edwQaContamTarget and prints a confirmation. */
{
struct sqlConnection *conn = edwConnectReadWrite();
char query[256 + PATH_LEN];
sqlSafef(query, sizeof(query), "select id from edwAssembly where name='%s'", assemblyName);
int assemblyId = sqlQuickNum(conn, query);
if (assemblyId == 0)
    errAbort("Assembly %s doesn't exist in warehouse. Typo or time for edwAddAssembly?", 
	assemblyName);
sqlSafef(query, sizeof(query), "insert edwQaContamTarget(assemblyId) values(%d)", assemblyId);
sqlUpdate(conn, query);
printf("Added target %s\n", assemblyName);
}
Ejemplo n.º 4
void edwFixGtfBigBed(char *how)
/* edwFixGtfBigBed - In original import the .gtf.bigBed files were bad about half the time.  Cricket 
 * caught this because a bunch of them ended up with the same md5 sum.  This program regenerates them 
 * all. */
doReal = sameString(how, "real");
struct sqlConnection *conn = edwConnectReadWrite();
struct edwFile *redoEf, *redoList = 
    edwFileLoadByQuery(conn, "select * from edwFile where submitFileName like '%.gtf.bigBed'");
for (redoEf = redoList; redoEf != NULL; redoEf = redoEf->next)
    redoOne(conn, redoEf);
Ejemplo n.º 5
void doMiddle()
/* doMiddle - put up middle part of web page, not including http and html headers/footers.
 * NOTE(review): this excerpt has no braces and the if / else-if branches below have no
 * bodies, so only the visible skeleton is documented.  The branch contents must be
 * restored from the original file before this can compile. */
/* Open the form; it submits back to the edwWebDeprecate CGI with GET. */
printf("<FORM ACTION=\"../cgi-bin/edwWebDeprecate\" METHOD=GET>\n");
/* NOTE(review): edwConnectReadWrite is passed edwDatabase here, while other call sites in
 * this file call it with no argument - confirm which signature is current. */
struct sqlConnection *conn = edwConnectReadWrite(edwDatabase);
/* userEmail is not declared in this function; presumably a file-level variable. */
userEmail = edwGetEmailAndVerify();
/* Branch taken when edwGetEmailAndVerify returned NULL (body not visible here). */
if (userEmail == NULL)
/* Branch taken when both the "fileList" and "reason" CGI variables exist (body not
 * visible here). */
else if (cgiVarExists("fileList") && cgiVarExists("reason"))
Ejemplo n.º 6
void edwRunOnIds(char *program, char *queryString)
/* edwRunOnIds - Run a edw command line program (one that takes startId endId as it's two parameters) for a range of ids, 
 * putting it on edwJob queue. */
struct sqlConnection *conn = edwConnectReadWrite();
struct slName *id, *idList = sqlQuickList(conn, queryString);
for (id = idList; id != NULL; id = id->next)
    char query[512];
    sqlSafef(query, sizeof(query), "insert into %s (commandLine) values ('%s %s %s')",
	runTable, program, id->name, id->name);
    sqlUpdate(conn, query);

Ejemplo n.º 7
void edwMakeRepeatQa(int startFileId, int endFileId)
/* edwMakeRepeatQa - Figure out what proportion of things align to repeats.. */
struct sqlConnection *conn = edwConnectReadWrite();
struct edwFile *ef, *efList = edwFileAllIntactBetween(conn, startFileId, endFileId);
for (ef = efList; ef != NULL; ef = ef->next)
    struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
    if (vf != NULL)
	if (sameString(vf->format, "fastq"))
	    fastqRepeatQa(conn, ef, vf);
Ejemplo n.º 8
void edwCreateNewUser(char *email)
/* Create new user, checking that user does not already exist. */
/* Now make sure user is not already in user table. */
struct sqlConnection *conn = edwConnectReadWrite();
struct dyString *query = dyStringNew(0);
sqlDyStringPrintf(query, "select count(*) from edwUser where email = '%s'", email);
if (sqlQuickNum(conn, query->string) > 0)
    errAbort("User %s already exists", email);

/* Do database insert. */
sqlDyStringPrintf(query, "insert into edwUser (email) values('%s')", email);
sqlUpdate(conn, query->string);

Ejemplo n.º 9
void edwFixTargetSeq(char *when)
/* edwFixTargetSeq - Fill in new fields about target seq to edwBamFile and edwAssembly.. */
struct sqlConnection *conn = edwConnectReadWrite();
struct edwAssembly *as, *asList = edwAssemblyLoadByQuery(conn, "select * from edwAssembly");
char query[512];
for (as = asList; as != NULL; as = as->next)
    char *twoBitFileName = edwPathForFileId(conn, as->twoBitId);
    struct twoBitFile *tbf = twoBitOpen(twoBitFileName);
    safef(query, sizeof(query), "update edwAssembly set seqCount=%u where id=%u",
	tbf->seqCount, as->id);
    sqlUpdate(conn, query);

struct edwBamFile *bam, *bamList = edwBamFileLoadByQuery(conn, "select * from edwBamFile");
for (bam = bamList; bam != NULL; bam = bam->next)
    char *fileName = edwPathForFileId(conn, bam->fileId);
    samfile_t *sf = samopen(fileName, "rb", NULL);
    if (sf == NULL)
	errnoAbort("Couldn't open %s.\n", fileName);
    bam_header_t *head = sf->header;
    if (head == NULL)
	errAbort("Aborting ... Bad BAM header in file: %s", fileName);

    /* Sum up some target sizes. */
    long long targetBaseCount = 0;   /* Total size of all bases in target seq */
    int i;
    for (i=0; i<head->n_targets; ++i)
	targetBaseCount  += head->target_len[i];

    safef(query, sizeof(query), 
	"update edwBamFile set targetBaseCount=%lld,targetSeqCount=%u where id=%u",
	targetBaseCount, (unsigned)head->n_targets, bam->id);
    sqlUpdate(conn, query);


Ejemplo n.º 10
void doMiddle()
/* doMiddle - put up middle part of web page, not including http and html headers/footers.
 * NOTE(review): this excerpt has no braces and none of the if / else-if branches below
 * has a body, so only the order in which conditions are tested is documented.  The
 * branch contents must be restored from the original file before this can compile. */
/* Open the form; it submits back to the edwWebSubmit CGI with GET. */
printf("<FORM ACTION=\"../cgi-bin/edwWebSubmit\" METHOD=GET>\n");
/* NOTE(review): edwConnectReadWrite is passed edwDatabase here, while other call sites in
 * this file call it with no argument - confirm which signature is current. */
struct sqlConnection *conn = edwConnectReadWrite(edwDatabase);
/* userEmail is not declared in this function; presumably a file-level variable. */
userEmail = edwGetEmailAndVerify();
/* Conditions are tested in this order, first match wins: no verified email, then the
 * CGI variable named by stopButtonName, then "submitUrl", then "monitor". */
if (userEmail == NULL)
else if (cgiVarExists(stopButtonName))
else if (cgiVarExists("submitUrl"))
else if (cgiVarExists("monitor"))
Ejemplo n.º 11
void loadEapDb(char *dir)
/* Load up EAP portion of database from tab file directory.
 * For each table name in the list below, the file <dir>/<table>.tab is loaded into the
 * table of the same name with a "load data local infile" statement. */
{
struct sqlConnection *conn = edwConnectReadWrite();
char *tables[] = {"eapJob", "eapSoftware", "eapSwVersion", "eapStep", "eapStepSoftware", "eapStepVersion",
"eapStepSwVersion", "eapAnalysis", "eapInput", "eapOutput",};
int i;
for (i=0; i<ArraySize(tables); ++i)
    {
    char *table = tables[i];

    /* Make up tab separated file name and ask database to load files into the table. */
    char tabName[PATH_LEN];
    safef(tabName, sizeof(tabName), "%s/%s.tab", dir, table);
    char query[2*PATH_MAX];
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s", tabName, table);
    verbose(2, "%s\n", query);
    sqlUpdate(conn, query);
    }
}
Ejemplo n.º 12
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.. */
struct sqlConnection *conn = edwConnectReadWrite();

/* Convert ascii id's to valid file ids so we catch errors early. */
long long ids[idCount];
struct edwValidFile *vfs[idCount];
int i;
for (i=0; i<idCount; ++i)
    long long id = ids[i] = sqlLongLong(idStrings[i]);
    struct edwValidFile *vf = vfs[i] = edwValidFileFromFileId(conn, id);
    if (vf == NULL)
        errAbort("%lld is not a fileId in the edwValidFile table", id);

/* Loop through each file and change format. */
for (i=0; i<idCount; ++i)
    changeFormat(conn, vfs[i], format);
Ejemplo n.º 13
void doMiddle()
/* Write what goes between BODY and /BODY for the edwWebRegisterScript page.
 * NOTE(review): this excerpt is badly truncated - brace lines are gone, the #ifdef SOON
 * below has no visible #endif, several if statements have no body or no else, and one
 * line is an orphaned argument list.  The comments below describe only what is visible;
 * restore the function from the original file before changing it. */
/* NOTE(review): the statement guarded by this HTTPS check is missing from the excerpt. */
if (!cgiServerHttpsIsOn())
struct sqlConnection *conn = edwConnectReadWrite();
/* Open the form; it posts back to edwWebRegisterScript. */
printf("<FORM ACTION=\"edwWebRegisterScript\" METHOD=POST>\n");
printf("<B>Register Script with ENCODE Data Warehouse</B><BR>\n");
/* Everything from here on is compiled only when SOON is defined. */
#ifdef SOON
/* No signed-in user: show a sign-in button. */
if (userEmail == NULL)
    printf("Please sign in:");
    printf("<INPUT TYPE=BUTTON NAME=\"signIn\" VALUE=\"sign in\" id=\"signin\">");
/* A "description" CGI variable is present: register a script for the signed-in user. */
else if (cgiVarExists("description"))
    struct edwUser *user = edwUserFromEmail(conn, userEmail);
    /* NOTE(review): the statement for the user == NULL case is missing from the excerpt;
     * the lines that follow presumably belong to an else branch - confirm. */
    if (user == NULL)
	char password[HEXED_32_SIZE];
	char babyName[HEXED_32_SIZE];
	/* Fill babyName with a generated script user name. */
	edwRandomBabble(babyName, sizeof(babyName));

	/* NOTE(review): password is passed here without any visible initialization and
	 * is printed below - presumably edwRegisterScript fills it in, or a line that
	 * sets it has been lost.  Verify. */
	edwRegisterScript(conn, user, babyName, password, cgiString("description"));
	/* Show the new credentials and instructions for using them. */
	printf("Script now registered.<BR>\n");
	printf("The script user name is %s.<BR>\n", babyName);
	printf("The script password is %s.<BR>\n", password);
	printf("Please save the script user name and password somewhere. ");
	puts("Please pass these two and the URL");
	puts(" of your validated manifest file (validated.txt) to our server to submit data.");
	puts("Construct a URL of the form:<BR>");
	/* NOTE(review): the next line is the tail of a call whose first line (likely a
	 * printf showing the URL form) is missing from the excerpt. */
	       babyName, password,
	puts("That is pass the CGI encoded variables user, password, and url to the ");
	puts("web services CGI at");
	puts("https://encodedcc.sdsc.edu/cgi-bin/edwScriptSubmit. ");
	puts("You can use the http://encodedcc.sdsc.edu/cgi-bin/edwWebBrowse site to ");
	puts("monitor your submission interactively. Please contact your wrangler if you ");
	puts("have any questions.<BR>");
	cgiMakeButton("submit", "Register another script");
    printf(" ");
    /* NOTE(review): a second declaration of user follows; presumably this starts a final
     * else branch (signed in, no description yet) whose "else" line was lost - confirm. */
    struct edwUser *user = edwUserFromEmail(conn, userEmail);
    /* NOTE(review): as above, the user == NULL statement appears to be missing; the
     * prompt below presumably belongs to the authorized-user case. */
    if (user == NULL)
	printf("%s is authorized to register a new script<BR>\n", userEmail);
	printf("<BR>Script description:\n");
	/* Text input for the script description, 80 characters wide. */
	cgiMakeTextVar("description", NULL, 80);
Ejemplo n.º 14
void edwSubmit(char *submitUrl, char *email)
/* edwSubmit - Submit URL with validated.txt to warehouse.
 *   submitUrl - URL of the submission file; it is split at its last '/' into a directory
 *               part and a file part.  Aborts if it contains no '/'.
 *   email - looked up with edwMustGetUserFromEmail to identify the submitter, and handed
 *           to waitForValidationAndSendEmail at the end.
 * NOTE(review): this excerpt has lost all of its brace lines and apparently several
 * statements as well (each spot is marked NOTE(review) below), so it does not compile as
 * shown.  Restore from the original file before changing any logic. */
/* Parse out url a little into submitDir and submitFile */
char *lastSlash = strrchr(submitUrl, '/');
if (lastSlash == NULL)
    errAbort("%s is not a valid URL - it has no '/' in it.", submitUrl);
char *submitFile = lastSlash+1;
/* submitDir is everything up to and including the last slash. */
int submitDirSize = submitFile - submitUrl;
char submitDir[submitDirSize+1];
memcpy(submitDir, submitUrl, submitDirSize);
submitDir[submitDirSize] = 0;  // Add trailing zero

/* Make sure user has access. */
struct sqlConnection *conn = edwConnectReadWrite();
struct edwUser *user = edwMustGetUserFromEmail(conn, email);
int userId = user->id;

/* See if we are already running on same submission.  If so counsel patience and quit. */
notOverlappingSelf(conn, submitUrl);

/* Make a submit record. */
int submitId = makeNewEmptySubmitRecord(conn, submitUrl, userId);

/* The next errCatch block will fill these in if all goes well. */
struct submitFileRow *sfrList = NULL, *oldList = NULL, *newList = NULL; 
/* NOTE(review): oldCount is written to the database below but no visible line ever
 * increments it - an increment may have been lost from the excerpt. */
int oldCount = 0;
long long oldBytes = 0, newBytes = 0, byteCount = 0;

/* Start catching errors from here and writing them in submitId.  If we don't
 * throw we'll end up having a list of all files in the submit in sfrList. */
struct errCatch *errCatch = errCatchNew();
char query[1024];
if (errCatchStart(errCatch))
    /* Make sure they got a bit of space, enough for a reasonable submit file. 
     * We do this here just because we can make error message more informative. */
    long long diskFreeSpace = freeSpaceOnFileSystem(edwRootDir);
    if (diskFreeSpace < 4*1024*1024)
	errAbort("No space left in warehouse!");

    /* Open remote submission file.  This is most likely where we will fail. */
    int hostId=0, submitDirId = 0;
    long long startUploadTime = edwNow();
    int remoteFd = edwOpenAndRecordInDir(conn, submitDir, submitFile, submitUrl, 
	&hostId, &submitDirId);

    /* Copy to local temp file. */
    char tempSubmitFile[PATH_LEN];
    fetchFdToTempFile(remoteFd, tempSubmitFile);
    long long endUploadTime = edwNow();

    /* Calculate MD5 sum, and see if we already have such a file. */
    char *md5 = md5HexForFile(tempSubmitFile);
    int fileId = findFileGivenMd5AndSubmitDir(conn, md5, submitDirId);

    /* If we already have it, then delete temp file, otherwise put file in file table. */
    char submitLocalPath[PATH_LEN];
    /* Known file: look up its warehouse-relative name and build the local path from it.
     * NOTE(review): no statement deleting the temp file is visible in this branch, even
     * though the comment above says it is deleted. */
    if (fileId != 0)
	char submitRelativePath[PATH_LEN];
	sqlSafef(query, sizeof(query), "select edwFileName from edwFile where id=%d", fileId);
	sqlNeedQuickQuery(conn, query, submitRelativePath, sizeof(submitRelativePath));
	safef(submitLocalPath, sizeof(submitLocalPath), "%s%s", edwRootDir, submitRelativePath);
	/* NOTE(review): an "else" line appears to be missing here; the statements from
	 * this point to the edwFile update read as the branch for a file not seen before. */
	/* Looks like it's the first time we've seen this submission file, so
	 * save the file itself.  We'll get to the records inside the file in a bit. */
	fileId = makeNewEmptyFileRecord(conn, submitId, submitDirId, submitFile, 0);

	/* Get file/path names for submission file inside warehouse. */
	char edwFile[PATH_LEN];
	edwMakeFileNameAndPath(fileId, submitFile, edwFile, submitLocalPath);

	/* Move file to final resting place and get update time and size from local file system.  */
	mustRename(tempSubmitFile, submitLocalPath);
	time_t updateTime = fileModTime(submitLocalPath);
	off_t size = fileSize(submitLocalPath);

	/* Update file table which now should be complete including updateTime. */
	sqlSafef(query, sizeof(query), 
	    "update edwFile set "
	    " updateTime=%lld, size=%lld, md5='%s', edwFileName='%s',"
	    " startUploadTime=%lld, endUploadTime=%lld"
	    " where id=%u\n",
	    (long long)updateTime, (long long)size, md5, edwFile, 
	    startUploadTime, endUploadTime, fileId);
	sqlUpdate(conn, query);

    /* By now there is a submit file on the local file system.  We parse it out. */
    edwParseSubmitFile(conn, submitLocalPath, submitUrl, &sfrList);

    /* Save our progress so far to submit table. */
    sqlSafef(query, sizeof(query), 
	"update edwSubmit"
	"  set submitFileId=%lld, submitDirId=%lld, fileCount=%d where id=%d",  
	    (long long)fileId, (long long)submitDirId, slCount(sfrList), submitId);
    sqlUpdate(conn, query);

    /* Weed out files we already have.  Each row is moved off sfrList onto oldList or
     * newList, so the next pointer is saved before the row is relinked. */
    struct submitFileRow *sfr, *sfrNext;
    for (sfr = sfrList; sfr != NULL; sfr = sfrNext)
	sfrNext = sfr->next;
	struct edwFile *bf = sfr->file;
	/* This fileId is a separate long long declared for the loop body; the int fileId
	 * of the submission file above is a different variable. */
	long long fileId;
	if ((fileId = edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)) >= 0)
	    oldBytes += bf->size;
	    sfr->md5MatchFileId = fileId;
	    slAddHead(&oldList, sfr);
	    /* NOTE(review): an "else" line appears to be missing here.  As shown, the same
	     * row would be added to both lists; presumably only rows edwGotFile does not
	     * find belong on newList. */
	    slAddHead(&newList, sfr);
	byteCount += bf->size;
    /* All rows now live on oldList or newList. */
    sfrList = NULL;

    /* Update database with oldFile count. */
    sqlSafef(query, sizeof(query), 
	"update edwSubmit set oldFiles=%d,oldBytes=%lld,byteCount=%lld where id=%u",  
	    oldCount, oldBytes, byteCount, submitId);
    sqlUpdate(conn, query);

    /* Deal with old files. This may throw an error.  We do it before downloading new
     * files since we want to fail fast if we are going to fail. */
    /* doUpdate is not declared in this function; presumably a file-level option flag. */
    int updateCount = handleOldFileTags(conn, oldList, doUpdate);
    sqlSafef(query, sizeof(query), 
	"update edwSubmit set metaChangeCount=%d where id=%u",  updateCount, submitId);
    sqlUpdate(conn, query);
/* NOTE(review): no call ending the error catch is visible between the block above and
 * this check - it may have been lost from the excerpt. */
if (errCatch->gotError)
    handleSubmitError(conn, submitId, errCatch->message->string);
    /* The handleSubmitError will keep on throwing. */

/* Go through list attempting to load the files if we don't already have them. */
struct submitFileRow *sfr;
for (sfr = newList; sfr != NULL; sfr = sfr->next)
    /* NOTE(review): the statement run when a stop has been requested is missing here
     * (presumably it leaves the loop). */
    if (edwSubmitShouldStop(conn, submitId))
    struct edwFile *bf = sfr->file;
    /* Build the URL of this individual file: directory part plus file name, with one
     * extra byte for the terminating zero.  This local array has the same name as the
     * submitUrl parameter. */
    int submitUrlSize = strlen(submitDir) + strlen(bf->submitFileName) + 1;
    char submitUrl[submitUrlSize];
    safef(submitUrl, submitUrlSize, "%s%s", submitDir, bf->submitFileName);
    if (edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)<0)
	/* We can't get a ID for this file. There's two possible reasons - 
	 * either somebody is in the middle of fetching it or nobody's started. 
	 * If somebody is in the middle of fetching it, assume they died
	 * if they took more than an hour,  and start up another fetch.
	 * So here we fetch unless somebody else is fetching recently. */
	if (edwGettingFile(conn, submitDir, bf->submitFileName) < 0)
	    verbose(1, "Fetching %s\n", bf->submitFileName);
	    getSubmittedFile(conn, bf, submitDir, submitUrl, submitId);
	    newBytes += bf->size;
	    sqlSafef(query, sizeof(query), 
		"update edwSubmit set newFiles=newFiles+1,newBytes=%lld where id=%d", 
		newBytes, submitId);
	    sqlUpdate(conn, query);

	/* NOTE(review): an "else" line appears to be missing here; the lines below read as
	 * the branch for a file the warehouse already has.  The sqlSafef call is also cut
	 * short - its format has a %d but no argument or closing parenthesis follows. */
	verbose(2, "Already got %s\n", bf->submitFileName);
	sqlSafef(query, sizeof(query), "update edwSubmit set oldFiles=oldFiles+1 where id=%d", 
	sqlUpdate(conn, query);

    /* If this row replaces an earlier file, mark that file as replaced and deprecated. */
    if (sfr->replacesFile != 0)
	/* What happens when the replacement doesn't validate? */
	verbose(2, "Replacing %s with %s\n", sfr->replaces,  bf->submitFileName);
	sqlSafef(query, sizeof(query), 
	    "update edwFile set replacedBy=%u, deprecated='%s' where id=%u",
		  bf->id, sfr->replaceReason,  sfr->replacesFile);
	sqlUpdate(conn, query);

/* If we made it here, update submit endUploadTime */
sqlSafef(query, sizeof(query),
	"update edwSubmit set endUploadTime=%lld where id=%d", 
	edwNow(), submitId);
sqlUpdate(conn, query);

/* Get a real submission record and then set things up so mail user when all done. */
struct edwSubmit *submit = edwSubmitFromId(conn, submitId);
sqlDisconnect(&conn);	// We'll be waiting a while so free connection
waitForValidationAndSendEmail(submit, email);