void gensatFtpImages(char *checkMd5, char *outDir)
/* gensatFtpImages - Download images guided by output of gensatFtpList.
 *
 * checkMd5 - name of a file read line by line; each line holds two
 *            whitespace-separated columns: an md5 sum and a path that is
 *            appended both to the global uri and to outDir.
 * outDir   - directory under which the relative paths are recreated.
 *
 * For every listed image whose .jpg does not yet exist under outDir the
 * file is fetched with safeGetOne(), run through bunzip2 when its name ends
 * in .bz2, and run through 'convert' when the result does not end in .jpg.
 * Entries ending in .txt, .zip or .doc are skipped; any other unrecognized
 * suffix aborts.  A failing shell command aborts.  Each failed fetch bumps
 * the global errCount, and the run aborts once it exceeds maxErrs. */
{
int err;
char source[PATH_LEN], nativeImage[PATH_LEN], jpgImage[PATH_LEN];
char dir[PATH_LEN], file[PATH_LEN], ext[PATH_LEN];
struct lineFile *lf = lineFileOpen(checkMd5, TRUE);
char *line, *md5, *relativePath;
struct dyString *command = dyStringNew(0);

while (lineFileNext(lf, &line, NULL))
    {
    /* Parse out two columns of checkMd5 file. */
    md5 = nextWord(&line);
    if (md5 == NULL)
        continue;   /* No token at all: treat as a blank line and move on. */
    relativePath = skipLeadingSpaces(line);
    if (relativePath == NULL || relativePath[0] == 0)
        errAbort("Missing path column after md5 %s in %s", md5, checkMd5);

    /* Figure out output path, and if file already exists skip it. */
    safef(nativeImage, sizeof(nativeImage), "%s/%s", outDir, relativePath);
    strcpy(jpgImage, nativeImage);  /* Same-sized buffers, so this fits. */
    if (endsWith(jpgImage, ".bz2"))
        chopSuffix(jpgImage);
    if (endsWith(jpgImage, ".png") || endsWith(jpgImage, ".tif") ||
        endsWith(jpgImage, ".tiff") || endsWith(jpgImage, ".jpeg") ||
        endsWith(jpgImage, ".jpg") || endsWith(jpgImage, ".JPG") )
        {
        /* Every suffix accepted above is at least as long as ".jpg", so
         * swapping it in cannot grow the string past the buffer. */
        chopSuffix(jpgImage);
        strcat(jpgImage, ".jpg");
        }
    else if (endsWith(jpgImage, ".txt") || endsWith(jpgImage, ".zip")
        || endsWith(jpgImage, ".doc"))
        continue;
    else
        errAbort("Unrecognized image type in file %s", jpgImage);

    if (fileExists(jpgImage))
        {
        verbose(1, "Already have %s\n", jpgImage);
        continue;
        }

    /* Create any directories needed.  A failure here would only surface
     * later as a confusing download error, so stop right away. */
    splitPath(relativePath, dir, file, ext);
    dyStringClear(command);
    dyStringPrintf(command, "mkdir -p '%s/%s'", outDir, dir);
    err = system(command->string);
    if (err != 0)
        errAbort("err %d on %s", err, command->string);

    /* wget the file. */
    safef(source, sizeof(source), "%s/%s", uri, relativePath);
    if (!safeGetOne(source, md5, nativeImage))
        {
        if (++errCount > maxErrs)
            errAbort("Aborting after %d errors", errCount);
        continue;
        }

    if (endsWith(nativeImage, ".bz2"))
        {
        dyStringClear(command);
        dyStringPrintf(command, "bunzip2 '%s'", nativeImage);
        verbose(1, "%s\n", command->string);
        err = system(command->string);
        if (err != 0)
            errAbort("err %d on %s", err, command->string);
        /* Track the new name of the decompressed file. */
        chopSuffix(nativeImage);
        }
    if (!endsWith(nativeImage, ".jpg") )
        {
        dyStringClear(command);
        dyStringPrintf(command, "convert '%s' '%s'", nativeImage, jpgImage);
        verbose(1, "%s\n", command->string);
        err = system(command->string);
        if (err != 0)
            errAbort("err %d on %s", err, command->string);
        /* Only the converted .jpg is kept. */
        remove(nativeImage);
        }
    }
/* NOTE(review): lf and command are not released before returning; add the
 * matching close/free calls if this is ever called more than once. */
}
/* Example #2 */
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog)
/* gensatImageDownload - Download images from gensat guided by xml file.
 *
 * gensatXml - xml file opened as a list of GensatImage records inside a
 *             GensatImageSet.
 * outDir    - directory the .jpg files are written under; subdirectories
 *             are created with 'mkdir -p' as needed.
 * outLog    - log file, opened in append mode through the global fLog and
 *             closed again before returning.
 *
 * Each record's full-image file name is turned into a .jpg path under
 * outDir.  Files already present are only logged.  Otherwise the image is
 * fetched with safeGetOne(): straight from the ftp site when the recorded
 * name ends in .jpg, else from the jpeg-rendering cgi keyed by image id.
 * A "Got via" line is logged only when safeGetOne() returns non-zero. */
{
struct xap *xap;
struct gsGensatImage *image;
char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat";
char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id=";
char finalJpg[PATH_LEN];
char finalDir[PATH_LEN];
char wgetSource[PATH_LEN];
struct hash *dirHash = newHash(16);     /* Directories already created. */
struct dyString *mkdirCmd = dyStringNew(0);
int imageIx = 0;

fLog = mustOpen(outLog, "a");
fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir);
xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler);

while ((image = xapListNext(xap, "GensatImage")) != NULL)
    {
    int id = image->gsGensatImageId->text;
    char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg
                        ->gsGensatImageInfo->gsGensatImageInfoFilename->text;
    char *how;      /* "ftp" or "cgi", for the log line. */

    /* Mangle file name a little */
    subChar(imageFile, '(', '_');
    stripChar(imageFile, ')');

    /* Figure out name of jpeg file in outDir. */
    verbose(1, "image %d, id %d\n", ++imageIx, id);
    safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile);
    /* Image magick can't handle two suffixes.  Start past the outDir prefix
     * so a caller-supplied directory containing ".full" is left alone. */
    stripString(finalJpg + strlen(outDir), ".full");
    chopSuffix(finalJpg);
    /* The removed suffix may have been shorter than ".jpg" (or absent), so
     * make sure the replacement still fits before appending it. */
    if (strlen(finalJpg) + sizeof(".jpg") > sizeof(finalJpg))
        errAbort("Path too long: %s.jpg", finalJpg);
    strcat(finalJpg, ".jpg");

    /* Create directory that it goes in if necessary */
    splitPath(finalJpg, finalDir, NULL, NULL);
    if (!hashLookup(dirHash, finalDir))
        {
        hashAdd(dirHash, finalDir, NULL);
        dyStringClear(mkdirCmd);
        /* Quoted so that directory names with spaces survive the shell. */
        dyStringPrintf(mkdirCmd, "mkdir -p '%s'", finalDir);
        if (system(mkdirCmd->string) != 0)
            errAbort("Couldn't %s", mkdirCmd->string);
        }

    /* Download it - either directly via ftp, or indirectly via cgi. */
    if (fileExists(finalJpg))
        {
        verbose(1, "already have %s\n", imageFile);
        fprintf(fLog, "%s already downloaded\n", finalJpg);
        continue;
        }
    if (endsWith(imageFile, ".jpg"))
        {
        safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile);
        how = "ftp";
        }
    else
        {
        safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id);
        how = "cgi";
        }
    if (safeGetOne(wgetSource, finalJpg))
        fprintf(fLog, "Got via %s %s\n", how, finalJpg);
    }
carefulClose(&fLog);
}