void gensatFtpImages(char *checkMd5, char *outDir) /* gensatFtpImages - Download images guided by output of gensatFtpList. */ { int err; char source[PATH_LEN], nativeImage[PATH_LEN], jpgImage[PATH_LEN]; char dir[PATH_LEN], file[PATH_LEN], ext[PATH_LEN]; struct lineFile *lf = lineFileOpen(checkMd5, TRUE); char *line, *md5, *relativePath; struct dyString *command = dyStringNew(0); while(lineFileNext(lf, &line, NULL)) { /* Parse out two columns of checkMd5 file. */ md5 = nextWord(&line); relativePath = skipLeadingSpaces(line); /* Figure out output path, and if file already exists skip it. */ safef(nativeImage, sizeof(nativeImage), "%s/%s", outDir, relativePath); strcpy(jpgImage, nativeImage); if (endsWith(jpgImage, ".bz2")) chopSuffix(jpgImage); if (endsWith(jpgImage, ".png") || endsWith(jpgImage, ".tif") || endsWith(jpgImage, ".tiff") || endsWith(jpgImage, ".jpeg") || endsWith(jpgImage, ".jpg") || endsWith(jpgImage, ".JPG") ) { chopSuffix(jpgImage); strcat(jpgImage, ".jpg"); } else if (endsWith(jpgImage, ".txt") || endsWith(jpgImage, ".zip") || endsWith(jpgImage, ".doc")) continue; else errAbort("Unrecognized image type in file %s", jpgImage); if (!fileExists(jpgImage)) { /* Create any directories needed. */ splitPath(relativePath, dir, file, ext); dyStringClear(command); dyStringPrintf(command, "mkdir -p '%s/%s'", outDir, dir); system(command->string); /* wget the file. */ safef(source, sizeof(source), "%s/%s", uri, relativePath); if (safeGetOne(source, md5, nativeImage)) { if (endsWith(nativeImage, ".bz2")) { dyStringClear(command); dyStringPrintf(command, "bunzip2 '%s'", nativeImage); verbose(1, "%s\n", command->string); err = system(command->string); if (err != 0) errAbort("err %d on %s", err, command->string); chopSuffix(nativeImage); } if (!endsWith(nativeImage, ".jpg") ) { dyStringClear(command); dyStringPrintf(command, "convert '%s' '%s'", nativeImage, jpgImage); verbose(1, "%s\n", command->string); err = system(command->string); if (err != 0) errAbort("err %d on %s", err, command->string); remove(nativeImage); } } else { if (++errCount > maxErrs) errAbort("Aborting after %d errors", errCount); } } else { verbose(1, "Already have %s\n", jpgImage); } } }
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog) /* gensatImageDownload - Download images from gensat guided by xml file.. */ { struct xap *xap; struct gsGensatImage *image; char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat"; char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id="; char finalJpg[PATH_LEN]; char finalDir[PATH_LEN]; char wgetSource[PATH_LEN]; struct hash *dirHash = newHash(16); struct dyString *mkdir = dyStringNew(0); int imageIx = 0; fLog = mustOpen(outLog, "a"); fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir); xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler); while ((image = xapListNext(xap, "GensatImage")) != NULL) { int id = image->gsGensatImageId->text; char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg ->gsGensatImageInfo->gsGensatImageInfoFilename->text; /* Mangle file name a little */ subChar(imageFile, '(', '_'); stripChar(imageFile, ')'); /* Figure out name of jpeg file in outDir. */ verbose(1, "image %d, id %d\n", ++imageIx, id); safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile); stripString(finalJpg, ".full"); /* Image magick can't handle two suffixes */ chopSuffix(finalJpg); strcat(finalJpg, ".jpg"); /* Create directory that it goes in if necessary */ splitPath(finalJpg, finalDir, NULL, NULL); if (!hashLookup(dirHash, finalDir)) { hashAdd(dirHash, finalDir, NULL); dyStringClear(mkdir); dyStringPrintf(mkdir, "mkdir -p %s", finalDir); if (system(mkdir->string) != 0) errAbort("Couldn't %s", mkdir->string); } /* Download it - either directly via ftp, or indirectly via cgi. */ if (fileExists(finalJpg)) { verbose(1, "already have %s\n", imageFile); fprintf(fLog, "%s already downloaded\n", finalJpg); } else { if (endsWith(imageFile, ".jpg")) { safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile); if (safeGetOne(wgetSource, finalJpg)) fprintf(fLog, "Got via ftp %s\n", finalJpg); } else { safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id); if (safeGetOne(wgetSource, finalJpg)) fprintf(fLog, "Got via cgi %s\n", finalJpg); } } } carefulClose(&fLog); }