int s3_op_internal(ObjectStream* os) {
   IOBuf* b = &os->iob;
   __attribute__ ((unused)) AWSContext* ctx = b->context;

   // run the GET or PUT
   int is_get = (os->flags & OSF_READING);
   if (is_get) {
      LOG(LOG_INFO, "GET '%s/%s/%s'\n",
          (ctx ? ctx->S3Host : "*"), (ctx ? ctx->Bucket : "*"), os->url);
      AWS4C_CHECK1( s3_get(b, os->url) );
   }
   else {
      LOG(LOG_INFO, "PUT '%s/%s/%s'\n",
          (ctx ? ctx->S3Host : "*"), (ctx ? ctx->Bucket : "*"), os->url);

      // If you are getting errors here, the comments above the "#if
      // ((LIBCURL_VERSION ...", in stream_sync(), *might* be relevant.
      AWS4C_CHECK1( s3_put(b, os->url) ); /* create empty object with user metadata */
   }

   // s3_get with a byte-range can leave streaming_writefunc() waiting for a
   // curl callback that never comes.  This happens if there is still writable
   // space in the buffer when the last bytes in the request are processed.
   // It can happen because the caller (e.g. fuse) may ask for more bytes than
   // are present, and provide a buffer big enough to receive them.
   if (is_get && (b->code == 206)) {
      // should we do something with os->iob_full?  set os->flags & EOF?
      LOG(LOG_INFO, "GET complete\n");
      os->flags |= OSF_EOF;
      POST(&os->iob_full);
      return 0;
   }
   else if (AWS4C_OK(b)) {
      LOG(LOG_INFO, "%s complete\n", ((is_get) ? "GET" : "PUT"));
      return 0;
   }

   LOG(LOG_ERR, "CURL ERROR: %p %d '%s'\n", (void*)b, b->code, b->result);
   return -1;
}
int get_file( IOBuf * aws_buf, char *name )
{
   char * filename;
   filename = getFilenameFromPath(name);

   int rv = s3_get(aws_buf, filename);
   if (rv == 0 && aws_buf->code == 200 && aws_buf->len != 0) {
      FILE * fp;
      char writebuf[BUF_SIZE];

      // Write out the downloaded file.
      // First check whether the file already exists.
      if( (fp = fopen(name, "rb")) != NULL) {
         fprintf(stdout, "WARNING: The specified file already exists. \n"
                 "Refusing to overwrite. \n");
         fclose(fp);
         return -1;
      }
      // File doesn't exist yet.  Okay to write :)
      else {
         if( (fp = fopen(name, "w+")) == NULL) {
            fprintf(stdout, "ERROR: Unable to create the specified file. \n");
            return -1;
         }

         int n, sz;
         while( (sz = aws_iobuf_getdata(aws_buf, writebuf, sizeof(writebuf))) != 0 ) {
            n = fwrite(writebuf, sizeof(unsigned char), sz, fp);
            if (n != sz) {
               fprintf(stdout, "ERROR: Error writing to file. \n");
               fclose(fp);   // don't leak the handle on a short write
               return -1;
            }
         }
         fclose(fp);
      }
   }
   return rv;
}
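/*
 * Minimal usage sketch for get_file() above (illustrative only, not part of
 * the original sources).  It assumes the usual aws4c setup has already been
 * done (credentials loaded, s3_set_host()/s3_set_bucket() pointed at the
 * right place), and that aws_iobuf_new()/aws_iobuf_free() are available as in
 * aws4c; 'example_download' is a hypothetical helper name.
 */
int example_download(char *s3_path)
{
   IOBuf *buf = aws_iobuf_new();

   // get_file() fetches the object named by the last path component and
   // writes a local copy, refusing to overwrite an existing file.
   int rv = get_file(buf, s3_path);
   if (rv != 0 || buf->code != 200)
      fprintf(stdout, "download failed: rv=%d http=%d '%s'\n",
              rv, buf->code, buf->result);

   aws_iobuf_free(buf);
   return rv;
}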
static IOR_offset_t S3_Xfer_internal(int          access,
                                     void*        file,
                                     IOR_size_t*  buffer,
                                     IOR_offset_t length,
                                     IOR_param_t* param,
                                     int          multi_part_upload_p ) {

    if (param->verbose >= VERBOSE_2) {
        printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n",
               access, (char*)file, buffer, length, param);
    }

    char*  fname     = (char*)file;     /* see NOTE above S3_Create_Or_Open() */
    size_t remaining = (size_t)length;
    char*  data_ptr  = (char*)buffer;
    off_t  offset    = param->offset;

    // easier to think
    int n_to_n    = param->filePerProc;
    int n_to_1    = (! n_to_n);
    int segmented = (param->segmentCount == 1);

    if (access == WRITE) {      /* WRITE */

        if (verbose >= VERBOSE_3) {
            fprintf( stdout, "rank %d writing length=%lld to offset %lld\n",
                     rank,
                     (long long)remaining,
                     param->offset + length - remaining);
        }

        if (multi_part_upload_p) {

            // For N:1, part-numbers must have a global ordering for the
            // components of the final object.  param->part_number is
            // incremented by 1 per write, on each rank.  This lets us use it
            // to compute a global part-numbering.
            //
            // In the N:N case, we only need to increment part-numbers within
            // each rank.
            //
            // In the N:1 case, the global order of part-numbers we're writing
            // depends on whether we're writing strided or segmented, in
            // other words, how <offset> and <remaining> are actually
            // positioning the parts being written.  [See discussion at
            // S3_Close_internal().]  A worked example follows this function.
            //
            // NOTE: 's3curl.pl --debug' shows StringToSign having partNumber
            //       first, even if I put uploadId first in the URL.  Maybe
            //       that's what the server will do.  GetStringToSign() in
            //       aws4c is not clever about this, so we spoon-feed args in
            //       the proper order.

            size_t part_number;
            if (n_to_1) {
                if (segmented) {        // segmented
                    size_t parts_per_rank = param->blockSize / param->transferSize;
                    part_number = (rank * parts_per_rank) + param->part_number;
                }
                else                    // strided
                    part_number = (param->part_number * param->numTasks) + rank;
            }
            else
                part_number = param->part_number;
            ++ param->part_number;

            // if (verbose >= VERBOSE_3) {
            //     fprintf( stdout, "rank %d of %d writing (%s,%s) part_number %lld\n",
            //              rank,
            //              param->numTasks,
            //              (n_to_1 ? "N:1" : "N:N"),
            //              (segmented ? "segmented" : "strided"),
            //              part_number);
            // }

            snprintf(buff, BUFF_SIZE,
                     "%s?partNumber=%zu&uploadId=%s",
                     fname, part_number, param->UploadId);

            // For performance, we append <data_ptr> directly into the linked list
            // of data in param->io_buf.  We are "appending" rather than
            // "extending", so the added buffer is seen as written data, rather
            // than empty storage.
            //
            // aws4c parses some header-fields automatically for us (into members
            // of the IOBuf).  After s3_put2(), we can just read the etag from
            // param->io_buf->eTag.  The server actually returns literal
            // quote-marks, at both ends of the string.
            aws_iobuf_reset(param->io_buf);
            aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
            AWS4C_CHECK   ( s3_put(param->io_buf, buff) );
            AWS4C_CHECK_OK( param->io_buf );

            // if (verbose >= VERBOSE_3) {
            //     printf("rank %d: read ETag = '%s'\n", rank, param->io_buf->eTag);
            //     if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
            //         fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
            //                 rank, ETAG_SIZE);
            //         exit(1);
            //     }
            // }

            if (verbose >= VERBOSE_3) {
                fprintf( stdout,
                         "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n",
                         rank,
                         param->numTasks,
                         (n_to_1 ? "N:1" : "N:N"),
                         (segmented ? "segmented" : "strided"),
                         (long long)offset,
                         (long long)part_number,
                         param->io_buf->eTag);  // incl quote-marks at [0] and [len-1]
            }
            if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
                fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
                        rank, ETAG_SIZE);
                exit(1);
            }

            // save the eTag for later
            //
            //      memcpy(etag, param->io_buf->eTag +1, strlen(param->io_buf->eTag) -2);
            //      etag[ETAG_SIZE] = 0;
            aws_iobuf_append(param->etags,
                             param->io_buf->eTag +1,
                             strlen(param->io_buf->eTag) -2);

            // DEBUGGING
            if (verbose >= VERBOSE_4) {
                printf("rank %d: part %zu = ETag %s\n", rank, part_number, param->io_buf->eTag);
            }

            // drop ptrs to <data_ptr>, in param->io_buf
            aws_iobuf_reset(param->io_buf);
        }
        else {   // use EMC's byte-range write-support, instead of MPU

            // NOTE: You must call 's3_enable_EMC_extensions(1)' for
            //       byte-ranges to work for writes.
            if (n_to_n)
                s3_set_byte_range(-1, -1);  // EMC header "Range: bytes=-1-" means "append"
            else
                s3_set_byte_range(offset, remaining);

            // For performance, we append <data_ptr> directly into the linked list
            // of data in param->io_buf.  We are "appending" rather than
            // "extending", so the added buffer is seen as written data, rather
            // than empty storage.
            aws_iobuf_reset(param->io_buf);
            aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
            AWS4C_CHECK   ( s3_put(param->io_buf, file) );
            AWS4C_CHECK_OK( param->io_buf );

            // drop ptrs to <data_ptr>, in param->io_buf
            aws_iobuf_reset(param->io_buf);
        }

        if ( param->fsyncPerWrite == TRUE ) {
            WARN("S3 doesn't support 'fsync'" ); /* does it? */
        }
    }
    else {                      /* READ or CHECK */

        if (verbose >= VERBOSE_3) {
            fprintf( stdout, "rank %d reading from offset %lld\n",
                     rank,
                     param->offset + length - remaining );
        }

        // read specific byte-range from the object
        // [This is included in the "pure" S3 spec.]
        s3_set_byte_range(offset, remaining);

        // For performance, we append <data_ptr> directly into the linked
        // list of data in param->io_buf.  In this case (i.e. reading),
        // we're "extending" rather than "appending".  That means the
        // buffer represents empty storage, which will be filled by the
        // libcurl writefunction, invoked via aws4c.
        aws_iobuf_reset(param->io_buf);
        aws_iobuf_extend_static(param->io_buf, data_ptr, remaining);
        AWS4C_CHECK( s3_get(param->io_buf, file) );
        if (param->io_buf->code != 206) { /* '206 Partial Content' */
            snprintf(buff, BUFF_SIZE,
                     "Unexpected result (%d, '%s')",
                     param->io_buf->code, param->io_buf->result);
            ERR_SIMPLE(buff);
        }

        // drop refs to <data_ptr>, in param->io_buf
        aws_iobuf_reset(param->io_buf);
    }

    if (param->verbose >= VERBOSE_2) {
        printf("<- S3_Xfer\n");
    }
    return ( length );
}
/******************************************************************************
* Name  pack_objects
*
* This function traverses the object and file link lists and reads object
* data for repacking into a new object.
******************************************************************************/
int pack_objects(File_Handles *file_info, repack_objects *objects)
{
    struct stat statbuf;
    char *path = "/";

    // repack_objects *objects;
    // struct stat statbuf;
    stat(path, &statbuf);
    size_t write_offset = 0;
    size_t obj_raw_size;
    size_t obj_size;
    size_t offset;

    // MarFS_XattrPre pre_struct;
    // MarFS_XattrPre* pre = &pre_struct;
    MarFS_XattrPre pre;

    IOBuf *nb = aws_iobuf_new();
    char test_obj[2048];
    obj_files *files;
    int ret;
    char *obj_ptr;
    CURLcode s3_return;
    char pre_str[MARFS_MAX_XATTR_SIZE];

    // Also, if file_count == 1, do I make uni, or ...?
    //
    //
    while (objects) {
        // need inner loop to get files for each object

        // If chunk_count == file count, there is no need to pack,
        // and garbage collection took care of it.
        if (objects->chunk_count == objects->pack_count) {
            objects = objects->next;
            continue;
        }
        // No need to pack if only one file is specified in the xattr and only
        // one file was found.
        if (objects->chunk_count == 1 && objects->pack_count == 1) {
            objects = objects->next;
            continue;
        }
        // Not quite sure how this next condition could happen.
        // TO DO: make only one condition (chunk_count > file_count);
        // all others continue.
        if (objects->pack_count > objects->chunk_count) {
            objects = objects->next;
            continue;
        }

        LOG(LOG_INFO, "object = %s\n", objects->objid);
        LOG(LOG_INFO, "file count = %ld chunks = %ld\n",
            objects->pack_count, objects->chunk_count);
        files = objects->files_ptr;
        write_offset = 0;

        ret = str_2_pre(&pre, objects->objid, NULL);
        sprintf(test_obj, "%s.teste", objects->objid);

        // Make this a unique object, since it is derived from an existing object
        pre.unique++;
        LOG(LOG_INFO, "new object name = %s\n", test_obj);

        // aws_iobuf_reset(nb);
        while (files) {
            // fprintf(stdout, "file = %s offset=%ld\n", files->filename, files->offset);
            stat(files->filename, &statbuf);
            obj_raw_size = statbuf.st_size;
            obj_size = obj_raw_size + MARFS_REC_UNI_SIZE;
            files->size = obj_size;
            // fprintf(stdout, "obj_size = %ld REC SIZE = %d\n", obj_size, MARFS_REC_UNI_SIZE);
            // write_offset += obj_size;

            if ((obj_ptr = (char *)malloc(obj_size)) == NULL) {
                fprintf(stderr, "Error allocating memory\n");
                return -1;
            }

            check_security_access(&pre);
            update_pre(&pre);
            s3_set_host(pre.host);

            // offset = objects->files_ptr->offset;
            offset = files->original_offset;
            // fprintf(stdout, "file %s will get re-written at offset %ld\n",
            //         files->filename, write_offset);

            // get object data
            // Using a byte range to get data for particular offsets
            s3_set_byte_range(offset, obj_size);

            // Use extend to get more buffering capability on each get
            aws_iobuf_extend_dynamic(nb, obj_ptr, obj_size);

            LOG(LOG_INFO, "going to get file %s from object %s at offset %ld and size %ld\n",
                files->filename, objects->objid, offset, obj_size);
            fprintf(file_info->outfd, "Getting file %s from object %s at offset %ld and size %ld\n",
                    files->filename, objects->objid, offset, obj_size);

            s3_return = s3_get(nb, objects->objid);
            check_S3_error(s3_return, nb, S3_GET);

            LOG(LOG_INFO, "Read buffer write count = %ld len = %ld\n",
                nb->write_count, nb->len);

            // may have to copy nb to a new buffer, then write
            files->new_offset = write_offset;
            write_offset += obj_size;
            files = files->next;
        }

        // create object string for the put
        pre_2_str(pre_str, MARFS_MAX_XATTR_SIZE, &pre);
        strcpy(objects->new_objid, pre_str);

        LOG(LOG_INFO, "Going to write to object %s\n", pre_str);
        fprintf(file_info->outfd, "Writing file to object %s\n", pre_str);

        // Write data back to the new object
        s3_return = s3_put(nb, pre_str);
        check_S3_error(s3_return, nb, S3_PUT);

        aws_iobuf_reset_hard(nb);
        objects = objects->next;
    }
    return 0;
}
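/*
 * Minimal sketch of the repack pattern used by pack_objects() above
 * (illustrative only, not part of the original sources): each file's byte
 * range is pulled from the source object with s3_set_byte_range() + s3_get()
 * into one growing IOBuf, and the combined buffer is then written out with a
 * single s3_put().  The object names, offsets, and sizes are made up,
 * credential/host setup is assumed to have been done already, and error-path
 * cleanup is omitted for brevity.
 */
static int repack_two_ranges_example(char *src_objid, char *dst_objid)
{
    IOBuf *nb = aws_iobuf_new();
    size_t offsets[2] = { 0, 65536 };      /* hypothetical source offsets */
    size_t sizes[2]   = { 4096, 8192 };    /* hypothetical per-file sizes */
    int i;

    for (i = 0; i < 2; i++) {
        char *chunk = (char *)malloc(sizes[i]);
        if (chunk == NULL)
            return -1;
        s3_set_byte_range(offsets[i], sizes[i]);        /* limit the GET to one file's range */
        aws_iobuf_extend_dynamic(nb, chunk, sizes[i]);  /* give the IOBuf room for this read */
        if (s3_get(nb, src_objid) != CURLE_OK)
            return -1;
    }

    if (s3_put(nb, dst_objid) != CURLE_OK)              /* write the packed data as one object */
        return -1;

    aws_iobuf_reset_hard(nb);                           /* release the dynamic buffers */
    return 0;
}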
static int makeflow_s3_archive_copy_task_files(struct archive_instance *a, char *id, char *task_path, struct batch_task *t)
{
    char *taskTarFile = string_format("%s/%s", task_path, id);

    // Check whether the task is already in the local archive, so it is not downloaded twice
    if (access(taskTarFile, R_OK) != 0) {
        // Copy the tar file from the s3 bucket
        struct timeval start_time;
        struct timeval end_time;
        char *copyTar = string_format("%s/%s", task_path, id);
        FILE *taskFile = fopen(copyTar, "wb");

        gettimeofday(&start_time, NULL);
        if (s3_get(taskFile, id) != 0) {
            gettimeofday(&end_time, NULL);
            float run_time = ((end_time.tv_sec*1000000 + end_time.tv_usec) - (start_time.tv_sec*1000000 + start_time.tv_usec)) / 1000000.0;
            total_down_time += run_time;
            debug(D_MAKEFLOW_HOOK, " It took %f seconds for %s to fail downloading to %s", run_time, id, a->s3_dir);
            debug(D_MAKEFLOW_HOOK, " The total download time is %f second(s)", total_down_time);
            fclose(taskFile);   // don't leak the handle on a failed download
            free(copyTar);
            return 0;
        }
        gettimeofday(&end_time, NULL);
        float run_time = ((end_time.tv_sec*1000000 + end_time.tv_usec) - (start_time.tv_sec*1000000 + start_time.tv_usec)) / 1000000.0;
        total_down_time += run_time;
        printf("Download %s from %s/%s\n", id, a->s3_dir, id);
        debug(D_MAKEFLOW_HOOK, " It took %f seconds for %s to download from %s", run_time, id, a->s3_dir);
        debug(D_MAKEFLOW_HOOK, " The total download time is %f second(s)", total_down_time);
        free(copyTar);
        fclose(taskFile);

        char *extractTar = string_format("tar -xzvf %s/%s -C %s", task_path, id, task_path);
        if (system(extractTar) == -1) {
            free(extractTar);
            return 0;
        }
        free(extractTar);

        struct batch_file *f;
        struct list_cursor *cur = list_cursor_create(t->output_files);
        // Iterate through output files
        for (list_seek(cur, 0); list_get(cur, (void**)&f); list_next(cur)) {
            char *output_file_path = string_format("%s/output_files/%s", task_path, basename(f->inner_name));
            char buf[1024];
            ssize_t len;
            // Read what the symlink is actually pointing to
            if ((len = readlink(output_file_path, buf, sizeof(buf)-1)) != -1)
                buf[len] = '\0';
            free(output_file_path);

            // Grab the actual name of the file from the buffer
            char *file_name = basename(buf);
            debug(D_MAKEFLOW_HOOK, "The FILE_NAME is %s", file_name);

            // Check whether the file was already copied to the /files/ directory
            char *filePath = string_format("%s/files/%.2s/%s", a->dir, file_name, file_name);
            char *fileDir = string_format("%s/files/%.2s", a->dir, file_name);
            if (access(filePath, R_OK) != 0) {
                debug(D_MAKEFLOW_HOOK, "COPYING %s to /files/ from the s3 bucket", file_name);
                // Copy the file to the local archive /files/ directory
                gettimeofday(&start_time, NULL);
                create_dir(fileDir, 0777);
                FILE *fileLocal = fopen(filePath, "wb");
                if (s3_get(fileLocal, file_name) != 0) {
                    gettimeofday(&end_time, NULL);
                    run_time = ((end_time.tv_sec*1000000 + end_time.tv_usec) - (start_time.tv_sec*1000000 + start_time.tv_usec)) / 1000000.0;
                    total_down_time += run_time;
                    debug(D_MAKEFLOW_HOOK, " It took %f seconds for %s to fail downloading from %s", run_time, id, a->s3_dir);
                    debug(D_MAKEFLOW_HOOK, " The total download time is %f second(s)", total_down_time);
                    fclose(fileLocal);   // don't leak the handle on a failed download
                    return 0;
                }
                gettimeofday(&end_time, NULL);
                run_time = ((end_time.tv_sec*1000000 + end_time.tv_usec) - (start_time.tv_sec*1000000 + start_time.tv_usec)) / 1000000.0;
                total_down_time += run_time;
                printf("Download %s from %s/%s\n", file_name, a->s3_dir, file_name);
                debug(D_MAKEFLOW_HOOK, " It took %f seconds for %s to download from %s", run_time, id, a->s3_dir);
                debug(D_MAKEFLOW_HOOK, " The total download time is %f second(s)", total_down_time);
                fclose(fileLocal);

                // Extract the tar file of a directory (always run, even if it isn't a tar file)
                char *extractDirTar = string_format("tar -xzvf %s -C %s/foo >&/dev/null", filePath, fileDir);
                char *makeDir = string_format("mkdir %s/foo", fileDir);
                system(makeDir);
                free(makeDir);
                if (system(extractDirTar) != 0) {
                    debug(D_MAKEFLOW_HOOK, "%s is either a file or the tar file could not be extracted", file_name);
                    free(extractDirTar);
                    char *removeFooDir = string_format("rm -rf %s/foo", fileDir);
                    system(removeFooDir);
                    free(removeFooDir);
                    free(fileDir);       // release before skipping to the next output file
                    free(filePath);
                    continue;
                }
                char *removeTar = string_format("rm %s", filePath);
                system(removeTar);
                free(removeTar);
                char *renameFile = string_format("mv %s/foo %s", fileDir, filePath);
                system(renameFile);
                free(renameFile);
                free(extractDirTar);
            }
            free(fileDir);
            free(filePath);
        }
        free(taskTarFile);
        return 1;
    }
    debug(D_MAKEFLOW_HOOK, "Task already exists in local archive, not downloading from s3 bucket");
    free(taskTarFile);
    return 1;
}