/******************************************************************************
 * Name pack_objects
 *
 * This function traverses the object and file link lists and reads object
 * data for repacking into a new object.
 *
 * For each object whose live-file count (pack_count) is smaller than its
 * chunk count (i.e. some chunks were deleted), every remaining file's bytes
 * (data plus recovery-info record) are fetched from the old object with S3
 * byte-range GETs and accumulated into one IOBuf, which is then PUT as a
 * single new packed object.  Each file's offset within the new object is
 * recorded in files->new_offset for the later metadata update.
 *
 * Returns 0 on success, -1 on a fatal error (stat or allocation failure).
 ******************************************************************************/
int pack_objects(File_Handles *file_info, repack_objects *objects)
{
   struct stat statbuf;
   size_t write_offset;
   size_t obj_raw_size;
   size_t obj_size;
   size_t offset;
   MarFS_XattrPre pre;
   IOBuf *nb = aws_iobuf_new();
   char test_obj[2048];
   obj_files *files;
   int ret;
   char *obj_ptr;
   CURLcode s3_return;
   char pre_str[MARFS_MAX_XATTR_SIZE];

   while (objects) {
      // An object needs repacking only when some of its chunks no longer
      // have a live file (pack_count < chunk_count).  This single test
      // replaces the previous three: pack_count == chunk_count (garbage
      // collection already handled it), pack_count == chunk_count == 1
      // (uni object), and the "should not happen" pack_count > chunk_count.
      if (objects->pack_count >= objects->chunk_count) {
         objects = objects->next;
         continue;
      }

      LOG(LOG_INFO,"object = %s\n", objects->objid);
      LOG(LOG_INFO, "file count = %ld chunks = %ld\n",
          objects->pack_count, objects->chunk_count);

      files = objects->files_ptr;
      write_offset = 0;

      // Parse the existing object-id back into its xattr components so a
      // new, related object name can be derived from it.
      ret = str_2_pre(&pre, objects->objid, NULL);
      if (ret) {
         // NOTE(review): assuming str_2_pre() returns non-zero on parse
         // failure -- skip this object rather than pack with a bogus pre.
         fprintf(stderr, "str_2_pre failed for %s\n", objects->objid);
         objects = objects->next;
         continue;
      }
      sprintf(test_obj,"%s.teste",objects->objid);

      // Make this a unique object since it derived from an existing object
      pre.unique++;
      LOG(LOG_INFO,"stdout,new object name =%s\n", test_obj);

      while (files) {
         // BUG FIX: stat() was unchecked; on failure statbuf.st_size is
         // indeterminate and a garbage-sized buffer would be packed.
         if (stat(files->filename, &statbuf) == -1) {
            fprintf(stderr, "Error stating %s\n", files->filename);
            return -1;
         }
         obj_raw_size = statbuf.st_size;

         // Each packed entry carries the file data plus its recovery-info
         // record.
         obj_size = obj_raw_size + MARFS_REC_UNI_SIZE;
         files->size = obj_size;

         if ((obj_ptr = (char *)malloc(obj_size)) == NULL) {
            fprintf(stderr, "Error allocating memory\n");
            return -1;
         }

         check_security_access(&pre);
         update_pre(&pre);
         s3_set_host(pre.host);

         offset = files->original_offset;

         // Using byte range to get data for this file's particular offsets
         s3_set_byte_range(offset, obj_size);

         // "dynamic" extend hands ownership of obj_ptr to the IOBuf; the
         // buffers are released by aws_iobuf_reset_hard() after the PUT.
         aws_iobuf_extend_dynamic(nb, obj_ptr, obj_size);
         LOG(LOG_INFO, "going to get file %s from object %s at offset %ld and size %ld\n", files->filename, objects->objid, offset, obj_size);
         fprintf(file_info->outfd, "Getting file %s from object %s at offset %ld and size %ld\n", files->filename, objects->objid, offset, obj_size);
         s3_return = s3_get(nb,objects->objid);
         check_S3_error(s3_return, nb, S3_GET);

         LOG(LOG_INFO, "Read buffer write count = %ld len = %ld\n",
             nb->write_count, nb->len);

         // Remember where this file will land in the new packed object.
         files->new_offset = write_offset;
         write_offset += obj_size;
         files = files->next;
      }

      // create object string for put
      pre_2_str(pre_str, MARFS_MAX_XATTR_SIZE,&pre);
      strcpy(objects->new_objid, pre_str);

      LOG(LOG_INFO, "Going to write to object %s\n", pre_str);
      fprintf(file_info->outfd, "Writing file to object %s\n", pre_str);

      // Write data back to new object.
      // BUG FIX: capture the PUT's return code -- previously the stale
      // code from the last GET was passed to check_S3_error(), so PUT
      // failures were never detected.
      s3_return = s3_put(nb,pre_str);
      check_S3_error(s3_return, nb, S3_PUT);

      // Frees the dynamically-extended buffers queued above.
      aws_iobuf_reset_hard(nb);
      objects=objects->next;
   }
   return 0;
}
/* Transfer one <length>-byte chunk to/from an S3 object.
 *
 * access == WRITE:
 *   - multi_part_upload_p: upload the chunk as one part of a Multi-Part
 *     Upload.  Part numbers are globally ordered for N:1 (strided vs
 *     segmented layouts differ -- see S3_Close_internal()), per-rank for
 *     N:N.  The returned ETag (minus the server's literal quote-marks) is
 *     appended to param->etags for the final "complete MPU" request.
 *   - otherwise: EMC byte-range write extension (requires
 *     s3_enable_EMC_extensions(1)); N:N uses the "-1-" append range.
 * access == READ/CHECK: byte-range GET directly into <buffer>.
 *
 * Returns <length>; errors abort via AWS4C_CHECK*, ERR_SIMPLE, or exit().
 */
static IOR_offset_t S3_Xfer_internal(int access, void* file, IOR_size_t* buffer,
                                     IOR_offset_t length, IOR_param_t* param,
                                     int multi_part_upload_p )
{
   if (param->verbose >= VERBOSE_2) {
      // Casts keep the varargs in step with the %llx/%llu specifiers
      // (passing raw pointers for %llx is undefined behavior).
      printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n",
             access, (char*)file,
             (unsigned long long)(uintptr_t)buffer,
             (unsigned long long)length,
             (unsigned long long)(uintptr_t)param);
   }

   char*  fname     = (char*)file; /* see NOTE above S3_Create_Or_Open() */
   size_t remaining = (size_t)length;
   char*  data_ptr  = (char *)buffer;
   off_t  offset    = param->offset;

   // easier to think
   int n_to_n    = param->filePerProc;
   int n_to_1    = (! n_to_n);
   int segmented = (param->segmentCount == 1);

   if (access == WRITE) { /* WRITE */
      if (verbose >= VERBOSE_3) {
         // BUG FIX: size_t / mixed-type args must be cast for %lld.
         fprintf( stdout, "rank %d writing length=%lld to offset %lld\n",
                  rank,
                  (long long)remaining,
                  (long long)(param->offset + length - remaining));
      }

      if (multi_part_upload_p) {

         // For N:1, part-numbers need a global ordering for the components
         // of the final object.  param->part_number is incremented by 1 per
         // write on each rank, which lets us compute a global numbering.
         // In the N:N case, part-numbers only need to be ordered within a
         // rank.  For N:1 the global order depends on whether we write
         // strided or segmented, i.e. how <offset>/<remaining> position the
         // parts.  [See discussion at S3_Close_internal().]
         //
         // NOTE: 's3curl.pl --debug' shows StringToSign having partNumber
         //       first, even if uploadId comes first in the URL; aws4c's
         //       GetStringToSign() is not clever about this, so args are
         //       spoon-fed in the proper order.
         size_t part_number;
         if (n_to_1) {
            if (segmented) {     // segmented
               size_t parts_per_rank = param->blockSize / param->transferSize;
               part_number = (rank * parts_per_rank) + param->part_number;
            }
            else                 // strided
               part_number = (param->part_number * param->numTasks) + rank;
         }
         else
            part_number = param->part_number;
         ++ param->part_number;

         // BUG FIX: part_number is size_t; "%d" here was undefined behavior
         // and could emit a truncated part number in the MPU URL.
         snprintf(buff, BUFF_SIZE,
                  "%s?partNumber=%zu&uploadId=%s",
                  fname, part_number, param->UploadId);

         // For performance, we append <data_ptr> directly into the linked
         // list of data in param->io_buf ("appending" rather than
         // "extending", so the buffer is seen as written data, not empty
         // storage).  aws4c parses some response headers for us; after
         // s3_put() the ETag is in param->io_buf->eTag, with literal
         // quote-marks at both ends.
         aws_iobuf_reset(param->io_buf);
         aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
         AWS4C_CHECK( s3_put(param->io_buf, buff) );
         AWS4C_CHECK_OK( param->io_buf );

         if (verbose >= VERBOSE_3) {
            fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n",
                     rank, param->numTasks,
                     (n_to_1 ? "N:1" : "N:N"),
                     (segmented ? "segmented" : "strided"),
                     (long long)offset,
                     (long long)part_number,
                     param->io_buf->eTag); // incl quote-marks at [0] and [len-1]
         }
         if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
            fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
                    rank, ETAG_SIZE);
            exit(1);
         }

         // save the eTag for later, stripping the surrounding quote-marks
         aws_iobuf_append(param->etags,
                          param->io_buf->eTag +1,
                          strlen(param->io_buf->eTag) -2);

         // DEBUGGING
         if (verbose >= VERBOSE_4) {
            // BUG FIX: %zu for the size_t part number (was "%d").
            printf("rank %d: part %zu = ETag %s\n",
                   rank, part_number, param->io_buf->eTag);
         }

         // drop ptrs to <data_ptr>, in param->io_buf
         aws_iobuf_reset(param->io_buf);
      }
      else {   // use EMC's byte-range write-support, instead of MPU

         // NOTE: You must call 's3_enable_EMC_extensions(1)' for
         //       byte-ranges to work for writes.
         if (n_to_n)
            s3_set_byte_range(-1,-1); // EMC header "Range: bytes=-1-" means "append"
         else
            s3_set_byte_range(offset, remaining);

         // Append <data_ptr> directly (written data, not empty storage).
         aws_iobuf_reset(param->io_buf);
         aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
         AWS4C_CHECK ( s3_put(param->io_buf, fname) );
         AWS4C_CHECK_OK( param->io_buf );

         // drop ptrs to <data_ptr>, in param->io_buf
         aws_iobuf_reset(param->io_buf);
      }

      if ( param->fsyncPerWrite == TRUE ) {
         WARN("S3 doesn't support 'fsync'" ); /* does it? */
      }

   }
   else {	/* READ or CHECK */
      if (verbose >= VERBOSE_3) {
         fprintf( stdout, "rank %d reading from offset %lld\n",
                  rank,
                  (long long)(param->offset + length - remaining));
      }

      // read specific byte-range from the object
      // [This is included in the "pure" S3 spec.]
      s3_set_byte_range(offset, remaining);

      // "Extend" (not "append"): the buffer represents empty storage to be
      // filled by the libcurl writefunction, invoked via aws4c.
      aws_iobuf_reset(param->io_buf);
      aws_iobuf_extend_static(param->io_buf, data_ptr, remaining);
      AWS4C_CHECK( s3_get(param->io_buf, fname) );
      if (param->io_buf->code != 206) { /* '206 Partial Content' */
         snprintf(buff, BUFF_SIZE,
                  "Unexpected result (%d, '%s')",
                  param->io_buf->code, param->io_buf->result);
         ERR_SIMPLE(buff);
      }

      // drop refs to <data_ptr>, in param->io_buf
      aws_iobuf_reset(param->io_buf);
   }

   if (param->verbose >= VERBOSE_2) {
      printf("<- S3_Xfer\n");
   }
   return ( length );
}