// Reset everything except URL.  Also, use aws_iobuf_reset() to reset
// os->iob.
//
// NOTE: This is used when stream_open gets an OS that has been previously
//       opened and closed.  Therefore, we can assume that sems have been
//       destroyed, and the thread has been joined or killed.
//
void stream_reset(ObjectStream* os, uint8_t preserve_os_written) {

   if (! (os->flags & OSF_CLOSED)) {
      LOG(LOG_ERR, "We require a stream that was previously opened\n");
      return;
   }

#if 0
   char*  before_ptr  = (char*)os;
   size_t before_size = (char*)os->url - (char*)os;

   char*  after_ptr   = (char*)os->url + MARFS_MAX_URL_SIZE;
   size_t after_size  = (char*)os + sizeof(ObjectStream) - after_ptr;

   memset(before_ptr, 0, before_size);
   memset(after_ptr,  0, after_size);
#else
   aws_iobuf_reset(&os->iob);
   os->op_rc = 0;
   os->flags = 0;
   // os->open_flags = 0;
   if (! preserve_os_written)
      os->written = 0;
#endif
}
static void EMC_Delete( char *testFileName, IOR_param_t * param ) {

   if (param->verbose >= VERBOSE_2) {
      printf("-> EMC_Delete(%s)\n", testFileName);
   }

   /* maybe initialize curl */
   s3_connect( param );

#if 0
   // EMC BUG: If the file was written with appends, and is deleted,
   //          then any future re-creation will result in an object
   //          that can't be read.
   AWS4C_CHECK( s3_delete(param->io_buf, testFileName) );
#else
   // just replace with a zero-length object, for now
   aws_iobuf_reset(param->io_buf);
   AWS4C_CHECK   ( s3_put(param->io_buf, testFileName) );
#endif

   AWS4C_CHECK_OK( param->io_buf );

   if (param->verbose >= VERBOSE_2)
      printf("<- EMC_Delete\n");
}
// Accept as much as <size>, from the streaming GET, into caller's <buf>.
// We may discover EOF at any time.  In that case, we'll return however
// much was actually read.  The next call will just short-circuit to
// return 0, signalling EOF to the caller.
//
// return -1 with errno, for failures.
// else return the number of chars we got.
//
ssize_t stream_get(ObjectStream* os, char* buf, size_t size) {

   static const int get_timeout_sec = 10; /* totally made up out of thin air */

   IOBuf* b = &os->iob;         // shorthand

   LOG(LOG_INFO, "entry\n");

   if (! (os->flags & OSF_OPEN)) {
      LOG(LOG_ERR, "%s isn't open\n", os->url);
      errno = EINVAL;           /* ?? */
      return -1;
   }
   if (! (os->flags & OSF_READING)) {
      LOG(LOG_ERR, "%s isn't open for reading\n", os->url);
      errno = EINVAL;           /* ?? */
      return -1;
   }

   if (os->flags & OSF_EOF) {
      LOG(LOG_INFO, "already at EOF\n");
      return 0;                 // b->write_count;
   }
   os->flags &= ~(OSF_EOB);

   aws_iobuf_reset(b);          // doesn't affect <user_data>
   aws_iobuf_extend_static(b, (char*)buf, size);
   LOG(LOG_INFO, "got %ld-byte buffer for writefn\n", size);

   // let writefn move data
   POST(&os->iob_empty);

   // wait for writefn to fill our buffer
   LOG(LOG_INFO, "waiting for writefn\n");
   SAFE_WAIT(&os->iob_full, get_timeout_sec, os);
   // SAFE_WAIT_KILL(&os->iob_full, get_timeout_sec, os);

   // writefn detected CURL EOF?
   if (os->flags & OSF_EOF) {
      LOG(LOG_INFO, "EOF is asserted\n");
   }
   if (os->flags & OSF_EOB) {
      LOG(LOG_INFO, "EOB is asserted\n");
   }

   os->written += b->write_count;
   LOG(LOG_INFO, "returning %ld (total=%ld)\n", b->write_count, os->written);
   return (b->write_count);
}
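
// Illustrative sketch (not part of the original source): a minimal read
// loop over an already-opened GET stream, showing the contract described
// above -- short reads near EOF, then 0 on the next call, -1 with errno on
// failure.  Assumes the caller did a successful stream_open() for a GET.
static ssize_t drain_stream_example(ObjectStream* os, char* dest, size_t dest_size) {
   size_t total = 0;
   while (total < dest_size) {
      ssize_t got = stream_get(os, dest + total, dest_size - total);
      if (got < 0)
         return -1;             // errno was set by stream_get()
      if (got == 0)
         break;                 // writefn asserted EOF on a previous call
      total += got;
   }
   return (ssize_t)total;
}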
int stream_open(ObjectStream* os,
                IsPut         put,
                curl_off_t    content_length,
                uint8_t       preserve_os_written) {

   LOG(LOG_INFO, "%s\n", ((put) ? "PUT" : "GET"));

   if (os->flags & OSF_OPEN) {
      LOG(LOG_ERR, "%s is already open\n", os->url);
      errno = EINVAL;
      return -1;                // already open
   }
   if (os->flags) {
      if (os->flags & OSF_CLOSED) {
         LOG(LOG_INFO, "stream being re-opened with %s\n", os->url);
         stream_reset(os, preserve_os_written); // previously-used
      }
      else {
         LOG(LOG_ERR, "%s has flags asserted, but is not CLOSED\n", os->url);
         errno = EINVAL;
         return -1;
      }
   }

   os->flags |= OSF_OPEN;
   if (put)
      os->flags |= OSF_WRITING;
   else
      os->flags |= OSF_READING;

   if (! preserve_os_written)
      os->written = 0;          // total read/written through OS

   // caller's open-flags, in case we need to close/reopen
   // (e.g. for Multi, or marfs_ftruncate())
   //
   // os->open_flags = open_flags;

   // shorthand
   IOBuf* b = &os->iob;

   // readfunc/writefunc just get the IOBuf from libaws4c, but they need
   // the ObjectStream.  So IOBuf now has a pointer to allow this.
   b->user_data = os;

   // install a copy of the global default-context as the per-connection context
   if (! b->context) {
      LOG(LOG_INFO, "No context.  Cloning from defaults.\n");
      aws_iobuf_context(b, aws_context_clone());
   }
   AWSContext* ctx = b->context;

   os->content_len = content_length;
   if (content_length) {
      s3_set_content_length_r(content_length, ctx);
      // os->flags |= OSF_LENGTH;
   }
   else
      s3_chunked_transfer_encoding_r(1, ctx);

   aws_iobuf_reset(b);          // doesn't affect <user_data> or <context>

   if (put) {
      SEM_INIT(&os->iob_empty, 0, 0);
      SEM_INIT(&os->iob_full,  0, 0);
      aws_iobuf_readfunc(b, &streaming_readfunc);
   }
   else {
      SEM_INIT(&os->iob_empty, 0, 0);
      SEM_INIT(&os->iob_full,  0, 0);
      aws_iobuf_headerfunc(b, &streaming_writeheaderfunc);
      aws_iobuf_writefunc(b, &streaming_writefunc);
   }

   // thread runs the GET/PUT, with the iobuf in <os>
   LOG(LOG_INFO, "starting thread\n");
   if (pthread_create(&os->op, NULL, &s3_op, os)) {
      LOG(LOG_ERR, "pthread_create failed: '%s'\n", strerror(errno));
      return -1;
   }
   return 0;
}
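
// A minimal PUT lifecycle sketch, assuming <os> already has its URL filled
// in, and assuming a stream_close() counterpart exists elsewhere in this
// file (not shown in this excerpt) that joins the s3_op thread and marks
// the stream OSF_CLOSED.  Passing <len> as content_length selects the
// explicit Content-Length path above, rather than chunked transfer-encoding.
static int put_whole_buffer_example(ObjectStream* os, const char* data, size_t len) {
   if (stream_open(os, 1 /* PUT */, (curl_off_t)len, 0))
      return -1;

   if (stream_put(os, data, len) < 0)
      return -1;                // errno was set by stream_put()

   return stream_close(os);     // assumption: see note above
}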
// Hand <buf> over to the streaming_readfunc(), so it can be added into
// the ongoing streaming PUT.  You must call stream_open() first.
//
// NOTE: Doing this a little differently from the test_aws.c (case 12)
//       approach.  We're forcing *synchronous* interaction with the
//       readfunc, because we don't want the caller's <buf> to go out of
//       scope until the readfunc is finished with it.
//
int stream_put(ObjectStream* os, const char* buf, size_t size) {

   // static const int put_timeout_sec = 10; /* totally made up out of thin air */
   static const int put_timeout_sec = 20;    /* totally made up out of thin air */

   LOG(LOG_INFO, "(%08lx) entry\n", (size_t)os);

   if (! (os->flags & OSF_OPEN)) {
      LOG(LOG_ERR, "(%08lx) %s isn't open\n", (size_t)os, os->url);
      errno = EINVAL;           /* ?? */
      return -1;
   }
   if (! (os->flags & OSF_WRITING)) {
      LOG(LOG_ERR, "(%08lx) %s isn't open for writing\n", (size_t)os, os->url);
      errno = EINVAL;           /* ?? */
      return -1;
   }
   IOBuf* b = &os->iob;         // shorthand

#if 0
   // QUESTION: Does it improve performance to copy the caller's buffer,
   //           so we can return immediately?
   //
   // ANSWER:   No.

   LOG(LOG_INFO, "(%08lx) waiting for IOBuf\n", (size_t)os);
   // readfunc done with IOBuf?
   SAFE_WAIT(&os->iob_empty, put_timeout_sec, os);
   // SAFE_WAIT_KILL(&os->iob_empty, put_timeout_sec, os);

   static size_t tmp_size = 0;
   static char*  tmp_buf  = NULL;
   if (size > tmp_size) {
      if (tmp_size)
         free(tmp_buf);
      tmp_size = size;
      tmp_buf  = (char*) malloc(size);
      if (! tmp_buf) {
         errno = ENOMEM;
         return -1;
      }
   }
   memcpy(tmp_buf, buf, size);

   // install buffer into IOBuf
   aws_iobuf_reset(b);          // doesn't affect <user_data>
   aws_iobuf_append_static(b, tmp_buf, size);
   LOG(LOG_INFO, "(%08lx) installed buffer (%ld bytes) for readfn\n", (size_t)os, size);

   // let readfunc move data
   POST(&os->iob_full);

#else
   // install buffer into IOBuf
   aws_iobuf_reset(b);          // doesn't affect <user_data>
   aws_iobuf_append_static(b, (char*)buf, size);
   LOG(LOG_INFO, "(%08lx) installed buffer (%ld bytes) for readfn\n", (size_t)os, size);

   // let readfunc move data
   POST(&os->iob_full);

   LOG(LOG_INFO, "(%08lx) waiting for IOBuf\n", (size_t)os);
   // readfunc done with IOBuf?
   SAFE_WAIT(&os->iob_empty, put_timeout_sec, os);
   // SAFE_WAIT_KILL(&os->iob_empty, put_timeout_sec, os);
#endif

   LOG(LOG_INFO, "(%08lx) buffer done\n", (size_t)os);
   return size;
}
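
// Illustrative sketch (not part of the original source): because
// stream_put() does not return until streaming_readfunc() has drained the
// caller's buffer, a single stack buffer can safely be refilled and reused
// across calls.  Assumes <unistd.h> is available for read(2) and that <fd>
// is an open file-descriptor feeding the PUT.
static int copy_fd_to_stream_example(int fd, ObjectStream* os) {
   char    chunk[4096];
   ssize_t n;
   while ((n = read(fd, chunk, sizeof(chunk))) > 0) {
      if (stream_put(os, chunk, (size_t)n) < 0)
         return -1;             // errno was set by stream_put()
   }
   return (n < 0) ? -1 : 0;     // -1 if read() itself failed
}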
static IOR_offset_t S3_Xfer_internal(int          access,
                                     void*        file,
                                     IOR_size_t*  buffer,
                                     IOR_offset_t length,
                                     IOR_param_t* param,
                                     int          multi_part_upload_p ) {

   if (param->verbose >= VERBOSE_2) {
      printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n",
             access, (char*)file, buffer, length, param);
   }

   char*  fname     = (char*)file; /* see NOTE above S3_Create_Or_Open() */
   size_t remaining = (size_t)length;
   char*  data_ptr  = (char *)buffer;
   off_t  offset    = param->offset;

   // easier to think
   int n_to_n    = param->filePerProc;
   int n_to_1    = (! n_to_n);
   int segmented = (param->segmentCount == 1);

   if (access == WRITE) {       /* WRITE */

      if (verbose >= VERBOSE_3) {
         fprintf( stdout, "rank %d writing length=%lld to offset %lld\n",
                  rank, remaining, param->offset + length - remaining);
      }

      if (multi_part_upload_p) {

         // For N:1, part-numbers must have a global ordering for the
         // components of the final object.  param->part_number is
         // incremented by 1 per write, on each rank.  This lets us use it
         // to compute a global part-numbering.
         //
         // In the N:N case, we only need to increment part-numbers within
         // each rank.
         //
         // In the N:1 case, the global order of the part-numbers we're
         // writing depends on whether we're writing strided or segmented;
         // in other words, how <offset> and <remaining> are actually
         // positioning the parts being written.  [See discussion at
         // S3_Close_internal().]
         //
         // NOTE: 's3curl.pl --debug' shows StringToSign having partNumber
         //       first, even if I put uploadId first in the URL.  Maybe
         //       that's what the server will do.  GetStringToSign() in
         //       aws4c is not clever about this, so we spoon-feed args in
         //       the proper order.

         size_t part_number;
         if (n_to_1) {
            if (segmented) {    // segmented
               size_t parts_per_rank = param->blockSize / param->transferSize;
               part_number = (rank * parts_per_rank) + param->part_number;
            }
            else                // strided
               part_number = (param->part_number * param->numTasks) + rank;
         }
         else
            part_number = param->part_number;
         ++ param->part_number;

         // if (verbose >= VERBOSE_3) {
         //    fprintf( stdout, "rank %d of %d writing (%s,%s) part_number %lld\n",
         //             rank,
         //             param->numTasks,
         //             (n_to_1 ? "N:1" : "N:N"),
         //             (segmented ? "segmented" : "strided"),
         //             part_number);
         // }

         snprintf(buff, BUFF_SIZE,
                  "%s?partNumber=%zu&uploadId=%s",
                  fname, part_number, param->UploadId);

         // For performance, we append <data_ptr> directly into the linked list
         // of data in param->io_buf.  We are "appending" rather than
         // "extending", so the added buffer is seen as written data, rather
         // than empty storage.
         //
         // aws4c parses some header-fields automatically for us (into members
         // of the IOBuf).  After s3_put2(), we can just read the etag from
         // param->io_buf->eTag.  The server actually returns literal
         // quote-marks, at both ends of the string.

         aws_iobuf_reset(param->io_buf);
         aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
         AWS4C_CHECK   ( s3_put(param->io_buf, buff) );
         AWS4C_CHECK_OK( param->io_buf );

         // if (verbose >= VERBOSE_3) {
         //    printf("rank %d: read ETag = '%s'\n", rank, param->io_buf->eTag);
         //    if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
         //       fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
         //               rank, ETAG_SIZE);
         //       exit(1);
         //    }
         // }

         if (verbose >= VERBOSE_3) {
            fprintf( stdout,
                     "rank %d of %d (%s,%s) offset %lld, part# %zu --> ETag %s\n",
                     rank, param->numTasks,
                     (n_to_1 ? "N:1" : "N:N"),
                     (segmented ? "segmented" : "strided"),
                     offset, part_number,
                     param->io_buf->eTag); // incl quote-marks at [0] and [len-1]
         }
         if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
            fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
                    rank, ETAG_SIZE);
            exit(1);
         }

         // save the eTag for later
         //
         // memcpy(etag, param->io_buf->eTag +1, strlen(param->io_buf->eTag) -2);
         // etag[ETAG_SIZE] = 0;
         aws_iobuf_append(param->etags,
                          param->io_buf->eTag +1,
                          strlen(param->io_buf->eTag) -2);

         // DEBUGGING
         if (verbose >= VERBOSE_4) {
            printf("rank %d: part %zu = ETag %s\n", rank, part_number, param->io_buf->eTag);
         }

         // drop ptrs to <data_ptr>, in param->io_buf
         aws_iobuf_reset(param->io_buf);
      }
      else {   // use EMC's byte-range write-support, instead of MPU

         // NOTE: You must call 's3_enable_EMC_extensions(1)' for
         //       byte-ranges to work for writes.
         if (n_to_n)
            s3_set_byte_range(-1,-1);  // EMC header "Range: bytes=-1-" means "append"
         else
            s3_set_byte_range(offset, remaining);

         // For performance, we append <data_ptr> directly into the linked list
         // of data in param->io_buf.  We are "appending" rather than
         // "extending", so the added buffer is seen as written data, rather
         // than empty storage.
         aws_iobuf_reset(param->io_buf);
         aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
         AWS4C_CHECK   ( s3_put(param->io_buf, file) );
         AWS4C_CHECK_OK( param->io_buf );

         // drop ptrs to <data_ptr>, in param->io_buf
         aws_iobuf_reset(param->io_buf);
      }

      if ( param->fsyncPerWrite == TRUE ) {
         WARN("S3 doesn't support 'fsync'" ); /* does it? */
      }
   }
   else {                       /* READ or CHECK */

      if (verbose >= VERBOSE_3) {
         fprintf( stdout, "rank %d reading from offset %lld\n",
                  rank, param->offset + length - remaining );
      }

      // read specific byte-range from the object
      // [This is included in the "pure" S3 spec.]
      s3_set_byte_range(offset, remaining);

      // For performance, we append <data_ptr> directly into the linked
      // list of data in param->io_buf.  In this case (i.e. reading),
      // we're "extending" rather than "appending".  That means the
      // buffer represents empty storage, which will be filled by the
      // libcurl writefunction, invoked via aws4c.
      aws_iobuf_reset(param->io_buf);
      aws_iobuf_extend_static(param->io_buf, data_ptr, remaining);
      AWS4C_CHECK( s3_get(param->io_buf, file) );
      if (param->io_buf->code != 206) { /* '206 Partial Content' */
         snprintf(buff, BUFF_SIZE,
                  "Unexpected result (%d, '%s')",
                  param->io_buf->code, param->io_buf->result);
         ERR_SIMPLE(buff);
      }

      // drop refs to <data_ptr>, in param->io_buf
      aws_iobuf_reset(param->io_buf);
   }

   if (param->verbose >= VERBOSE_2) {
      printf("<- S3_Xfer\n");
   }

   return ( length );
}
static void * S3_Create_Or_Open_internal(char*         testFileName,
                                         IOR_param_t*  param,
                                         unsigned char createFile,
                                         int           multi_part_upload_p ) {

   if (param->verbose >= VERBOSE_2) {
      printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n",
             testFileName, createFile, multi_part_upload_p);
   }

   /* initialize curl, if needed */
   s3_connect( param );

   /* Check for unsupported flags */
   if ( param->openFlags & IOR_EXCL ) {
      fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" );
   }
   if ( param->useO_DIRECT == TRUE ) {
      fprintf( stdout, "Direct I/O mode is not implemented in S3\n" );
   }

   // easier to think
   int n_to_n = param->filePerProc;
   int n_to_1 = ! n_to_n;

   /* check whether object needs reset to zero-length */
   int needs_reset = 0;
   if (! multi_part_upload_p)
      needs_reset = 1;          /* so "append" can work */
   else if ( param->openFlags & IOR_TRUNC )
      needs_reset = 1;          /* so "append" can work */
   else if (createFile) {
      // AWS4C_CHECK( s3_head(param->io_buf, testFileName) );
      // if ( ! AWS4C_OK(param->io_buf) )
      needs_reset = 1;
   }

   if ( param->open == WRITE ) {

      /* initializations for N:1 or N:N writes using multi-part upload */
      if (multi_part_upload_p) {

         // For N:N, all ranks do their own MPU open/close.  For N:1, only
         // rank0 does that.  Either way, the response from the server
         // includes an "uploadId", which must be used to upload parts to
         // the same object.
         if ( n_to_n || (rank == 0) ) {

            // rank0 handles truncate
            if ( needs_reset) {
               aws_iobuf_reset(param->io_buf);
               AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
               AWS4C_CHECK_OK( param->io_buf );
            }

            // POST request with URL+"?uploads" initiates multi-part upload
            snprintf(buff, BUFF_SIZE, "%s?uploads", testFileName);
            IOBuf* response = aws_iobuf_new();
            AWS4C_CHECK( s3_post2(param->io_buf, buff, NULL, response) );
            AWS4C_CHECK_OK( param->io_buf );

            // parse XML returned from server, into a tree structure
            aws_iobuf_realloc(response);
            xmlDocPtr doc = xmlReadMemory(response->first->buf,
                                          response->first->len,
                                          NULL, NULL, 0);
            if (doc == NULL)
               ERR_SIMPLE("Rank0 Failed to find POST response\n");

            // navigate parsed XML-tree to find UploadId
            xmlNode* root_element = xmlDocGetRootElement(doc);
            const char* upload_id = find_element_named(root_element, (char*)"UploadId");
            if (! upload_id)
               ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n");

            if (param->verbose >= VERBOSE_3)
               printf("got UploadId = '%s'\n", upload_id);

            const size_t upload_id_len = strlen(upload_id);
            if (upload_id_len > MAX_UPLOAD_ID_SIZE) {
               snprintf(buff, BUFF_SIZE,
                        "UploadId length %zu exceeds expected max (%d)",
                        upload_id_len, MAX_UPLOAD_ID_SIZE);
               ERR_SIMPLE(buff);
            }

            // save the UploadId we found
            memcpy(param->UploadId, upload_id, upload_id_len);
            param->UploadId[upload_id_len] = 0;

            // free storage for parsed XML tree
            xmlFreeDoc(doc);
            aws_iobuf_free(response);

            // For N:1, share the UploadId across all ranks
            if (n_to_1)
               MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
         }
         else
            // N:1, and we're not rank0.  recv UploadId from rank0
            MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
      }

      /* initializations for N:N or N:1 writes using EMC byte-range extensions */
      else {

         /* maybe reset to zero-length, so "append" can work */
         if (needs_reset) {

            if (verbose >= VERBOSE_3) {
               fprintf( stdout, "rank %d resetting\n", rank);
            }

            aws_iobuf_reset(param->io_buf);
            AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
            AWS4C_CHECK_OK( param->io_buf );
         }
      }
   }

   if (param->verbose >= VERBOSE_2) {
      printf("<- S3_Create_Or_Open\n");
   }

   return ((void *) testFileName );
}
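
// The POST <object>?uploads request above returns XML shaped roughly like:
//
//    <InitiateMultipartUploadResult>
//      <Bucket>...</Bucket>
//      <Key>...</Key>
//      <UploadId>VXBsb2FkSWQ...</UploadId>
//    </InitiateMultipartUploadResult>
//
// find_element_named() is defined elsewhere in this file; the sketch below
// is one plausible libxml2 implementation (depth-first search returning the
// text content of the first element with a matching name).  It is an
// assumption for illustration, not the verbatim helper.
static const char* find_element_named_sketch(xmlNode* node, const char* name) {
   for ( ; node; node = node->next) {
      if ((node->type == XML_ELEMENT_NODE)
          && ! xmlStrcmp(node->name, (const xmlChar*)name)) {
         // an element's text payload is its first (text) child
         return (node->children
                 ? (const char*)node->children->content
                 : NULL);
      }
      const char* found = find_element_named_sketch(node->children, name);
      if (found)
         return found;
   }
   return NULL;
}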
// After finalizing an S3 multi-part-upload, you must reset some things
// before you can use multi-part-upload again.  This will also avoid (one
// particular set of) memory-leaks.
//
void s3_MPU_reset(IOR_param_t* param) {
   aws_iobuf_reset(param->io_buf);
   aws_iobuf_reset(param->etags);
   param->part_number = 0;
}
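
// Usage sketch (illustrative only): s3_MPU_reset() belongs between
// consecutive multi-part uploads that reuse the same IOR_param_t.  The
// "complete multipart upload" POST itself is issued from
// S3_Close_internal(), which is not shown in this excerpt.
static void mpu_reuse_example(IOR_param_t* param) {
   // ... parts uploaded via S3_Xfer_internal(), MPU completed at close ...
   s3_MPU_reset(param);   // io_buf and etags emptied, part_number back to 0
   // <param> is now clean to drive the next object's multi-part upload
}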