Beispiel #1
0
// Return a previously-used ObjectStream to a state where it can be opened
// again.  os->url is not touched.  The embedded IOBuf is cleared with
// aws_iobuf_reset(), and op_rc / flags are zeroed.
//
// <preserve_os_written>  non-zero: leave os->written alone.
//                        zero:     os->written is cleared as well.
//
// NOTE: This is used when stream_open gets an OS that has been previously
//       opened and closed.  Therefore, we can assume that sems have been
//       destroyed, and thread has been joined or killed.
//
// A stream without OSF_CLOSED is reported through LOG() and left unchanged.
//
void stream_reset(ObjectStream* os,
                  uint8_t       preserve_os_written) {

   if ((os->flags & OSF_CLOSED) == 0) {
      LOG(LOG_ERR, "We require a stream that was previously opened\n");
      return;
   }

   // Reset field-by-field; members not named here keep their values.
   aws_iobuf_reset(&os->iob);
   os->op_rc = 0;
   os->flags = 0;

   if (preserve_os_written == 0) {
      os->written = 0;
   }
}
Beispiel #2
0
/*
 * "Delete" <testFileName> on an EMC target.
 *
 * The object is not actually removed.  It is overwritten with a
 * zero-length object instead (an s3_put() of an empty IOBuf).
 *
 * EMC BUG: If file was written with appends, and is deleted, then any
 *      future recreation will result in an object that can't be read.
 *      That is why s3_delete() is not used here.
 */
static
void
EMC_Delete( char *testFileName, IOR_param_t * param ) {

	if (param->verbose >= VERBOSE_2)
		printf("-> EMC_Delete(%s)\n", testFileName);

	/* maybe initialize curl */
	s3_connect( param );

	/* just replace with a zero-length object for now */
	aws_iobuf_reset(param->io_buf);
	AWS4C_CHECK   ( s3_put(param->io_buf, testFileName) );
	AWS4C_CHECK_OK( param->io_buf );

	if (param->verbose >= VERBOSE_2) {
		printf("<- EMC_Delete\n");
	}
}
Beispiel #3
0
// Accept as much as <size>, from the streaming GET, into caller's <buf>.
// We may discover EOF at any time.  In that case, we'll return however
// much was actually read.  The next call will just short-circuit to
// return 0, signalling EOF to caller.
//
// The caller's buffer is installed in os->iob as empty storage, iob_empty
// is posted so the write-function can move data, and then we wait on
// iob_full (up to get_timeout_sec) for it to finish.
//
// return -1 with errno (EINVAL), if <os> is not open, or not open for reading.
// return 0 if OSF_EOF was already set on entry.
// else return number of chars we get (b->write_count), which is also
// added to os->written.
//
// NOTE(review): timeout handling lives inside SAFE_WAIT, which is defined
//       elsewhere; confirm what it returns / sets on timeout.
//
ssize_t stream_get(ObjectStream* os,
                   char*         buf,
                   size_t        size) {

   static const int get_timeout_sec = 10; /* totally made up out of thin air */

   IOBuf* b = &os->iob;     // shorthand

   LOG(LOG_INFO, "entry\n");
   if (! (os->flags & OSF_OPEN)) {
      LOG(LOG_ERR, "%s isn't open\n", os->url);
      errno = EINVAL;            /* ?? */
      return -1;
   }
   if (! (os->flags & OSF_READING)) {
      LOG(LOG_ERR, "%s isn't open for reading\n", os->url);
      errno = EINVAL;            /* ?? */
      return -1;
   }
   if (os->flags & OSF_EOF) {
      LOG(LOG_INFO, "already at EOF\n");
      return 0;
   }
   os->flags &= ~(OSF_EOB);

   // "extend" (rather than "append"), so <buf> is seen as empty storage
   aws_iobuf_reset(b);          // doesn't affect <user_data>
   aws_iobuf_extend_static(b, (char*)buf, size);
   LOG(LOG_INFO, "got %zu-byte buffer for writefn\n", size); // size_t => %zu

   // let writefn move data
   POST(&os->iob_empty);

   // wait for writefn to fill our buffer
   LOG(LOG_INFO, "waiting for writefn\n");
   SAFE_WAIT(&os->iob_full, get_timeout_sec, os);

   // writefn detected CURL EOF?
   if (os->flags & OSF_EOF) {
      LOG(LOG_INFO, "EOF is asserted\n");
   }
   if (os->flags & OSF_EOB) {
      LOG(LOG_INFO, "EOB is asserted\n");
   }

   os->written += b->write_count;
   // NOTE(review): the types of b->write_count and os->written are not
   //       visible here; confirm that "%ld" matches both.
   LOG(LOG_INFO, "returning %ld (total=%ld)\n", b->write_count, os->written);
   return (b->write_count);
}
Beispiel #4
0
// Prepare <os> for a streaming PUT or GET, and start the thread (s3_op)
// that runs the operation, using the IOBuf embedded in <os>.
//
// <put>                  non-zero => PUT (OSF_WRITING), else GET (OSF_READING)
// <content_length>       non-zero => installed via s3_set_content_length_r();
//                        zero     => chunked transfer-encoding is enabled.
// <preserve_os_written>  non-zero => os->written is not zeroed (neither here,
//                        nor in stream_reset() when re-opening).
//
// A stream with OSF_CLOSED set is first reset with stream_reset().
//
// return 0 for success.
// return -1 with errno, for failures:
//    EINVAL                  <os> is already open, or has flags asserted
//                            without being CLOSED.
//    (pthread_create error)  the thread could not be started.
//
// NOTE: If pthread_create() fails, the flags set here (OSF_OPEN, etc) and
//       the initialized semaphores are left as they are.
//
int stream_open(ObjectStream* os,
                IsPut         put,
                curl_off_t    content_length,
                uint8_t       preserve_os_written) {
   LOG(LOG_INFO, "%s\n", ((put) ? "PUT" : "GET"));

   if (os->flags & OSF_OPEN) {
      LOG(LOG_ERR, "%s is already open\n", os->url);
      errno = EINVAL;
      return -1;                // already open
   }
   if (os->flags) {
      if (os->flags & OSF_CLOSED) {
         LOG(LOG_INFO, "stream being re-opened with %s\n", os->url);
         stream_reset(os, preserve_os_written); // previously-used
      }
      else {
         LOG(LOG_ERR, "%s has flags asserted, but is not CLOSED\n", os->url);
         errno = EINVAL;
         return -1;
      }
   }

   os->flags |= OSF_OPEN;
   if (put)
      os->flags |= OSF_WRITING;
   else
      os->flags |= OSF_READING;

   if (! preserve_os_written)
      os->written = 0;          // total read/written through OS

   // shorthand
   IOBuf* b = &os->iob;

   // readfunc/writefunc just get the IOBuf from libaws4c, but they need
   // the ObjectStream.  So IOBuf now has a pointer to allow this.
   b->user_data = os;

   // install copy of global default-context as per-connection context
   if (! b->context) {
      LOG(LOG_INFO, "No context.  Cloning from defaults.\n");
      aws_iobuf_context(b, aws_context_clone());
   }

   AWSContext* ctx = b->context;

   os->content_len = content_length;
   if (content_length)
      s3_set_content_length_r(content_length, ctx);
   else
      s3_chunked_transfer_encoding_r(1, ctx);

   aws_iobuf_reset(b);          // doesn't affect <user_data> or <context>

   // both directions use the same pair of semaphores
   SEM_INIT(&os->iob_empty, 0, 0);
   SEM_INIT(&os->iob_full,  0, 0);

   if (put) {
      aws_iobuf_readfunc(b, &streaming_readfunc);
   }
   else {
      aws_iobuf_headerfunc(b, &streaming_writeheaderfunc);
      aws_iobuf_writefunc(b, &streaming_writefunc);
   }

   // thread runs the GET/PUT, with the iobuf in <os>
   LOG(LOG_INFO, "starting thread\n");

   // pthread_create() reports failure through its return-value (an error
   // number); it does not set errno.  Use that value for the message, and
   // hand it to the caller via errno, like our other failure-returns.
   const int rc = pthread_create(&os->op, NULL, &s3_op, os);
   if (rc) {
      LOG(LOG_ERR, "pthread_create failed: '%s'\n", strerror(rc));
      errno = rc;
      return -1;
   }
   return 0;
}
Beispiel #5
0
// Hand <buf> over to the streaming_readfunc(), so it can be added into
// the ongoing streaming PUT.  You must call stream_open() first.
//
// NOTE: Doing this a little differently from the test_aws.c (case 12)
//       approach.  We're forcing *synchronous* interaction with the
//       readfunc, because we don't want caller's <buf> to go out of scope
//       until the readfunc is finished with it.
//
// NOTE: Copying caller's buffer into a private one, so that we could
//       return immediately, was tried.  It did not improve performance.
//
// return -1 with errno (EINVAL), if <os> is not open, or not open for writing.
// else return <size>.
//
// NOTE(review): <size> is returned through an 'int'.  Sizes above INT_MAX
//       would not survive that conversion.
// NOTE(review): timeout handling lives inside SAFE_WAIT, which is defined
//       elsewhere; confirm what it returns / sets on timeout.
//
int stream_put(ObjectStream* os,
               const char*   buf,
               size_t        size) {

   static const int put_timeout_sec = 20; /* totally made up out of thin air */

   // <os> is logged as a hex size_t, to tell streams apart (size_t => %zx)
   LOG(LOG_INFO, "(%08zx) entry\n", (size_t)os);
   if (! (os->flags & OSF_OPEN)) {
      LOG(LOG_ERR, "(%08zx) %s isn't open\n", (size_t)os, os->url);
      errno = EINVAL;            /* ?? */
      return -1;
   }
   if (! (os->flags & OSF_WRITING)) {
      LOG(LOG_ERR, "(%08zx) %s isn't open for writing\n", (size_t)os, os->url);
      errno = EINVAL;            /* ?? */
      return -1;
   }
   IOBuf* b = &os->iob;         // shorthand

   // install buffer into IOBuf
   aws_iobuf_reset(b);          // doesn't affect <user_data>
   aws_iobuf_append_static(b, (char*)buf, size);
   LOG(LOG_INFO, "(%08zx) installed buffer (%zu bytes) for readfn\n", (size_t)os, size);

   // let readfunc move data
   POST(&os->iob_full);

   // readfunc done with IOBuf?
   LOG(LOG_INFO, "(%08zx) waiting for IOBuf\n", (size_t)os);
   SAFE_WAIT(&os->iob_empty, put_timeout_sec, os);

   LOG(LOG_INFO, "(%08zx) buffer done\n", (size_t)os);
   return size;
}
Beispiel #6
0
/*
 * Write <length> bytes from <buffer> to, or read <length> bytes into
 * <buffer> from, the object named by <file>, at param->offset.
 *
 *   access               WRITE => write;  anything else => read
 *   file                 actually the object-name (a char*)
 *   buffer               caller's data (write) or storage (read)
 *   length               number of bytes to move
 *   param                IOR parameters (io_buf, etags, part_number, UploadId,
 *                        offset, etc)
 *   multi_part_upload_p  non-zero => writes are parts of a multi-part upload,
 *                        zero     => writes use EMC byte-range extensions
 *
 * Returns <length>.  Failures are handled by AWS4C_CHECK / AWS4C_CHECK_OK /
 * ERR_SIMPLE, or (for a malformed ETag) by exit(1).
 *
 * The caller's buffer is linked directly into param->io_buf for the
 * duration of the request, and dropped again (aws_iobuf_reset) afterwards.
 */
static
IOR_offset_t
S3_Xfer_internal(int          access,
					  void*        file,
					  IOR_size_t*  buffer,
					  IOR_offset_t length,
					  IOR_param_t* param,
					  int          multi_part_upload_p ) {

	if (param->verbose >= VERBOSE_2) {
		// pointers go through %p; <length> is cast to match %llu
		printf("-> S3_Xfer(acc:%d, target:%s, buf:%p, len:%llu, %p)\n",
				 access, (char*)file, (void*)buffer,
				 (unsigned long long)length, (void*)param);
	}

	char*      fname = (char*)file; /* <file> is really the object-name */
	size_t     remaining = (size_t)length;
	char*      data_ptr = (char *)buffer;
	off_t      offset = param->offset;

	// easier to think
	int        n_to_n    = param->filePerProc;
	int        n_to_1    = (! n_to_n);
	int        segmented = (param->segmentCount == 1);


	if (access == WRITE) {	/* WRITE */

		if (verbose >= VERBOSE_3) {
			fprintf( stdout, "rank %d writing length=%lld to offset %lld\n",
						rank,
						(long long)remaining,
						(long long)(param->offset + length - remaining));
		}


		if (multi_part_upload_p) {

			// For N:1, part-numbers must have a global ordering for the
			// components of the final object.  param->part_number is
			// incremented by 1 per write, on each rank.  This lets us use it
			// to compute a global part-numbering.
			//
			// In the N:N case, we only need to increment part-numbers within
			// each rank.
			//
			// In the N:1 case, the global order of part-numbers we're writing
			// depends on whether we're writing strided or segmented, in
			// other words, how <offset> and <remaining> are actually
			// positioning the parts being written. [See discussion at
			// S3_Close_internal().]
			//
			// NOTE: 's3curl.pl --debug' shows StringToSign having partNumber
			//       first, even if I put uploadId first in the URL.  Maybe
			//       that's what the server will do.  GetStringToSign() in
			//       aws4c is not clever about this, so we spoon-feed args in
			//       the proper order.

			size_t part_number;
			if (n_to_1) {
				if (segmented) {      // segmented
					size_t parts_per_rank = param->blockSize / param->transferSize;
					part_number = (rank * parts_per_rank) + param->part_number;
				}
				else                  // strided
					part_number = (param->part_number * param->numTasks) + rank;
			}
			else
				part_number = param->part_number;
			++ param->part_number;

			// <part_number> is a size_t, so it must be formatted with %zu.
			snprintf(buff, BUFF_SIZE,
						"%s?partNumber=%zu&uploadId=%s",
						fname, part_number, param->UploadId);

			// For performance, we append <data_ptr> directly into the linked list
			// of data in param->io_buf.  We are "appending" rather than
			// "extending", so the added buffer is seen as written data, rather
			// than empty storage.
			//
			// aws4c parses some header-fields automatically for us (into members
			// of the IOBuf).  After the PUT, we can just read the etag from
			// param->io_buf->eTag.  The server actually returns literal
			// quote-marks, at both ends of the string.

			aws_iobuf_reset(param->io_buf);
			aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
			AWS4C_CHECK( s3_put(param->io_buf, buff) );
			AWS4C_CHECK_OK( param->io_buf );

			if (verbose >= VERBOSE_3) {
				fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %zu --> ETag %s\n",
							rank,
							param->numTasks,
							(n_to_1 ? "N:1" : "N:N"),
							(segmented ? "segmented" : "strided"),
							(long long)offset,
							part_number,
							param->io_buf->eTag); // incl quote-marks at [0] and [len-1]
			}
			if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */
				fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
						  rank, ETAG_SIZE);
				exit(1);
			}

			// save the eTag for later (without the surrounding quote-marks)
			aws_iobuf_append(param->etags,
								  param->io_buf->eTag +1,
								  strlen(param->io_buf->eTag) -2);
			// DEBUGGING
			if (verbose >= VERBOSE_4) {
				printf("rank %d: part %zu = ETag %s\n", rank, part_number, param->io_buf->eTag);
			}

			// drop ptrs to <data_ptr>, in param->io_buf
			aws_iobuf_reset(param->io_buf);
		}
		else {	 // use EMC's byte-range write-support, instead of MPU


			// NOTE: You must call 's3_enable_EMC_extensions(1)' for
			//       byte-ranges to work for writes.
			if (n_to_n)
				s3_set_byte_range(-1,-1); // EMC header "Range: bytes=-1-" means "append"
			else
				s3_set_byte_range(offset, remaining);

			// For performance, we append <data_ptr> directly into the linked list
			// of data in param->io_buf.  We are "appending" rather than
			// "extending", so the added buffer is seen as written data, rather
			// than empty storage.
			aws_iobuf_reset(param->io_buf);
			aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
			AWS4C_CHECK   ( s3_put(param->io_buf, fname) );
			AWS4C_CHECK_OK( param->io_buf );

			// drop ptrs to <data_ptr>, in param->io_buf
			aws_iobuf_reset(param->io_buf);
		}


		if ( param->fsyncPerWrite == TRUE ) {
			WARN("S3 doesn't support 'fsync'" ); /* does it? */
		}

	}
	else {				/* READ or CHECK */

		if (verbose >= VERBOSE_3) {
			fprintf( stdout, "rank %d reading from offset %lld\n",
						rank,
						(long long)(param->offset + length - remaining) );
		}

		// read specific byte-range from the object
		// [This is included in the "pure" S3 spec.]
		s3_set_byte_range(offset, remaining);

		// For performance, we append <data_ptr> directly into the linked
		// list of data in param->io_buf.  In this case (i.e. reading),
		// we're "extending" rather than "appending".  That means the
		// buffer represents empty storage, which will be filled by the
		// libcurl writefunction, invoked via aws4c.
		aws_iobuf_reset(param->io_buf);
		aws_iobuf_extend_static(param->io_buf, data_ptr, remaining);
		AWS4C_CHECK( s3_get(param->io_buf, fname) );
		if (param->io_buf->code != 206) { /* '206 Partial Content' */
			snprintf(buff, BUFF_SIZE,
						"Unexpected result (%d, '%s')",
						param->io_buf->code, param->io_buf->result);
			ERR_SIMPLE(buff);
		}

		// drop refs to <data_ptr>, in param->io_buf
		aws_iobuf_reset(param->io_buf);
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Xfer\n");
	}
	return ( length );
}
Beispiel #7
0
/*
 * Prepare the object <testFileName> for subsequent transfers.
 *
 *   testFileName         object-name; also what is returned (as void*)
 *   param                IOR parameters (io_buf, UploadId, openFlags, etc)
 *   createFile           non-zero => object is being created
 *   multi_part_upload_p  non-zero => writes will use multi-part upload (MPU),
 *                        zero     => writes will use EMC byte-range extensions
 *
 * Only does real work when param->open == WRITE:
 *
 *   MPU:  For N:N every rank, and for N:1 only rank 0, optionally truncates
 *         the object (0-length PUT), then POSTs "<name>?uploads" and stores
 *         the "UploadId" from the XML response in param->UploadId.  For N:1
 *         the UploadId is then broadcast from rank 0 to all ranks.
 *
 *   EMC:  Optionally truncates the object (0-length PUT), so that "append"
 *         can work.
 *
 * Failures are handled by AWS4C_CHECK / AWS4C_CHECK_OK / ERR_SIMPLE.
 */
static
void *
S3_Create_Or_Open_internal(char*         testFileName,
                           IOR_param_t*  param,
                           unsigned char createFile,
									int           multi_part_upload_p ) {

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n",
				 testFileName, createFile, multi_part_upload_p);
	}

	/* initialize curl, if needed */
	s3_connect( param );

	/* Check for unsupported flags */
	if ( param->openFlags & IOR_EXCL ) {
		fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" );
	}
	if ( param->useO_DIRECT == TRUE ) {
		fprintf( stdout, "Direct I/O mode is not implemented in S3\n" );
	}

	// easier to think
	int n_to_n = param->filePerProc;
	int n_to_1 = ! n_to_n;

	/* Check whether object needs reset to zero-length.  Without MPU, this
	 * is always done, so "append" can work.  With MPU, it is done for
	 * IOR_TRUNC, or when creating the object. */
	int needs_reset = ( (! multi_part_upload_p)
							  || (param->openFlags & IOR_TRUNC)
							  || createFile );

	if ( param->open == WRITE ) {

		/* initializations for N:1 or N:N writes using multi-part upload */
		if (multi_part_upload_p) {

			// For N:N, all ranks do their own MPU open/close.  For N:1, only
			// rank0 does that. Either way, the response from the server
			// includes an "uploadId", which must be used to upload parts to
			// the same object.
			if ( n_to_n || (rank == 0) ) {

				// rank0 handles truncate
				if ( needs_reset) {
					aws_iobuf_reset(param->io_buf);
					AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
					AWS4C_CHECK_OK( param->io_buf );
				}

				// POST request with URL+"?uploads" initiates multi-part upload
				snprintf(buff, BUFF_SIZE, "%s?uploads", testFileName);
				IOBuf* response = aws_iobuf_new();
				AWS4C_CHECK( s3_post2(param->io_buf, buff, NULL, response) );
				AWS4C_CHECK_OK( param->io_buf );

				// parse XML returned from server, into a tree structure
				aws_iobuf_realloc(response);
				xmlDocPtr doc = xmlReadMemory(response->first->buf,
														response->first->len,
														NULL, NULL, 0);
				if (doc == NULL)
					ERR_SIMPLE("Rank0 Failed to find POST response\n");

				// navigate parsed XML-tree to find UploadId
				xmlNode* root_element = xmlDocGetRootElement(doc);
				const char* upload_id = find_element_named(root_element, (char*)"UploadId");
				if (! upload_id)
					ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n");

				if (param->verbose >= VERBOSE_3)
					printf("got UploadId = '%s'\n", upload_id);

				const size_t upload_id_len = strlen(upload_id);
				if (upload_id_len > MAX_UPLOAD_ID_SIZE) {
					// <upload_id_len> is a size_t, so it needs %zu
					snprintf(buff, BUFF_SIZE,
								"UploadId length %zu exceeds expected max (%d)",
								upload_id_len, (int)MAX_UPLOAD_ID_SIZE);
					ERR_SIMPLE(buff);
				}

				// Save the UploadId we found.  This must happen before
				// xmlFreeDoc(), which comes next.
				//
				// NOTE(review): the terminator may land at index
				//       MAX_UPLOAD_ID_SIZE, so this presumes param->UploadId
				//       has room for MAX_UPLOAD_ID_SIZE+1 chars -- confirm.
				memcpy(param->UploadId, upload_id, upload_id_len);
				param->UploadId[upload_id_len] = 0;

				// free storage for parsed XML tree
				xmlFreeDoc(doc);
				aws_iobuf_free(response);
			}

			// For N:1, share UploadId across all ranks: rank0 sends the one it
			// just obtained, and everyone else receives it.  (Ranks that skipped
			// the block above are necessarily N:1.)
			if (n_to_1)
				MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
		}

		/* initializations for N:N or N:1 writes using EMC byte-range extensions */
		else {

			/* maybe reset to zero-length, so "append" can work */
			if (needs_reset) {

				if (verbose >= VERBOSE_3) {
					fprintf( stdout, "rank %d resetting\n",
								rank);
				}

				aws_iobuf_reset(param->io_buf);
				AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
				AWS4C_CHECK_OK( param->io_buf );
			}
		}
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Create_Or_Open\n");
	}
	return ((void *) testFileName );
}
Beispiel #8
0
// Put <param> back into a state from which a new S3 multi-part-upload
// can be started: the part-counter restarts at zero, and both the
// request IOBuf and the list of collected ETags are reset.
//
// Call this after finalizing a multi-part-upload.  Per the original
// author, this also avoids (one particular set of) memory-leaks.
void
s3_MPU_reset(IOR_param_t* param) {
	param->part_number = 0;

	aws_iobuf_reset(param->io_buf);
	aws_iobuf_reset(param->etags);
}