Example #1
size_t streaming_readfunc(void* ptr, size_t size, size_t nmemb, void* stream) {
   LOG(LOG_INFO, "entry\n");

   IOBuf*        b     = (IOBuf*)stream;
   ObjectStream* os    = (ObjectStream*)b->user_data;
   size_t        total = (size * nmemb);
   LOG(LOG_INFO, "(%08lx) curl buff %ld\n", (size_t)os, total);

   // wait for producer to fill buffers
   WAIT(&os->iob_full);
   LOG(LOG_INFO, "(%08lx) avail-data: %ld\n", (size_t)os, b->avail);

   // maybe we were requested to quit or abort?
   if (b->write_count == 0) {
      // called by stream_sync()
      LOG(LOG_INFO, "(%08lx) got EOF\n", (size_t)os);
      POST(&os->iob_empty); // polite
      return 0;
   }
   else if (b->first->buf == (char*)1) {
      // called by stream_abort()
      LOG(LOG_INFO, "(%08lx) got ABORT\n", (size_t)os);
      POST(&os->iob_empty); // polite
      return CURL_READFUNC_ABORT;
   }

   // move producer's data into curl buffers.
   // (Might take more than one callback)
   size_t move_req = ((total <= b->avail) ? total : b->avail);
   size_t moved    = aws_iobuf_get_raw(b, (char*)ptr, move_req);

   // track total size
   os->written += moved;
   LOG(LOG_INFO, "(%08lx) moved %ld  (total: %ld)\n", (size_t)os, moved, os->written);

   if (b->avail) {
      LOG(LOG_INFO, "(%08lx) iterating (avail: %ld)\n", (size_t)os, b->avail);
      POST(&os->iob_full);  // next callback is pre-approved
   }
   else {
      LOG(LOG_INFO, "(%08lx) done with buffer (total written %ld)\n", (size_t)os, os->written);
      POST(&os->iob_empty); // tell producer that buffer is used
   }

   return moved;
}
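
Below is a minimal sketch of how a callback like this is typically wired into libcurl for a streaming upload. The IOBuf* b (with its ObjectStream in user_data) is assumed to be prepared as in the excerpt above; the URL and the chunked-transfer header are made-up details for illustration, not the surrounding library's actual setup code.

#include <curl/curl.h>

// Hypothetical wiring for streaming_readfunc(), assuming an IOBuf* b
// prepared as in the excerpt above.
CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://example.com/bucket/object"); // made-up URL
curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);                        // make this a PUT
curl_easy_setopt(curl, CURLOPT_READFUNCTION, streaming_readfunc);
curl_easy_setopt(curl, CURLOPT_READDATA, (void*)b);                // becomes <stream> above

// object size is not known up front, so stream with chunked transfer-encoding
struct curl_slist* headers = curl_slist_append(NULL, "Transfer-Encoding: chunked");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

CURLcode rc = curl_easy_perform(curl);   // drives streaming_readfunc() repeatedly
curl_slist_free_all(headers);
curl_easy_cleanup(curl);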
Example #2
static
void
S3_Close_internal( void*         fd,
                   IOR_param_t*  param,
                   int           multi_part_upload_p ) {

	char* fname = (char*)fd; /* see NOTE above S3_Create_Or_Open() */

	// convenience flags; easier to think about
	int n_to_n    = param->filePerProc;
	int n_to_1    = (! n_to_n);
	int segmented = (param->segmentCount == 1);

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Close('%s', ,%d) %s\n",
				 fname,
             multi_part_upload_p,
             ((n_to_n) ? "N:N" : ((segmented) ? "N:1(seg)" : "N:1(str)")));
	}

	if (param->open == WRITE) {


		// finalizing Multi-Part Upload (for N:1 or N:N)
		if (multi_part_upload_p) {


			size_t etag_data_size = param->etags->write_count; /* local ETag data (bytes) */
			size_t etags_per_rank = etag_data_size / ETAG_SIZE;		/* number of local etags */

			// --- create XML containing ETags in an IOBuf for "close" request
			IOBuf* xml = NULL;


			if (n_to_1) {

				// for N:1, gather all Etags at Rank0
				MPI_Datatype mpi_size_t;
				if (sizeof(size_t) == sizeof(int))
					mpi_size_t = MPI_INT;
				else if (sizeof(size_t) == sizeof(long))
					mpi_size_t = MPI_LONG;
				else
					mpi_size_t = MPI_LONG_LONG;

				// Everybody should have the same number of ETags (?)
				size_t etag_count_max = 0;		 /* highest number on any proc */
				MPI_Allreduce(&etags_per_rank, &etag_count_max,
								  1, mpi_size_t, MPI_MAX, param->testComm);
				if (etags_per_rank != etag_count_max) {
					printf("Rank %d: etag count mismatch: max:%d, mine:%d\n",
							 rank, etag_count_max, etags_per_rank);
					MPI_Abort(param->testComm, 1);
				}

				// collect ETag data at Rank0
				aws_iobuf_realloc(param->etags);             /* force single contiguous buffer */
				char* etag_data = param->etags->first->buf;  /* per-rank data, contiguous */

				if (rank == 0)  {
					char* etag_ptr;
					int   i;
					int   j;
					int   rnk;

					char* etag_vec = (char*)malloc((param->numTasks * etag_data_size) +1);
					if (! etag_vec) {
						fprintf(stderr, "rank 0 failed to malloc %zu bytes\n",
								  param->numTasks * etag_data_size);
						MPI_Abort(param->testComm, 1);
					}
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  etag_vec,  etag_data_size, MPI_BYTE, 0, param->testComm);

					// --- debugging: show the gathered etag data
					//     (This shows the raw concatenated etag-data from each node.)
					if (param->verbose >= VERBOSE_4) {

						printf("rank 0: gathered %d etags from all ranks:\n", etags_per_rank);
						etag_ptr=etag_vec;
						for (rnk=0; rnk<param->numTasks; ++rnk) {
							printf("\t[%d]: '", rnk);

							int ii;
							for (ii=0; ii<etag_data_size; ++ii)	/* NOT null-terminated! */
								printf("%c", etag_ptr[ii]);

							printf("'\n");
							etag_ptr += etag_data_size;
						}
					}


					// add XML for *all* the parts.  The XML must be ordered by
					// part-number.  Each rank wrote <etags_per_rank> parts,
					// locally.  At rank0, the etags for each rank are now
					// stored as a contiguous block of text, with the blocks
					// stored in rank order in etag_vec.  In other words, our
					// internal rep at rank 0 matches the "segmented" format.
					// From this, we must select etags in an order matching how
					// they appear in the actual object, and give sequential
					// part-numbers to the resulting sequence.
					//
					// That ordering of parts in the actual written object
					// varies according to whether we wrote in the "segmented"
					// or "strided" format.
					//
					//     supposing N ranks, and P parts per rank:
					//
					// segmented:
					//
					//     all parts for a given rank are consecutive.
					//     rank r writes these parts:
					//
					//     rP, rP+1, ... (r+1)P -1
					//
					//     i.e. rank0 writes parts 0,1,2,3 ... P-1
					//
					//
					// strided:
					//
					//     rank r writes every N-th part, starting with r.
					//
					//     r, N+r, ... (P-1)N + r
					//
					//     i.e. rank0 writes parts 0,N,2N,3N ... (P-1)N
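					//
					//     e.g., with N=2 ranks and P=3 parts per rank
					//     (made-up numbers, for illustration):
					//
					//         segmented: rank0 -> 0,1,2    rank1 -> 3,4,5
					//         strided:   rank0 -> 0,2,4    rank1 -> 1,3,5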
					//
					//
					// NOTE: If we knew ahead of time how many parts each rank was
					//       going to write, we could assign part-number ranges, per
					//       rank, and then have nice locality here.
					//
					//       Alternatively, we could have everyone format their own
					//       XML text and send that, instead of just the tags.  This
					//       would increase the amount of data being sent, but would
					//       reduce the work for rank0 to format everything.

               size_t  i_max;            // outer-loop
               size_t  j_max;            // inner loop
					size_t  start_multiplier; // initial offset in collected data
					size_t  stride;           // in etag_vec

					if (segmented) {          // segmented
                  i_max            = param->numTasks;
                  j_max            = etags_per_rank;
						start_multiplier = etag_data_size;		/* one rank's-worth of Etag data */
						stride           = ETAG_SIZE;				/* one ETag */
					}
					else {                    // strided
                  i_max            = etags_per_rank;
                  j_max            = param->numTasks;
						start_multiplier = ETAG_SIZE;				/* one ETag */
						stride           = etag_data_size;		/* one rank's-worth of Etag data */
					}


					xml = aws_iobuf_new();
					aws_iobuf_growth_size(xml, 1024 * 8);

					// write XML header ...
					aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

					int part = 0;
					for (i=0; i<i_max; ++i) {

						etag_ptr=etag_vec + (i * start_multiplier);

						for (j=0; j<j_max; ++j) {

							// etags were saved as contiguous text.  Extract the next one.
							char etag[ETAG_SIZE +1];
							memcpy(etag, etag_ptr, ETAG_SIZE);
							etag[ETAG_SIZE] = 0;

							// write XML for next part, with Etag ...
							snprintf(buff, BUFF_SIZE,
										"  <Part>\n"
										"    <PartNumber>%d</PartNumber>\n"
										"    <ETag>%s</ETag>\n"
										"  </Part>\n",
										part, etag);

							aws_iobuf_append_str(xml, buff);

							etag_ptr += stride;
							++ part;
						}
					}

					// write XML tail ...
					aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
				}

				else {
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  NULL,      etag_data_size, MPI_BYTE, 0, param->testComm);
				}
			}

			else {   /* N:N */

				xml = aws_iobuf_new();
				aws_iobuf_growth_size(xml, 1024 * 8);

				// write XML header ...
				aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

				// all parts of our object were written from this rank.
				char etag[ETAG_SIZE +1];
				int  part = 0;
				int  i;
				for (i=0; i<etags_per_rank; ++i) {

					// TBD: Instead of reading into etag, then sprintf'ing, then
					// copying into xml, we could just read directly into xml
					int sz = aws_iobuf_get_raw(param->etags, etag, ETAG_SIZE);
					if (sz != ETAG_SIZE) {
						snprintf(buff, BUFF_SIZE,
									"Rank %d: read of ETag %d had length %d (not %d)\n",
									rank, i, sz, ETAG_SIZE);
						ERR_SIMPLE(buff);
					}
					etag[ETAG_SIZE] = 0;


					// write XML for next part, with Etag ...
					snprintf(buff, BUFF_SIZE,
								"  <Part>\n"
								"    <PartNumber>%d</PartNumber>\n"
								"    <ETag>%s</ETag>\n"
								"  </Part>\n",
								part, etag);

					aws_iobuf_append_str(xml, buff);

					++ part;
				}

				// write XML tail ...
				aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
			}



			// send request to finalize MPU
			if (n_to_n || (rank == 0)) {

				// DEBUGGING: show the XML we constructed
				if (param->verbose >= VERBOSE_3)
					debug_iobuf(xml, 1, 1);

				// --- POST our XML to the server.
				snprintf(buff, BUFF_SIZE,
							"%s?uploadId=%s",
							fname, param->UploadId);

				AWS4C_CHECK   ( s3_post(xml, buff) );
				AWS4C_CHECK_OK( xml );

				aws_iobuf_free(xml);
			}


			// everybody reset MPU info.  Allows another MPU, and frees memory.
			s3_MPU_reset(param);

			// Everybody meetup, so non-zero ranks won't go trying to stat the
			// N:1 file until rank0 has finished the S3 multi-part finalize.
			// The object will not appear to exist, until then.
			if (n_to_1)
				MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
		}
		else {

			// No finalization is needed when using EMC's byte-range writing
			// support.  However, we do need to make sure everyone has
			// finished writing, before anyone starts reading.
			if (n_to_1) {
				MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
				if (param->verbose >= VERBOSE_2)
					printf("rank %d: passed barrier\n", rank);
			}
		}

		// After writing, reset the CURL connection, so that caches won't be
		// used for reads.
		aws_reset_connection();
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Close\n");
	}
}
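
The segmented/strided part-ordering described in the comments above can be sanity-checked with a tiny standalone program. N_RANKS and PARTS_PER_RANK are made-up values for illustration; the two ownership formulas mirror the i_max/j_max/start_multiplier/stride selection in S3_Close_internal.

#include <stdio.h>

#define N_RANKS         2    /* hypothetical N: number of ranks */
#define PARTS_PER_RANK  3    /* hypothetical P: parts written per rank */

int main(void) {
   int n_parts = N_RANKS * PARTS_PER_RANK;
   int part;

   for (part = 0; part < n_parts; ++part) {
      int seg_rank = part / PARTS_PER_RANK;  /* segmented: rank r owns rP .. (r+1)P-1 */
      int str_rank = part % N_RANKS;         /* strided:   rank r owns r, N+r, 2N+r, ... */
      printf("part %d:  segmented -> rank %d   strided -> rank %d\n",
             part, seg_rank, str_rank);
   }
   return 0;
}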