Пример #1
0
/*
 * Shared implementation behind the S3 create/open entry points.
 *
 * Always makes sure the S3 connection is initialised (s3_connect) and warns
 * about open flags that have no S3 equivalent.  When param->open is WRITE it
 * additionally prepares the object for writing:
 *
 *   - multi_part_upload_p != 0: for N:N every rank, and for N:1 only rank 0,
 *     optionally resets the object to zero length and initiates a multi-part
 *     upload.  The UploadId found in the server's XML response is stored in
 *     param->UploadId and, for N:1, broadcast over param->testComm.
 *   - multi_part_upload_p == 0: the object is reset to zero length.
 *
 * Parameters:
 *   testFileName         name of the object; also used as the returned handle
 *   param                test parameters (verbose, openFlags, filePerProc,
 *                        open, io_buf, UploadId, testComm are used here)
 *   createFile           non-zero forces a reset when multi-part upload is
 *                        used without IOR_TRUNC
 *   multi_part_upload_p  non-zero selects multi-part upload
 *
 * Returns testFileName cast to void*.
 *
 * NOTE(review): ERR_SIMPLE is presumed not to return; the statements that
 * follow each check rely on that -- confirm against its definition.
 */
static
void *
S3_Create_Or_Open_internal(char*         testFileName,
                           IOR_param_t*  param,
                           unsigned char createFile,
                           int           multi_part_upload_p ) {

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n",
				 testFileName, createFile, multi_part_upload_p);
	}

	/* initialize curl, if needed */
	s3_connect( param );

	/* Check for unsupported flags (warn only, the open still proceeds) */
	if ( param->openFlags & IOR_EXCL ) {
		fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" );
	}
	if ( param->useO_DIRECT == TRUE ) {
		fprintf( stdout, "Direct I/O mode is not implemented in S3\n" );
	}

	// easier to think
	int n_to_n = param->filePerProc;
	int n_to_1 = ! n_to_n;

	/* Check whether the object needs a reset to zero length, so "append" can
	 * work.  Without multi-part upload a reset is always needed; with it, a
	 * reset is needed on truncate or create.  (An existence check via a HEAD
	 * request was once sketched for the create case but is not performed:
	 * create always resets.) */
	int needs_reset = ( (! multi_part_upload_p)
	                    || (param->openFlags & IOR_TRUNC)
	                    || createFile );

	if ( param->open == WRITE ) {

		/* initializations for N:1 or N:N writes using multi-part upload */
		if (multi_part_upload_p) {

			// For N:N, all ranks do their own MPU open/close.  For N:1, only
			// rank0 does that. Either way, the response from the server
			// includes an "uploadId", which must be used to upload parts to
			// the same object.
			if ( n_to_n || (rank == 0) ) {

				// rank0 handles truncate
				if (needs_reset) {
					aws_iobuf_reset(param->io_buf);
					AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
					AWS4C_CHECK_OK( param->io_buf );
				}

				// POST request with URL+"?uploads" initiates multi-part upload
				snprintf(buff, BUFF_SIZE, "%s?uploads", testFileName);
				IOBuf* response = aws_iobuf_new();
				AWS4C_CHECK( s3_post2(param->io_buf, buff, NULL, response) );
				AWS4C_CHECK_OK( param->io_buf );

				// parse XML returned from server, into a tree structure
				aws_iobuf_realloc(response);
				if (response->first == NULL) {
					// empty response body: nothing to hand to the XML parser
					ERR_SIMPLE("Rank0 Failed to find POST response\n");
				}
				xmlDocPtr doc = xmlReadMemory(response->first->buf,
				                              response->first->len,
				                              NULL, NULL, 0);
				if (doc == NULL) {
					ERR_SIMPLE("Rank0 Failed to find POST response\n");
				}

				// navigate parsed XML-tree to find UploadId
				xmlNode* root_element = xmlDocGetRootElement(doc);
				const char* upload_id = find_element_named(root_element, (char*)"UploadId");
				if (! upload_id) {
					ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n");
				}

				if (param->verbose >= VERBOSE_3)
					printf("got UploadId = '%s'\n", upload_id);

				const size_t upload_id_len = strlen(upload_id);
				if (upload_id_len > MAX_UPLOAD_ID_SIZE) {
					// size_t must be printed with %zu, not %d
					snprintf(buff, BUFF_SIZE,
					         "UploadId length %zu exceeds expected max (%zu)",
					         upload_id_len, (size_t)MAX_UPLOAD_ID_SIZE);
					ERR_SIMPLE(buff);
				}

				// save the UploadId we found (upload_id points into doc, so
				// it must be copied before the tree is freed)
				memcpy(param->UploadId, upload_id, upload_id_len);
				param->UploadId[upload_id_len] = 0;

				// free storage for parsed XML tree
				xmlFreeDoc(doc);
				aws_iobuf_free(response);
			}

			// For N:1, share UploadId across all ranks: rank 0 sends, every
			// other rank receives.  (N:N ranks each have their own.)
			if (n_to_1)
				MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
		}

		/* initializations for N:N or N:1 writes using EMC byte-range extensions */
		else {

			/* maybe reset to zero-length, so "append" can work */
			if (needs_reset) {

				if (param->verbose >= VERBOSE_3) {
					fprintf( stdout, "rank %d resetting\n",
					         rank);
				}

				aws_iobuf_reset(param->io_buf);
				AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
				AWS4C_CHECK_OK( param->io_buf );
			}
		}
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Create_Or_Open\n");
	}
	return ((void *) testFileName );
}
Пример #2
0
/*
 * s3util entry point.
 *
 * Usage: s3util <operation> <filename> [debug-level]
 *
 * Steps:
 *   1. Initialise the aws library and read credentials ("myteksi").
 *   2. Load the S3 host and bucket name from the "s3config" file in the
 *      current directory; if the file does not exist, ask the user for the
 *      values on stdin and create it.
 *   3. Run the requested operation (PUT, GET or DELETE) on <filename>.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
int
main (int argc, char *argv[]) {
  int status = 0;

  aws_init();
  // argv[3] only exists when at least four arguments were given; argc is
  // validated further down, so it has to be checked here as well.
  if(argc > 3 && argv[3] != NULL) {
    aws_set_debug(atoi(argv[3]));
  }
  IOBuf * aws_buf = aws_iobuf_new();

  // Read credential file
  int rv = aws_read_config("myteksi");
  if ( rv )
  {
    fprintf(stderr, "Could not find a credential in the config file \n" );
    fprintf(stderr, "Make sure your ~/.awsAuth file is correct \n" );
    exit (1);
  }


  // Read config file
  FILE *fp = NULL;

  if( (fp = fopen("s3config", "r")) == NULL) {
    //File does not exist. Initialize it
    if( (fp = fopen("s3config", "w+")) == NULL) {
      fprintf(stderr, "ERROR: Unable to create config file.\n");
      exit(1);
    }

    // Ask for the base address
    fprintf(stdout, "Config file doesn't exist yet! Creating one now. \n");
    fprintf(stdout, "Please specify the AWS S3 base address "
                    "[default s3.amazonaws.com] :");
    char getInput[ LINE_MAX ];
    if( fgets( getInput, sizeof(getInput) , stdin ) != NULL ) {
      // Strip the trailing newline only if there is one
      getInput[ strcspn(getInput, "\n") ] = '\0';
    }
    else {
      getInput[0] = '\0';   // EOF or read error: fall back to the default
    }
    S3_host = strdup( getInput[0] != '\0' ? getInput : "s3.amazonaws.com" );

    // Ask for bucket_name until a non-empty one is given
    int validbucketname = 0;
    while( !validbucketname ) {
      fprintf(stdout, "Please specify the bucket name: ");
      if( fgets( getInput, sizeof(getInput) , stdin ) == NULL ) {
        // stdin is exhausted; asking again would loop forever.  Do not
        // leave an empty config file behind.
        fprintf(stderr, "ERROR: No bucket name given.\n");
        fclose(fp);
        remove("s3config");
        exit(1);
      }
      getInput[ strcspn(getInput, "\n") ] = '\0';
      if( getInput[0] != '\0' ) {
        bucketname = strdup(getInput);
        validbucketname = 1;
      }
    }

    if( S3_host == NULL || bucketname == NULL ) {
      fprintf(stderr, "ERROR: Out of memory.\n");
      fclose(fp);
      remove("s3config");
      exit(1);
    }

    // Write the entries straight to the file; no intermediate buffer needed
    if( fprintf( fp, "S3_Base_Address=\"%s\"\n"
                     "bucket_name=\"%s\"\n", S3_host, bucketname ) < 0 ) {
      fprintf(stderr, "ERROR: Unable to create config file.\n");
    }
    if( fclose(fp) != 0 ) {
      fprintf(stderr, "ERROR: Unable to create config file.\n");
    }
  }
  // Config file exist, parse it
  else {
    char    line[ LINE_MAX ];
    char    delim[4] = {'=', '\"', '\n', '\0'};

    // Each line has the form  key="value"
    while( fgets( line, sizeof(line) , fp ) != NULL ) {
      char* left = strtok(line, delim);
      if( left == NULL ) {
        continue;   // Empty line
      }
      char* right = strtok(NULL, delim);

      if( strcmp(left, "S3_Base_Address") == 0 ) {
        S3_host = strdup( right != NULL ? right : "s3.amazonaws.com" );
      }
      else if( strcmp(left, "bucket_name") == 0 && right != NULL ) {
        bucketname = strdup(right);
      }
    }  // End while
    fclose(fp);

    if( S3_host == NULL || bucketname == NULL ) {
      fprintf(stderr, "ERROR: Invalid entry in config file.\n");
      exit(1);
    }
  }

  // Set parameters in S3 library
  s3_set_host(S3_host);
  s3_set_bucket(bucketname);
  s3_set_acl(S3_acl);

  // Check for valid arguments
  if ( argc != 3 && argc != 4 ) {
    fprintf(stderr, "Usage: s3util <operation> <filename>\n");
    fprintf(stderr, "Operation can be one of {PUT, GET, DELETE}\n");
    exit(1);
  }
  // Check if operation is valid
  operation = strdup(argv[1]);
  filename  = strdup(argv[2]);
  if( operation == NULL || filename == NULL ) {
    fprintf(stderr, "ERROR: Out of memory.\n");
    exit(1);
  }

  // PUT file
  if( strcmp(operation, "PUT") == 0 ) {
    int rc = -1;
    char s3replyMD5[33];

    rv = put_file( aws_buf, filename );
    // The 32 characters after the first one are compared against the local
    // MD5 (presumably the ETag is a quoted hex digest -- confirm).  The ETag
    // must be long enough for that copy to stay inside the string.
    if( aws_buf->eTag != NULL && strlen(aws_buf->eTag) > 32 ) {
      memcpy(s3replyMD5, aws_buf->eTag + 1, 32);
      s3replyMD5[32] = '\0';
      rc = verifyMD5(filename, s3replyMD5);
    }
    if(rv != 0 || rc != 0) {
      printf ( "PUT operation was unsuccessful \n" );
      // Never report success when the upload itself failed
      status = (rc != 0) ? rc : -1;
    }
    else {
      printf ( "MD5SUM matches, file uploaded successfully \n" );
    }
  }

  // GET file
  else if( strcmp(operation, "GET") == 0 ) {
    rv = get_file( aws_buf, filename );
    if(rv == 0 && aws_buf->code == 200) {
      printf ( "File was successfully downloaded \n" );
    }
    else {
      printf ( "GET operation was unsuccessful \n" );
      status = -1;
    }
  }

  // DELETE FILE
  else if( strcmp(operation, "DELETE") == 0 ) {
    rv = delete_file( aws_buf, filename );
    if(rv == 0 && aws_buf->code == 204) {
      printf ( "File was successfully deleted \n" );
    }
    else {
      printf ( "DELETE operation was unsuccessful \n" );
      status = -1;
    }
  }
  else {
    fprintf(stderr, "Invalid operation, operation must be one of "
    "{PUT, GET, DELETE}\n");
    status = 1;
  }

  // Release resources on every path, not only on success
  aws_iobuf_free(aws_buf);

  global_free();
  return status;
}
Пример #3
0
static
void
S3_Close_internal( void*         fd,
						 IOR_param_t*  param,
						 int           multi_part_upload_p ) {

	char* fname = (char*)fd; /* see NOTE above S3_Create_Or_Open() */

	// easier to think
	int n_to_n    = param->filePerProc;
	int n_to_1    = (! n_to_n);
   int segmented = (param->segmentCount == 1);

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Close('%s', ,%d) %s\n",
				 fname,
             multi_part_upload_p,
             ((n_to_n) ? "N:N" : ((segmented) ? "N:1(seg)" : "N:1(str)")));
	}

	if (param->open == WRITE) {


		// finalizing Multi-Part Upload (for N:1 or N:N)
		if (multi_part_upload_p) {


			size_t etag_data_size = param->etags->write_count; /* local ETag data (bytes) */
			size_t etags_per_rank = etag_data_size / ETAG_SIZE;		/* number of local etags */

			// --- create XML containing ETags in an IOBuf for "close" request
			IOBuf* xml = NULL;


			if (n_to_1) {

				// for N:1, gather all Etags at Rank0
				MPI_Datatype mpi_size_t;
				if (sizeof(size_t) == sizeof(int))
					mpi_size_t = MPI_INT;
				else if (sizeof(size_t) == sizeof(long))
					mpi_size_t = MPI_LONG;
				else
					mpi_size_t = MPI_LONG_LONG;

				// Everybody should have the same number of ETags (?)
				size_t etag_count_max = 0;		 /* highest number on any proc */
				MPI_Allreduce(&etags_per_rank, &etag_count_max,
								  1, mpi_size_t, MPI_MAX, param->testComm);
				if (etags_per_rank != etag_count_max) {
					printf("Rank %d: etag count mismatch: max:%d, mine:%d\n",
							 rank, etag_count_max, etags_per_rank);
					MPI_Abort(param->testComm, 1);
				}

				// collect ETag data at Rank0
				aws_iobuf_realloc(param->etags);             /* force single contiguous buffer */
				char* etag_data = param->etags->first->buf;  /* per-rank data, contiguous */

				if (rank == 0)  {
					char* etag_ptr;
					int   i;
					int   j;
					int   rnk;

					char* etag_vec = (char*)malloc((param->numTasks * etag_data_size) +1);
					if (! etag_vec) {
						fprintf(stderr, "rank 0 failed to malloc %d bytes\n",
								  param->numTasks * etag_data_size);
						MPI_Abort(param->testComm, 1);
					}
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  etag_vec,  etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);

					// --- debugging: show the gathered etag data
					//     (This shows the raw concatenated etag-data from each node.)
					if (param->verbose >= VERBOSE_4) {

						printf("rank 0: gathered %d etags from all ranks:\n", etags_per_rank);
						etag_ptr=etag_vec;
						for (rnk=0; rnk<param->numTasks; ++rnk) {
							printf("\t[%d]: '", rnk);

							int ii;
							for (ii=0; ii<etag_data_size; ++ii)	/* NOT null-terminated! */
								printf("%c", etag_ptr[ii]);

							printf("'\n");
							etag_ptr += etag_data_size;
						}
					}


					// add XML for *all* the parts.  The XML must be ordered by
					// part-number.  Each rank wrote <etags_per_rank> parts,
					// locally.  At rank0, the etags for each rank are now
					// stored as a continguous block of text, with the blocks
					// stored in rank order in etag_vec.  In other words, our
					// internal rep at rank 0 matches the "segmented" format.
					// From this, we must select etags in an order matching how
					// they appear in the actual object, and give sequential
					// part-numbers to the resulting sequence.
					//
					// That ordering of parts in the actual written object
					// varies according to whether we wrote in the "segmented"
					// or "strided" format.
					//
					//     supposing N ranks, and P parts per rank:
					//
					// segmented:
					//
					//     all parts for a given rank are consecutive.
					//     rank r writes these parts:
					//
					//     rP, rP+1, ... (r+1)P -1
					//
					//     i.e. rank0 writes parts 0,1,2,3 ... P-1
					//
					//
					// strided:
					//
					//     rank r writes every P-th part, starting with r.
					//
					//     r, P+r, ... (P-1)P + r
					//
					//     i.e. rank0 writes parts 0,P,2P,3P ... (P-1)P
					//
					//
					// NOTE: If we knew ahead of time how many parts each rank was
					//       going to write, we could assign part-number ranges, per
					//       rank, and then have nice locality here.
					//
					//       Alternatively, we could have everyone format their own
					//       XML text and send that, instead of just the tags.  This
					//       would increase the amount of data being sent, but would
					//       reduce the work for rank0 to format everything.

               size_t  i_max;            // outer-loop
               size_t  j_max;            // inner loop
					size_t  start_multiplier; // initial offset in collected data
					size_t  stride;           // in etag_vec

					if (segmented) {          // segmented
                  i_max            = param->numTasks;
                  j_max            = etags_per_rank;
						start_multiplier = etag_data_size;		/* one rank's-worth of Etag data */
						stride           = ETAG_SIZE;				/* one ETag */
					}
					else {                    // strided
                  i_max            = etags_per_rank;
                  j_max            = param->numTasks;
						start_multiplier = ETAG_SIZE;				/* one ETag */
						stride           = etag_data_size;		/* one rank's-worth of Etag data */
					}


					xml = aws_iobuf_new();
					aws_iobuf_growth_size(xml, 1024 * 8);

					// write XML header ...
					aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

					int part = 0;
					for (i=0; i<i_max; ++i) {

						etag_ptr=etag_vec + (i * start_multiplier);

						for (j=0; j<j_max; ++j) {

							// etags were saved as contiguous text.  Extract the next one.
							char etag[ETAG_SIZE +1];
							memcpy(etag, etag_ptr, ETAG_SIZE);
							etag[ETAG_SIZE] = 0;

							// write XML for next part, with Etag ...
							snprintf(buff, BUFF_SIZE,
										"  <Part>\n"
										"    <PartNumber>%d</PartNumber>\n"
										"    <ETag>%s</ETag>\n"
										"  </Part>\n",
										part, etag);

							aws_iobuf_append_str(xml, buff);

							etag_ptr += stride;
							++ part;
						}
					}

					// write XML tail ...
					aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
				}

				else {
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  NULL,      etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);
				}
			}

			else {   /* N:N */

				xml = aws_iobuf_new();
				aws_iobuf_growth_size(xml, 1024 * 8);

				// write XML header ...
				aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

				// all parts of our object were written from this rank.
				char etag[ETAG_SIZE +1];
				int  part = 0;
				int  i;
				for (i=0; i<etags_per_rank; ++i) {

					// TBD: Instead of reading into etag, then sprintf'ing, then
					// copying into xml, we could just read directly into xml
					int sz = aws_iobuf_get_raw(param->etags, etag, ETAG_SIZE);
					if (sz != ETAG_SIZE) {
						snprintf(buff, BUFF_SIZE,
									"Read of ETag %d had length %d (not %d)\n",
									rank, i, sz, ETAG_SIZE);
						ERR_SIMPLE(buff);
					}
					etag[ETAG_SIZE] = 0;


					// write XML for next part, with Etag ...
					snprintf(buff, BUFF_SIZE,
								"  <Part>\n"
								"    <PartNumber>%d</PartNumber>\n"
								"    <ETag>%s</ETag>\n"
								"  </Part>\n",
								part, etag);

					aws_iobuf_append_str(xml, buff);

					++ part;
				}

				// write XML tail ...
				aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
			}



			// send request to finalize MPU
			if (n_to_n || (rank == 0)) {

				// DEBUGGING: show the XML we constructed
				if (param->verbose >= VERBOSE_3)
					debug_iobuf(xml, 1, 1);

				// --- POST our XML to the server.
				snprintf(buff, BUFF_SIZE,
							"%s?uploadId=%s",
							fname, param->UploadId);

				AWS4C_CHECK   ( s3_post(xml, buff) );
				AWS4C_CHECK_OK( xml );

				aws_iobuf_free(xml);
			}


			// everybody reset MPU info.  Allows another MPU, and frees memory.
			s3_MPU_reset(param);

			// Everybody meetup, so non-zero ranks won't go trying to stat the
			// N:1 file until rank0 has finished the S3 multi-part finalize.
			// The object will not appear to exist, until then.
			if (n_to_1)
				MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
		}
		else {

			// No finalization is needed, when using EMC's byte-range writing
         // support.  However, we do need to make sure everyone has
         // finished writing, before anyone starts reading.
			if (n_to_1) {
            MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
				if (param->verbose >= VERBOSE_2)
               printf("rank %d: passed barrier\n", rank);
         }
		}

		// After writing, reset the CURL connection, so that caches won't be
		// used for reads.
		aws_reset_connection();
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Close\n");
	}
}