Example #1
File: s3_put.c Project: cheah/aws4c
int main ( int argc, char * argv[] )
{
  int rv;

  aws_init ();
  aws_set_debug  ( 0 );
  int rc = aws_read_config  ( "sample" );
  if ( rc )
    {
      puts ( "Could not find a credential in the config file" );
      puts ( "Make sure your ~/.awsAuth file is correct" );
      exit ( 1 );
    }

  s3_set_bucket ("aws4c.samples");
  s3_set_mime ("text/plain");
  s3_set_acl ("public-read");

  IOBuf * bf = aws_iobuf_new ();

  rv = putObject ( "aws4c.samplefile", bf );
  printf ( "RV %d\n", rv );

  printf ( "CODE    [%d] \n", bf->code );
  printf ( "RESULT  [%s] \n", bf->result );
  printf ( "LEN     [%d] \n", bf->len );
  printf ( "LASTMOD [%s] \n", bf->lastMod );
  printf ( "ETAG    [%s] \n", bf->eTag );

  while ( 1 )
    {
      char Ln[1024];
      int sz = aws_iobuf_getdata ( bf, Ln, sizeof(Ln) );
      if ( sz == 0 ) break;   /* no more response data */
      printf ( "S[%3d] %s", sz, Ln );
    }

  /// Now repeat, using Reduced Redundancy Storage (RRS)
  aws_iobuf_free ( bf );   /* don't leak the first response buffer */
  bf = aws_iobuf_new ();
  aws_set_rrs ( 1 );
  rv = putObject ( "aws4c.samplefile.rrs", bf );
  printf ( "RV %d\n", rv );
  printf ( "CODE    [%d] \n", bf->code );
  printf ( "RESULT  [%s] \n", bf->result );
  printf ( "LEN     [%d] \n", bf->len );
  printf ( "LASTMOD [%s] \n", bf->lastMod );
  printf ( "ETAG    [%s] \n", bf->eTag );

  aws_iobuf_free ( bf );
  aws_deinit ();
  return 0;
}
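
Note that both calls above PUT a zero-length object, because the IOBuf handed to putObject is empty. Below is a minimal sketch of uploading actual content, assuming aws4c's aws_iobuf_append() and the putObject() wrapper defined elsewhere in s3_put.c behave as they do above:

  /* Sketch: upload a small payload instead of an empty object.
   * Assumes aws_iobuf_append() copies the bytes into the IOBuf and
   * putObject() PUTs the buffer contents to the current bucket. */
  IOBuf * data = aws_iobuf_new ();
  char payload[] = "hello from aws4c\n";
  aws_iobuf_append ( data, payload, sizeof(payload) - 1 );
  rv = putObject ( "aws4c.samplefile.data", data );
  printf ( "RV %d CODE [%d]\n", rv, data->code );
  aws_iobuf_free ( data );
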
Example #2
File: s3util.c Project: cheah/aws4c
int
main (int argc, char *argv[]) {
  aws_init();
  // Optional third argument sets the aws4c debug level.  Check argc first:
  // reading argv[3] when argc <= 3 walks past the NULL argv terminator.
  if(argc > 3) {
    aws_set_debug(atoi(argv[3]));
  }
  IOBuf * aws_buf = aws_iobuf_new();
  
  // Read credential file
  int rv = aws_read_config("myteksi");
  if ( rv )
  {
    fprintf(stderr, "Could not find a credential in the config file \n" );
    fprintf(stderr, "Make sure your ~/.awsAuth file is correct \n" );
    exit (1);
  }
  
  
  // Read config file
  FILE *fp = NULL;
  
  char line[ LINE_MAX ];  // renamed: "getline" shadows the POSIX function
  if( (fp = fopen("s3config", "r")) == NULL) {
    //File does not exist. Initialize it
    if( (fp = fopen("s3config", "w+")) == NULL) {
      fprintf(stderr, "ERROR: Unable to create config file.\n");
      exit(1);
    }
    
    // Ask for bucket_name
    fprintf(stdout, "Config file doesn't exist yet! Creating one now. \n");
    fprintf(stdout, "Please specify the AWS S3 base address "
                    "[default s3.amazonaws.com] :");
    char getInput[ LINE_MAX * sizeof(char) ];
    if( fgets( getInput, sizeof(getInput) , stdin ) != NULL ) {
      if( strcmp(getInput, "\n") != 0 ) {
        S3_host = strndup(getInput, strlen(getInput) -1); // Remove trailing NL
      }
      else {
        S3_host = strdup("s3.amazonaws.com");
      }
    }
    
    int validbucketname = 0;
    while( !validbucketname ) {
      fprintf(stdout, "Please specify the bucket name: ");
      if( fgets( getInput, sizeof(getInput), stdin ) != NULL
          && strcmp(getInput, "\n") != 0 ) {
        bucketname = strndup(getInput, strlen(getInput) -1); // Remove trailing NL
        validbucketname = 1;
      }
    }
    
    int cfg_len = snprintf(NULL, 0, "S3_Base_Address=\"%s\"\n"
                           "bucket_name=\"%s\"\n", S3_host, bucketname);
    char * buf = malloc( cfg_len + 1 );  // +1 for the terminating NUL
    sprintf(buf, "S3_Base_Address=\"%s\"\n"
                 "bucket_name=\"%s\"\n", S3_host, bucketname );
    
    if( fputs( buf, fp ) == EOF ) {
      fprintf(stderr, "ERROR: Unable to create config file.\n");
    }
    free(buf);
  }
  // Config file exist, parse it
  else {
    char    delim[4] = {'=', '\"', '\n', '\0'};
    char*   left;
    char*   right;
    
    while( fgets( line, sizeof(line), fp ) != NULL ) {
      if( (left = strtok(line, delim)) == NULL ) {
        continue;  // Empty line; "left" would otherwise be used uninitialized
      }
      right = strtok(NULL, delim);
      
      // Match the strings
      char* comparison = "S3_Base_Address";
      if( strcmp(left, comparison) == 0) {
        if(right != NULL) {
          S3_host = strdup(right);
        }
        else {
          S3_host = strdup("s3.amazonaws.com");
        }
      }
      
      comparison = "bucket_name";
      if( strcmp(left, comparison) == 0 && right != NULL) {
          bucketname = strdup(right);
      }
    }  // End while
    
    if( S3_host == NULL || bucketname == NULL ) {
      fprintf(stderr, "ERROR: Invalid entry in config file.\n");
      exit(1);
    }
  }
  fclose(fp);
  
  // Set parameters in S3 library
  s3_set_host(S3_host);
  s3_set_bucket(bucketname);
  s3_set_acl(S3_acl);
  
  // Check for valid arguments
  if ( argc != 3 && argc != 4 ) {
    fprintf(stderr, "Usage: s3util <operation> <filename>\n");
    fprintf(stderr, "Operation can be one of {PUT, GET, DELETE}\n");
    exit(1);
  }
  // Copy the operation and filename arguments (validity checked below)
  operation = strdup(argv[1]);
  filename  = strdup(argv[2]);
  
  // PUT file
  if( strcmp(operation, "PUT") == 0 ) {
    int rc;
    char s3replyMD5[33];
    
    rv = put_file( aws_buf, filename );
    rc = -1;
    if( aws_buf->eTag != NULL && strlen(aws_buf->eTag) > 2 ) {
      memset(s3replyMD5, 0, 33);
      memcpy(s3replyMD5, aws_buf->eTag + 1, 32);
      rc = verifyMD5(filename, s3replyMD5);
    }
    if(rv != 0 || rc != 0) {
      printf ( "PUT operation was unsuccessful \n" );
      return (rv != 0) ? rv : rc;   // always exit nonzero on failure
    }
    printf ( "MD5SUM matches, file uploaded successfully \n" );
  }
  
  // GET file
  else if( strcmp(operation, "GET") == 0 ) {
    rv = get_file( aws_buf, filename );
    if(rv == 0 && aws_buf->code == 200) {
      printf ( "File was successfully downloaded \n" );
    }
    else {
      printf ( "GET operation was unsuccessful \n" );
      return(-1);
    }
  }
  
  // DELETE FILE
  else if( strcmp(operation, "DELETE") == 0 ) {
    rv = delete_file( aws_buf, filename );
    if(rv == 0 && aws_buf->code == 204) {
      printf ( "File was successfully deleted \n" );
    }
    else {
      printf ( "DELETE operation was unsuccessful \n" );
      return(-1);
    }
  }
  else {
    fprintf(stderr, "Invalid operation, operation must be one of "
    "{PUT, GET, DELETE}\n");
    return(1);
  }
  
  /*
  printf ( "RV %d\n", rv );
  printf ( "CODE    [%d] \n", aws_buf->code );
  printf ( "RESULT  [%s] \n", aws_buf->result );
  printf ( "LEN     [%d] \n", aws_buf->len );
  printf ( "LASTMOD [%s] \n", aws_buf->lastMod );
  printf ( "ETAG    [%s] \n", aws_buf->eTag );
  */
  
  aws_iobuf_free(aws_buf);
  
  global_free();
  return 0;
}
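
The verifyMD5() helper used in the PUT branch is not shown. For a single-part upload, S3's ETag is the hex MD5 digest of the object body (the code above strips the ETag's surrounding quotes before comparing), so the helper presumably hashes the local file and compares hex strings. A hypothetical sketch using OpenSSL's MD5 routines; the real helper in s3util.c may differ:

  #include <openssl/md5.h>

  /* Hypothetical verifyMD5 sketch: returns 0 iff the local file's MD5
   * matches the 32-character hex digest taken from the S3 ETag. */
  int verifyMD5(const char *fname, const char *s3hex)
  {
    unsigned char buf[4096], dig[MD5_DIGEST_LENGTH];
    char hex[2*MD5_DIGEST_LENGTH + 1];
    MD5_CTX ctx;
    size_t n;
    int i;
    FILE *f = fopen(fname, "rb");
    if (f == NULL) return -1;
    MD5_Init(&ctx);
    while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
      MD5_Update(&ctx, buf, n);
    fclose(f);
    MD5_Final(dig, &ctx);
    for (i = 0; i < MD5_DIGEST_LENGTH; i++)
      sprintf(hex + 2*i, "%02x", dig[i]);
    return strcmp(hex, s3hex);   /* 0 on match */
  }
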
Example #3
/******************************************************************************
* Name  pack_objects 
* 
* This function traverses the object and file linked lists and reads object
* data for repacking into a new object.
******************************************************************************/
int pack_objects(File_Handles *file_info, repack_objects *objects)
{
   struct stat statbuf;

   size_t write_offset = 0;
   size_t obj_raw_size;
   size_t obj_size;
   size_t offset;
   MarFS_XattrPre pre;
   IOBuf *nb = aws_iobuf_new();
   char test_obj[2048];
   obj_files *files;
   int ret;
   char *obj_ptr;
   CURLcode s3_return;
   char pre_str[MARFS_MAX_XATTR_SIZE];


   // TODO: if file_count == 1, should this become a "uni" object?
   while (objects) { 
      // need inner loop to get files for each object
      // If chunk_count == file count no need to pack
      // and garbage collection took care of it
      if (objects->chunk_count == objects->pack_count) {
         objects=objects->next;
         continue;
      }
      //No need to pack if only one file specified in xattr and only
      //one file found
      if (objects->chunk_count == 1 && objects->pack_count ==1 ) {
         objects=objects->next;
         continue;
      }
      // Not quite sure how this next condition could happen
      // TODO: make this a single condition (chunk_count > file_count);
      // all others continue
      if (objects->pack_count > objects->chunk_count) {
         objects=objects->next;
         continue;
      }

      LOG(LOG_INFO,"object = %s\n", objects->objid);
      LOG(LOG_INFO, "file count = %ld chunks = %ld\n", objects->pack_count, objects->chunk_count);
      files = objects->files_ptr;
      write_offset = 0;
      ret=str_2_pre(&pre, objects->objid, NULL);
      sprintf(test_obj,"%s.teste",objects->objid);

      // Make this a unique object, since it is derived from an existing object
      pre.unique++;


      LOG(LOG_INFO,"stdout,new object name =%s\n", test_obj);
  
      //aws_iobuf_reset(nb);

      while (files) {
         //fprintf(stdout, "file = %s offset=%ld\n", files->filename, files->offset);

         stat(files->filename, &statbuf);


         obj_raw_size = statbuf.st_size;
         obj_size = obj_raw_size + MARFS_REC_UNI_SIZE;
         files->size = obj_size;

         //fprintf(stdout, "obj_size = %ld REC SIZE = %d\n", obj_size,MARFS_REC_UNI_SIZE);
         //write_offset+=obj_size;
         if ((obj_ptr = (char *)malloc(obj_size))==NULL) {
            fprintf(stderr, "Error allocating memory\n");
            return -1;
         }

         check_security_access(&pre);
         update_pre(&pre);
         s3_set_host(pre.host);
         //offset = objects->files_ptr->offset;

         offset = files->original_offset;
         //fprintf(stdout, "file %s will get re-written at offset %ld\n",
         //        files->filename, write_offset);

         // get object_data
         // Using byte range to get data for particular offsets
         s3_set_byte_range(offset, obj_size);
         // Use extend to get more buffering capability on each get
         aws_iobuf_extend_dynamic(nb, obj_ptr, obj_size);
         LOG(LOG_INFO, "going to get file %s from object %s at offset %ld and size %ld\n", files->filename, objects->objid, offset, obj_size);
         fprintf(file_info->outfd, "Getting file %s from object %s at offset %ld and size %ld\n", files->filename, objects->objid, offset, obj_size);
         s3_return = s3_get(nb,objects->objid);
         check_S3_error(s3_return, nb, S3_GET);

         LOG(LOG_INFO, "Read buffer write count = %ld  len = %ld\n", nb->write_count, nb->len);
         // may have to copy nb to a new buffer 
         // then write 
     

         files->new_offset = write_offset;
         write_offset += obj_size;
         files = files->next;
      }
      // create object string for put
      pre_2_str(pre_str, MARFS_MAX_XATTR_SIZE,&pre);

      strcpy(objects->new_objid, pre_str);
     
      LOG(LOG_INFO, "Going to write to object %s\n", pre_str);
      fprintf(file_info->outfd, "Writing file to object %s\n", pre_str);

      // Write data back to new object
      s3_return = s3_put(nb, pre_str);   // capture the return, or the check below tests a stale value
      check_S3_error(s3_return, nb, S3_PUT);

      aws_iobuf_reset_hard(nb);
      objects=objects->next;
   }
   return 0;
}
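
The per-file reads above call aws4c's s3_set_byte_range(offset, length) before each GET. Assuming it maps onto a standard HTTP range request (which is how S3 exposes partial object reads), the next request carries a Range header covering just that file's window of the packed object, e.g. for offset 4096 and length 4096:

   Range: bytes=4096-8191
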
Example #4
static
void *
S3_Create_Or_Open_internal(char*         testFileName,
                           IOR_param_t*  param,
                           unsigned char createFile,
                           int           multi_part_upload_p ) {

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n",
				 testFileName, createFile, multi_part_upload_p);
	}

	/* initialize curl, if needed */
	s3_connect( param );

	/* Check for unsupported flags */
	if ( param->openFlags & IOR_EXCL ) {
		fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" );
	}
	if ( param->useO_DIRECT == TRUE ) {
		fprintf( stdout, "Direct I/O mode is not implemented in S3\n" );
	}

	// easier to think
	int n_to_n = param->filePerProc;
	int n_to_1 = ! n_to_n;

	/* check whether object needs reset to zero-length */
	int needs_reset = 0;
	if (! multi_part_upload_p)
		needs_reset = 1;			  /* so "append" can work */
	else if ( param->openFlags & IOR_TRUNC )
		needs_reset = 1;			  /* so "append" can work */
	else if (createFile) {
		// AWS4C_CHECK( s3_head(param->io_buf, testFileName) );
		// if ( ! AWS4C_OK(param->io_buf) )
			needs_reset = 1;
	}

	if ( param->open == WRITE ) {

		/* initializations for N:1 or N:N writes using multi-part upload */
		if (multi_part_upload_p) {

			// For N:N, all ranks do their own MPU open/close.  For N:1, only
			// rank0 does that. Either way, the response from the server
			// includes an "uploadId", which must be used to upload parts to
			// the same object.
			if ( n_to_n || (rank == 0) ) {

				// rank0 handles truncate
				if ( needs_reset) {
					aws_iobuf_reset(param->io_buf);
					AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
					AWS4C_CHECK_OK( param->io_buf );
				}

				// POST request with URL+"?uploads" initiates multi-part upload
				snprintf(buff, BUFF_SIZE, "%s?uploads", testFileName);
				IOBuf* response = aws_iobuf_new();
				AWS4C_CHECK( s3_post2(param->io_buf, buff, NULL, response) );
				AWS4C_CHECK_OK( param->io_buf );

				// parse XML returned from server, into a tree structure
				aws_iobuf_realloc(response);
				xmlDocPtr doc = xmlReadMemory(response->first->buf,
														response->first->len,
														NULL, NULL, 0);
				if (doc == NULL)
					ERR_SIMPLE("Rank0 Failed to find POST response\n");

				// navigate parsed XML-tree to find UploadId
				xmlNode* root_element = xmlDocGetRootElement(doc);
				const char* upload_id = find_element_named(root_element, (char*)"UploadId");
				if (! upload_id)
					ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n");

				if (param->verbose >= VERBOSE_3)
					printf("got UploadId = '%s'\n", upload_id);

				const size_t upload_id_len = strlen(upload_id);
				if (upload_id_len > MAX_UPLOAD_ID_SIZE) {
					snprintf(buff, BUFF_SIZE,
								"UploadId length %zu exceeds expected max (%d)",
								upload_id_len, MAX_UPLOAD_ID_SIZE);
					ERR_SIMPLE(buff);
				}

				// save the UploadId we found
				memcpy(param->UploadId, upload_id, upload_id_len);
				param->UploadId[upload_id_len] = 0;

				// free storage for parsed XML tree
				xmlFreeDoc(doc);
				aws_iobuf_free(response);

				// For N:1, share UploadId across all ranks
				if (n_to_1)
					MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
			}
			else
				// N:1, and we're not rank0. recv UploadID from Rank 0
				MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm);
		}

		/* initializations for N:N or N:1 writes using EMC byte-range extensions */
		else {

			/* maybe reset to zero-length, so "append" can work */
			if (needs_reset) {

            if (verbose >= VERBOSE_3) {
               fprintf( stdout, "rank %d resetting\n",
                        rank);
            }

				aws_iobuf_reset(param->io_buf);
				AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
				AWS4C_CHECK_OK( param->io_buf );
			}
		}
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Create_Or_Open\n");
	}
	return ((void *) testFileName );
}
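
For reference, the POST to URL + "?uploads" returns an InitiateMultipartUploadResult document, and the tree-walk above looks for its UploadId element. An illustrative (abbreviated) response, with placeholder bucket, key, and id values:

	<InitiateMultipartUploadResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
	  <Bucket>ior-test-bucket</Bucket>
	  <Key>testFile</Key>
	  <UploadId>EXAMPLE-UPLOAD-ID</UploadId>
	</InitiateMultipartUploadResult>
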
Example #5
static void s3_connect( IOR_param_t* param ) {
	if (param->verbose >= VERBOSE_2) {
		printf("-> s3_connect\n"); /* DEBUGGING */
	}

	if ( param->curl_flags & IOR_CURL_INIT ) {
		if (param->verbose >= VERBOSE_2) {
			printf("<- s3_connect  [nothing to do]\n"); /* DEBUGGING */
		}
		return;
	}

	// --- Done once-only (per rank).  Perform all first-time inits.
	//
	// The aws library requires a config file, as illustrated below.  We
	// assume that the user running the test has an entry in this file,
	// using their login moniker (i.e. `echo $USER`) as the key, as
	// suggested in the example:
	//
	//     <user>:<s3_login_id>:<s3_private_key>
	//
	// This file must not be readable by other than user.
	//
	// NOTE: These inits could be done in init_IORParam_t(), in ior.c, but
	//       would require conditional compilation, there.

	aws_set_debug(param->verbose >= 4);
	aws_read_config(getenv("USER"));  // requires ~/.awsAuth
	aws_reuse_connections(1);

	// initialize IOBufs.  These are basically dynamically-extensible
	// linked-lists.  "growth size" controls the increment of new memory
	// allocated, whenever storage is used up.
	param->io_buf = aws_iobuf_new();
	aws_iobuf_growth_size(param->io_buf, 1024*1024*1);

	param->etags = aws_iobuf_new();
	aws_iobuf_growth_size(param->etags, 1024*1024*8);

   // WARNING: if you have http_proxy set in your environment, you may need
   //          to override it here.  TBD: add a command-line variable to
   //          allow you to define a proxy.
   //
	// our hosts are currently 10.140.0.15 - 10.140.0.18
	// TBD: Try DNS-round-robin server at vi-lb.ccstar.lanl.gov
   // TBD: try HAProxy round-robin at 10.143.0.1

#if 1
   //   snprintf(buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4));
   //   s3_set_proxy(buff);
   //
   //   snprintf(buff, BUFF_SIZE, "10.140.0.%d", 15 + (rank % 4));
   //	s3_set_host(buff);

   snprintf(buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4));
   s3_set_host(buff);

#else
/*
 * If you just want to go to one of the ECS nodes, put that IP
 * address in here directly with port 9020.
 */
//   s3_set_host("10.140.0.15:9020");

/*
 * If you want to go to haproxy.ccstar.lanl.gov, this is its IP
 * address.
 *
 */
//   s3_set_proxy("10.143.0.1:80");
//   s3_set_host( "10.143.0.1:80");
#endif

	// make sure test-bucket exists
	s3_set_bucket((char*)bucket_name);

   if (rank == 0) {
      AWS4C_CHECK( s3_head(param->io_buf, "") );
      if ( param->io_buf->code == 404 ) {					// "404 Not Found"
         printf("  bucket '%s' doesn't exist\n", bucket_name);

         AWS4C_CHECK( s3_put(param->io_buf, "") );	/* creates URL as bucket + obj */
         AWS4C_CHECK_OK(     param->io_buf );		// assure "200 OK"
         printf("created bucket '%s'\n", bucket_name);
      }
      else {														// assure "200 OK"
         AWS4C_CHECK_OK( param->io_buf );
      }
   }
   MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");


	// Maybe allow EMC extensions to S3
	s3_enable_EMC_extensions(param->curl_flags & IOR_CURL_S3_EMC_EXT);

	// don't perform these inits more than once
	param->curl_flags |= IOR_CURL_INIT;


	if (param->verbose >= VERBOSE_2) {
		printf("<- s3_connect  [success]\n");
	}
}
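
As the comments above describe, aws_read_config(getenv("USER")) requires an entry in ~/.awsAuth keyed by the login name, and the file must be readable only by its owner (chmod 600). An illustrative entry, using AWS's documented example credentials as placeholders:

   jsmith:AKIAIOSFODNN7EXAMPLE:wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
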
Example #6
static
void
S3_Close_internal( void*         fd,
						 IOR_param_t*  param,
						 int           multi_part_upload_p ) {

	char* fname = (char*)fd; /* see NOTE above S3_Create_Or_Open() */

	// easier to think
	int n_to_n    = param->filePerProc;
	int n_to_1    = (! n_to_n);
   int segmented = (param->segmentCount == 1);

	if (param->verbose >= VERBOSE_2) {
		printf("-> S3_Close('%s', ,%d) %s\n",
				 fname,
             multi_part_upload_p,
             ((n_to_n) ? "N:N" : ((segmented) ? "N:1(seg)" : "N:1(str)")));
	}

	if (param->open == WRITE) {


		// finalizing Multi-Part Upload (for N:1 or N:N)
		if (multi_part_upload_p) {


			size_t etag_data_size = param->etags->write_count; /* local ETag data (bytes) */
			size_t etags_per_rank = etag_data_size / ETAG_SIZE;		/* number of local etags */

			// --- create XML containing ETags in an IOBuf for "close" request
			IOBuf* xml = NULL;


			if (n_to_1) {

				// for N:1, gather all Etags at Rank0
				MPI_Datatype mpi_size_t;
				if (sizeof(size_t) == sizeof(int))
					mpi_size_t = MPI_INT;
				else if (sizeof(size_t) == sizeof(long))
					mpi_size_t = MPI_LONG;
				else
					mpi_size_t = MPI_LONG_LONG;

				// Everybody should have the same number of ETags (?)
				size_t etag_count_max = 0;		 /* highest number on any proc */
				MPI_Allreduce(&etags_per_rank, &etag_count_max,
								  1, mpi_size_t, MPI_MAX, param->testComm);
				if (etags_per_rank != etag_count_max) {
					printf("Rank %d: etag count mismatch: max:%d, mine:%d\n",
							 rank, etag_count_max, etags_per_rank);
					MPI_Abort(param->testComm, 1);
				}

				// collect ETag data at Rank0
				aws_iobuf_realloc(param->etags);             /* force single contiguous buffer */
				char* etag_data = param->etags->first->buf;  /* per-rank data, contiguous */

				if (rank == 0)  {
					char* etag_ptr;
					int   i;
					int   j;
					int   rnk;

					char* etag_vec = (char*)malloc((param->numTasks * etag_data_size) +1);
					if (! etag_vec) {
						fprintf(stderr, "rank 0 failed to malloc %d bytes\n",
								  param->numTasks * etag_data_size);
						MPI_Abort(param->testComm, 1);
					}
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  etag_vec,  etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);

					// --- debugging: show the gathered etag data
					//     (This shows the raw concatenated etag-data from each node.)
					if (param->verbose >= VERBOSE_4) {

						printf("rank 0: gathered %d etags from all ranks:\n", etags_per_rank);
						etag_ptr=etag_vec;
						for (rnk=0; rnk<param->numTasks; ++rnk) {
							printf("\t[%d]: '", rnk);

							int ii;
							for (ii=0; ii<etag_data_size; ++ii)	/* NOT null-terminated! */
								printf("%c", etag_ptr[ii]);

							printf("'\n");
							etag_ptr += etag_data_size;
						}
					}


					// add XML for *all* the parts.  The XML must be ordered by
					// part-number.  Each rank wrote <etags_per_rank> parts,
					// locally.  At rank0, the etags for each rank are now
					// stored as a contiguous block of text, with the blocks
					// stored in rank order in etag_vec.  In other words, our
					// internal rep at rank 0 matches the "segmented" format.
					// From this, we must select etags in an order matching how
					// they appear in the actual object, and give sequential
					// part-numbers to the resulting sequence.
					//
					// That ordering of parts in the actual written object
					// varies according to whether we wrote in the "segmented"
					// or "strided" format.
					//
					//     supposing N ranks, and P parts per rank:
					//
					// segmented:
					//
					//     all parts for a given rank are consecutive.
					//     rank r writes these parts:
					//
					//     rP, rP+1, ... (r+1)P -1
					//
					//     i.e. rank0 writes parts 0,1,2,3 ... P-1
					//
					//
					// strided:
					//
					//     rank r writes every N-th part, starting with r.
					//
					//     r, N+r, ... (P-1)N + r
					//
					//     i.e. rank0 writes parts 0,N,2N ... (P-1)N
					//
					//
					// NOTE: If we knew ahead of time how many parts each rank was
					//       going to write, we could assign part-number ranges, per
					//       rank, and then have nice locality here.
					//
					//       Alternatively, we could have everyone format their own
					//       XML text and send that, instead of just the tags.  This
					//       would increase the amount of data being sent, but would
					//       reduce the work for rank0 to format everything.

               size_t  i_max;            // outer-loop
               size_t  j_max;            // inner loop
					size_t  start_multiplier; // initial offset in collected data
					size_t  stride;           // in etag_vec

					if (segmented) {          // segmented
                  i_max            = param->numTasks;
                  j_max            = etags_per_rank;
						start_multiplier = etag_data_size;		/* one rank's-worth of Etag data */
						stride           = ETAG_SIZE;				/* one ETag */
					}
					else {                    // strided
                  i_max            = etags_per_rank;
                  j_max            = param->numTasks;
						start_multiplier = ETAG_SIZE;				/* one ETag */
						stride           = etag_data_size;		/* one rank's-worth of Etag data */
					}


					xml = aws_iobuf_new();
					aws_iobuf_growth_size(xml, 1024 * 8);

					// write XML header ...
					aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

					int part = 0;
					for (i=0; i<i_max; ++i) {

						etag_ptr=etag_vec + (i * start_multiplier);

						for (j=0; j<j_max; ++j) {

							// etags were saved as contiguous text.  Extract the next one.
							char etag[ETAG_SIZE +1];
							memcpy(etag, etag_ptr, ETAG_SIZE);
							etag[ETAG_SIZE] = 0;

							// write XML for next part, with Etag ...
							snprintf(buff, BUFF_SIZE,
										"  <Part>\n"
										"    <PartNumber>%d</PartNumber>\n"
										"    <ETag>%s</ETag>\n"
										"  </Part>\n",
										part, etag);

							aws_iobuf_append_str(xml, buff);

							etag_ptr += stride;
							++ part;
						}
					}

					// write XML tail ...
					aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
				}

				else {
					MPI_Gather(etag_data, etag_data_size, MPI_BYTE,
								  NULL,      etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD);
				}
			}

			else {   /* N:N */

				xml = aws_iobuf_new();
				aws_iobuf_growth_size(xml, 1024 * 8);

				// write XML header ...
				aws_iobuf_append_str(xml, "<CompleteMultipartUpload>\n");

				// all parts of our object were written from this rank.
				char etag[ETAG_SIZE +1];
				int  part = 0;
				int  i;
				for (i=0; i<etags_per_rank; ++i) {

					// TBD: Instead of reading into etag, then sprintf'ing, then
					// copying into xml, we could just read directly into xml
					int sz = aws_iobuf_get_raw(param->etags, etag, ETAG_SIZE);
					if (sz != ETAG_SIZE) {
						snprintf(buff, BUFF_SIZE,
									"Rank %d: read of ETag %d had length %d (not %d)\n",
									rank, i, sz, ETAG_SIZE);
						ERR_SIMPLE(buff);
					}
					etag[ETAG_SIZE] = 0;


					// write XML for next part, with Etag ...
					snprintf(buff, BUFF_SIZE,
								"  <Part>\n"
								"    <PartNumber>%d</PartNumber>\n"
								"    <ETag>%s</ETag>\n"
								"  </Part>\n",
								part, etag);

					aws_iobuf_append_str(xml, buff);

					++ part;
				}

				// write XML tail ...
				aws_iobuf_append_str(xml, "</CompleteMultipartUpload>\n");
			}



			// send request to finalize MPU
			if (n_to_n || (rank == 0)) {

				// DEBUGGING: show the XML we constructed
				if (param->verbose >= VERBOSE_3)
					debug_iobuf(xml, 1, 1);

				// --- POST our XML to the server.
				snprintf(buff, BUFF_SIZE,
							"%s?uploadId=%s",
							fname, param->UploadId);

				AWS4C_CHECK   ( s3_post(xml, buff) );
				AWS4C_CHECK_OK( xml );

				aws_iobuf_free(xml);
			}


			// everybody reset MPU info.  Allows another MPU, and frees memory.
			s3_MPU_reset(param);

			// Everybody meetup, so non-zero ranks won't go trying to stat the
			// N:1 file until rank0 has finished the S3 multi-part finalize.
			// The object will not appear to exist, until then.
			if (n_to_1)
				MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
		}
		else {

			// No finalization is needed, when using EMC's byte-range writing
         // support.  However, we do need to make sure everyone has
         // finished writing, before anyone starts reading.
			if (n_to_1) {
            MPI_CHECK(MPI_Barrier(param->testComm), "barrier error");
				if (param->verbose >= VERBOSE_2)
               printf("rank %d: passed barrier\n", rank);
         }
		}

		// After writing, reset the CURL connection, so that caches won't be
		// used for reads.
		aws_reset_connection();
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Close\n");
	}
}
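
For reference, the XML assembled above and POSTed to "<fname>?uploadId=..." renders like the following (two parts shown, ETag values illustrative). Note that this code numbers parts from 0, whereas standard S3 part numbers start at 1:

	<CompleteMultipartUpload>
	  <Part>
	    <PartNumber>0</PartNumber>
	    <ETag>9bb58f26192e4ba00f01e2e7b136bbd8</ETag>
	  </Part>
	  <Part>
	    <PartNumber>1</PartNumber>
	    <ETag>5d41402abc4b2a76b9719d911017c592</ETag>
	  </Part>
	</CompleteMultipartUpload>
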
Example #7
/******************************************************************************
* Name  pack_objects 
* 
* This function traverses the object linked list created in find_repack_objects
* and reads the corresponding file data.  That data is then written to a new
* object.  Because the old object had holes due to missing files, a new
* write offset is calculated.
*
******************************************************************************/
int pack_objects( File_Handles *file_info, 
                 repack_objects *objects)
{
   size_t write_offset = 0;
   size_t obj_raw_size;
   size_t obj_size;
   size_t unique;
   IOBuf *nb = aws_iobuf_new();
   obj_files *files;
   char marfs_path[1024];
   int flags;

   // Traverse object link list and find those that should be packed
   while (objects) { 
      // need inner loop to get files for each object
      // If chunk_count == file count no need to pack
      // and garbage collection took care of it
      if (objects->chunk_count == objects->pack_count) {
         objects=objects->next;
         continue;
      }
      //No need to pack if only one file specified in xattr and only
      //one file found
      if (objects->chunk_count == 1 && objects->pack_count ==1 ) {
         objects=objects->next;
         continue;
      }
      // Not quite sure how this next condition could happen
      // TODO: make this a single condition (chunk_count > file_count);
      // all others continue
      if (objects->pack_count > objects->chunk_count) {
         objects=objects->next;
         continue;
      }

      LOG(LOG_INFO,"object = %s\n", objects->objid);
      LOG(LOG_INFO, "file count = %ld chunks = %ld\n", objects->pack_count, objects->chunk_count);
      files = objects->files_ptr;
      write_offset = 0;

      files->new_offset = write_offset;

      // Specify a new object being accessed 
      unique=0;

      // Each object has a files linked list.  Read each file 
      // at the offset calculated and write back to new object with
      // new offset.
      while (files) {
         // Get the associated MarFS file handle from the linked list
         MarFS_XattrPre*  pre  = &files->fh->info.pre;

         // If new object increment unique to give it a new objid
         if (unique == 0) 
            pre->unique++;    
        
         // Need to make sure that objectSize
         // does not include recovery info TBD
         obj_raw_size = files->fh->objectSize;
         
         obj_size = obj_raw_size + MARFS_REC_UNI_SIZE;
         files->size = obj_size;

//********************
// Questions:
// - correct path (fuse path)?
// - what flags for open?
// - offset for read becomes 0, correct?
//

         char *read_buf;
         size_t read_count;
         ssize_t write_count;

         // read_buf must hold the whole chunk (obj_size can exceed any
         // fixed-size stack buffer), so allocate it dynamically
         if ((read_buf = (char *)malloc(obj_size)) == NULL) {
            fprintf(stderr, "Error allocating memory\n");
            return -1;
         }

         flags = O_RDONLY; 
         get_marfs_path(files->filename, &marfs_path[0]);
         marfs_open_at_offset(marfs_path,
                              files->fh,
                              flags,
                              files->fh->info.post.obj_offset, 
                              obj_size);
         read_count = marfs_read(marfs_path, // Need recovery info as well
                                 read_buf,
                                 obj_size,
                                 0,
                                 files->fh);

         marfs_release (marfs_path, files->fh);
// Instead of reading more, do the write now.
// This becomes a new object because pre.unique was incremented.
//
// Do I need to do anything special with flags?
// O_CREAT or O_APPEND?
// Need a new open flag, or a new function, for recovery info
//
         marfs_open(marfs_path,
                    files->fh,
                    flags,  // WRITE
                    obj_size);
         write_count = marfs_write(marfs_path, // Need recovery info as well
                                   read_buf,
                                   obj_size,
                                   files->new_offset,
                                   files->fh);
         // This needs to be moved outside the loop,
         // and I need an open for write before while (files),
         // with O_CREAT and O_WRONLY.
         // Jeff states I may need a special release with offset
         // of last object
         //
         //marfs_release (marfs_path, files->fh);

         LOG(LOG_INFO, "Read buffer write count = %ld  len = %ld\n", nb->write_count, nb->len);
         // may have to copy nb to a new buffer 
         // then write 
     

         free(read_buf);

         files->new_offset = write_offset;
         write_offset += obj_size;
         files = files->next;
      }
      objects=objects->next;
   }
   return 0;
}
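
To make the repacking arithmetic concrete: if an object holds two surviving files whose sizes (data plus MARFS_REC_UNI_SIZE recovery info) are 5000 and 3000 bytes at original offsets 0 and 12000 (the gap being a hole left by a deleted file), the loop reads each file at its original offset and accumulates write_offset so the files land at new offsets 0 and 5000, yielding a dense 8000-byte repacked object.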