示例#1
0
// Run the HTTP transfer for an object-stream: a GET when the stream carries
// OSF_READING, otherwise a PUT, both against os->url using the stream's
// IOBuf.
//
// Returns 0 when the request finished (a 206 on a GET, or whatever
// AWS4C_OK() accepts), and -1 after logging the response otherwise.
// NOTE(review): AWS4C_CHECK1 is defined elsewhere and may add exit paths of
// its own -- confirm against its definition.
int s3_op_internal(ObjectStream* os) {
   IOBuf*        iobuf = &os->iob;
   __attribute__ ((unused)) AWSContext*   ctx = iobuf->context;

   // non-zero when this stream was opened for reading
   const int reading = (os->flags & OSF_READING);

   // issue the request in the direction the stream was opened for
   if (! reading) {
      LOG(LOG_INFO, "PUT  '%s/%s/%s'\n",
          (ctx ? ctx->S3Host : "*"),  (ctx ? ctx->Bucket : "*"), os->url);
      // If you are getting errors here, the comments above the "#if
      // ((LIBCURL_VERSION ...", in stream_sync(), *might* be relevant.
      AWS4C_CHECK1( s3_put(iobuf, os->url) );
   }
   else {
      LOG(LOG_INFO, "GET  '%s/%s/%s'\n",
          (ctx ? ctx->S3Host : "*"),  (ctx ? ctx->Bucket : "*"), os->url);
      AWS4C_CHECK1( s3_get(iobuf, os->url) );
   }

   // A byte-range GET can leave streaming_writefunc() waiting for a curl
   // callback that never arrives.  That happens when the buffer still has
   // writable space after the last bytes of the response were processed,
   // e.g. because the caller (such as fuse) asked for more bytes than the
   // object holds and supplied a buffer big enough to receive them.
   // Flag EOF and post iob_full so the waiting side is released.
   if (reading && (iobuf->code == 206)) {
      LOG(LOG_INFO, "GET complete\n");
      os->flags |= OSF_EOF;
      POST(&os->iob_full);
      return 0;
   }

   // anything else is judged by the generic response check
   if (! AWS4C_OK(iobuf)) {
      LOG(LOG_ERR, "CURL ERROR: %lx %d '%s'\n", (size_t)iobuf, iobuf->code, iobuf->result);
      return -1;
   }

   LOG(LOG_INFO, "%s complete\n", (reading ? "GET" : "PUT"));
   return 0;
}
示例#2
0
文件: s3util.c 项目: cheah/aws4c
/*
 * Fetch an S3 object and save its contents in a new local file.
 *
 * The object requested is whatever getFilenameFromPath() derives from
 * <name>; the data is written to the local path <name>.  An existing local
 * file is never overwritten.
 *
 * aws_buf : IOBuf used for the request; the response body is drained from it.
 * name    : local destination path.
 *
 * Returns the s3_get() result when nothing is written (the request failed,
 * the response code is not 200, or the body is empty) and after a
 * successful save.  Returns -1 when the destination already exists, cannot
 * be created, or a write to it fails.
 */
int
get_file( IOBuf * aws_buf, char *name ) {
  char * filename = getFilenameFromPath(name);

  int rv = s3_get(aws_buf, filename);
  if(rv != 0 || aws_buf->code != 200 || aws_buf->len == 0) {
    // nothing usable was downloaded; hand the request status to the caller
    return rv;
  }

  FILE  * fp;
  char writebuf[BUF_SIZE];

  // Refuse to clobber a file that is already there
  if( (fp = fopen(name, "rb")) != NULL) {
    fprintf(stdout, "WARNING: The specified file already exist. \n"
                    "Refuse to overwrite. \n");
    fclose(fp);
    return -1;
  }

  // File doesn't exist yet. Okay to write.  Binary mode, because object
  // data must not go through text-mode newline translation.
  if( (fp = fopen(name, "wb")) == NULL) {
    fprintf(stdout, "ERROR: Unable to create the specified file. \n");
    return -1;
  }

  int sz;
  while( ( sz = aws_iobuf_getdata ( aws_buf, writebuf, sizeof(writebuf))) != 0 ) {
    size_t n = fwrite(writebuf, sizeof(unsigned char), sz, fp);
    if(n != (size_t)sz) {
      fprintf(stdout, "ERROR: Error writing to file. \n");
      fclose(fp);   // don't leak the stream on the error path
      return -1;
    }
  }

  // fclose() flushes buffered data, so a failure here is a write failure too
  if(fclose(fp) != 0) {
    fprintf(stdout, "ERROR: Error writing to file. \n");
    return -1;
  }
  return rv;
}
示例#3
0
/*
 * Transfer one buffer to, or from, an S3 object.
 *
 * access              : WRITE uploads <buffer>; any other value (READ or
 *                       CHECK) downloads into it.
 * file                : object name, really a char* (see NOTE above
 *                       S3_Create_Or_Open()).
 * buffer, length      : caller's data buffer and the number of bytes to move.
 * param               : IOR parameters; this function reads offset,
 *                       filePerProc, segmentCount, blockSize, transferSize,
 *                       numTasks, UploadId and fsyncPerWrite, uses io_buf and
 *                       etags, and increments part_number on each
 *                       multi-part write.
 * multi_part_upload_p : non-zero writes the buffer as one part of a
 *                       Multi-Part Upload; zero uses EMC byte-range writes.
 *
 * Returns <length>.  Failures are reported through AWS4C_CHECK,
 * AWS4C_CHECK_OK and ERR_SIMPLE (defined elsewhere), or by exit(1) when the
 * returned ETag has an unexpected length.
 */
static
IOR_offset_t
S3_Xfer_internal(int          access,
					  void*        file,
					  IOR_size_t*  buffer,
					  IOR_offset_t length,
					  IOR_param_t* param,
					  int          multi_part_upload_p ) {

	if (param->verbose >= VERBOSE_2) {
		// pointers and <length> are cast so they match the %llx / %llu specifiers
		printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n",
				 access, (char*)file,
				 (unsigned long long)(size_t)buffer,
				 (unsigned long long)length,
				 (unsigned long long)(size_t)param);
	}

	char*      fname = (char*)file; /* see NOTE above S3_Create_Or_Open() */
	size_t     remaining = (size_t)length;
	char*      data_ptr = (char *)buffer;
	off_t      offset = param->offset;

	// easier to think
	int        n_to_n    = param->filePerProc;
	int        n_to_1    = (! n_to_n);
	int        segmented = (param->segmentCount == 1);


	if (access == WRITE) {	/* WRITE */

		if (verbose >= VERBOSE_3) {
			fprintf( stdout, "rank %d writing length=%lld to offset %lld\n",
						rank,
						(long long)remaining,
						(long long)(param->offset + length - remaining));
		}


		if (multi_part_upload_p) {

			// For N:1, part-numbers must have a global ordering for the
			// components of the final object.  param->part_number is
			// incremented by 1 per write, on each rank.  This lets us use it
			// to compute a global part-numbering.
			//
			// In the N:N case, we only need to increment part-numbers within
			// each rank.
			//
			// In the N:1 case, the global order of part-numbers we're writing
			// depends on whether we're writing strided or segmented, in
			// other words, how <offset> and <remaining> are actually
			// positioning the parts being written. [See discussion at
			// S3_Close_internal().]
			//
			// NOTE: 's3curl.pl --debug' shows StringToSign having partNumber
			//       first, even if I put uploadId first in the URL.  Maybe
			//       that's what the server will do.  GetStringToSign() in
			//       aws4c is not clever about this, so we spoon-feed args in
			//       the proper order.

			size_t part_number;
			if (n_to_1) {
				if (segmented) {      // segmented
					size_t parts_per_rank = param->blockSize / param->transferSize;
					part_number = (rank * parts_per_rank) + param->part_number;
				}
				else {                // strided
					part_number = (param->part_number * param->numTasks) + rank;
				}
			}
			else {
				part_number = param->part_number;
			}
			++ param->part_number;

			// part_number is a size_t, so it must be formatted with %zu;
			// a %d here would misread the varargs that follow it.
			snprintf(buff, BUFF_SIZE,
						"%s?partNumber=%zu&uploadId=%s",
						fname, part_number, param->UploadId);

			// For performance, we append <data_ptr> directly into the linked list
			// of data in param->io_buf.  We are "appending" rather than
			// "extending", so the added buffer is seen as written data, rather
			// than empty storage.
			//
			// aws4c parses some header-fields automatically for us (into members
			// of the IOBuf).  After s3_put(), we can just read the etag from
			// param->io_buf->eTag.  The server actually returns literal
			// quote-marks, at both ends of the string.

			aws_iobuf_reset(param->io_buf);
			aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
			AWS4C_CHECK( s3_put(param->io_buf, buff) );
			AWS4C_CHECK_OK( param->io_buf );

			char*  etag     = param->io_buf->eTag; // incl quote-marks at [0] and [len-1]
			size_t etag_len = strlen(etag);

			if (verbose >= VERBOSE_3) {
				fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n",
							rank,
							param->numTasks,
							(n_to_1 ? "N:1" : "N:N"),
							(segmented ? "segmented" : "strided"),
							(long long)offset,
							(long long)part_number,
							etag);
			}
			if (etag_len != ETAG_SIZE+2) { /* quotes at both ends */
				fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n",
						  rank, ETAG_SIZE);
				exit(1);
			}

			// save the eTag for later, without the surrounding quote-marks
			aws_iobuf_append(param->etags, etag +1, etag_len -2);

			// DEBUGGING
			if (verbose >= VERBOSE_4) {
				printf("rank %d: part %zu = ETag %s\n", rank, part_number, etag);
			}

			// drop ptrs to <data_ptr>, in param->io_buf
			aws_iobuf_reset(param->io_buf);
		}
		else {	 // use EMC's byte-range write-support, instead of MPU


			// NOTE: You must call 's3_enable_EMC_extensions(1)' for
			//       byte-ranges to work for writes.
			if (n_to_n)
				s3_set_byte_range(-1,-1); // EMC header "Range: bytes=-1-" means "append"
			else
				s3_set_byte_range(offset, remaining);

			// For performance, we append <data_ptr> directly into the linked list
			// of data in param->io_buf.  We are "appending" rather than
			// "extending", so the added buffer is seen as written data, rather
			// than empty storage.
			aws_iobuf_reset(param->io_buf);
			aws_iobuf_append_static(param->io_buf, data_ptr, remaining);
			AWS4C_CHECK   ( s3_put(param->io_buf, fname) );
			AWS4C_CHECK_OK( param->io_buf );

			// drop ptrs to <data_ptr>, in param->io_buf
			aws_iobuf_reset(param->io_buf);
		}


		if ( param->fsyncPerWrite == TRUE ) {
			WARN("S3 doesn't support 'fsync'" ); /* does it? */
		}

	}
	else {				/* READ or CHECK */

		if (verbose >= VERBOSE_3) {
			fprintf( stdout, "rank %d reading from offset %lld\n",
						rank,
						(long long)(param->offset + length - remaining) );
		}

		// read specific byte-range from the object
		// [This is included in the "pure" S3 spec.]
		s3_set_byte_range(offset, remaining);

		// For performance, we append <data_ptr> directly into the linked
		// list of data in param->io_buf.  In this case (i.e. reading),
		// we're "extending" rather than "appending".  That means the
		// buffer represents empty storage, which will be filled by the
		// libcurl writefunction, invoked via aws4c.
		aws_iobuf_reset(param->io_buf);
		aws_iobuf_extend_static(param->io_buf, data_ptr, remaining);
		AWS4C_CHECK( s3_get(param->io_buf, fname) );
		if (param->io_buf->code != 206) { /* '206 Partial Content' */
			snprintf(buff, BUFF_SIZE,
						"Unexpected result (%d, '%s')",
						param->io_buf->code, param->io_buf->result);
			ERR_SIMPLE(buff);
		}

		// drop refs to <data_ptr>, in param->io_buf
		aws_iobuf_reset(param->io_buf);
	}


	if (param->verbose >= VERBOSE_2) {
		printf("<- S3_Xfer\n");
	}
	return ( length );
}
示例#4
0
/******************************************************************************
* Name  pack_objects
*
* This function traverses the object and file link lists and reads object
* data for repacking into a new object.
*
* For every object whose chunk_count exceeds its pack_count, each listed
* file's byte range is fetched from the existing object with s3_get() into
* one shared IOBuf, and the accumulated data is then written with s3_put()
* under a new object id (the parsed id with its `unique` field incremented).
* Each file's `size` and `new_offset`, and the object's `new_objid`, are
* updated along the way.
*
* file_info : supplies `outfd`, the stream that progress lines are written to
* objects   : head of the linked list of repack candidates
*
* Returns 0 after walking the whole list, or -1 when a file cannot be
* stat'ed or a read buffer cannot be allocated.
*
* NOTE(review): the IOBuf from aws_iobuf_new() is never released here, and
* str_2_pre()'s result is stored but not examined -- confirm whether either
* needs handling.
******************************************************************************/
int pack_objects(File_Handles *file_info, repack_objects *objects)
{
   struct stat statbuf;
   size_t write_offset = 0;
   size_t obj_raw_size;
   size_t obj_size;
   size_t offset;
   MarFS_XattrPre pre;
   IOBuf *nb = aws_iobuf_new();
   char test_obj[2048];
   obj_files *files;
   int ret;
   char *obj_ptr;
   CURLcode s3_return;
   char pre_str[MARFS_MAX_XATTR_SIZE];


   // Also, if file_count =1 do i make uni or?
   while (objects) {
      // Only objects holding more chunks than files found need packing.
      // Equal counts (including the 1/1 case) were already handled by
      // garbage collection, and pack_count > chunk_count is not expected.
      if (objects->pack_count >= objects->chunk_count) {
         objects=objects->next;
         continue;
      }

      LOG(LOG_INFO,"object = %s\n", objects->objid);
      LOG(LOG_INFO, "file count = %ld chunks = %ld\n", objects->pack_count, objects->chunk_count);
      files = objects->files_ptr;
      write_offset = 0;
      ret=str_2_pre(&pre, objects->objid, NULL);
      // bounded: objid length is not known to fit in test_obj
      snprintf(test_obj, sizeof(test_obj), "%s.teste",objects->objid);

      //Make this a unique object since it derived from an existing object
      pre.unique++;


      LOG(LOG_INFO,"stdout,new object name =%s\n", test_obj);

      // inner loop: pull each file's data out of the existing object
      while (files) {

         // The byte range requested below is derived from st_size, so a
         // failed stat() must not be allowed to leave a stale size in use.
         if (stat(files->filename, &statbuf) != 0) {
            fprintf(stderr, "Error: unable to stat %s\n", files->filename);
            return -1;
         }

         obj_raw_size = statbuf.st_size;
         obj_size = obj_raw_size + MARFS_REC_UNI_SIZE;
         files->size = obj_size;

         if ((obj_ptr = malloc(obj_size))==NULL) {
            fprintf(stderr, "Error allocating memory\n");
            return -1;
         }

         check_security_access(&pre);
         update_pre(&pre);
         s3_set_host(pre.host);

         offset = files->original_offset;

         // get object_data
         // Using byte range to get data for particular offsets
         s3_set_byte_range(offset, obj_size);
         // Use extend to get more buffering capability on each get
         // NOTE(review): presumably nb takes ownership of obj_ptr here
         // (the "_dynamic" variant) -- confirm against aws4c.
         aws_iobuf_extend_dynamic(nb, obj_ptr, obj_size);
         LOG(LOG_INFO, "going to get file %s from object %s at offset %zu and size %zu\n", files->filename, objects->objid, offset, obj_size);
         fprintf(file_info->outfd, "Getting file %s from object %s at offset %zu and size %zu\n", files->filename, objects->objid, offset, obj_size);
         s3_return = s3_get(nb,objects->objid);
         check_S3_error(s3_return, nb, S3_GET);

         LOG(LOG_INFO, "Read buffer write count = %ld  len = %ld\n", nb->write_count, nb->len);
         // may have to copy nb to a new buffer
         // then write

         // remember where this file lands inside the new object
         files->new_offset = write_offset;
         write_offset += obj_size;
         files = files->next;
      }
      // create object string for put
      pre_2_str(pre_str, MARFS_MAX_XATTR_SIZE,&pre);

      // NOTE(review): unbounded copy; the capacity of new_objid is not
      // visible from here -- confirm it holds MARFS_MAX_XATTR_SIZE bytes.
      strcpy(objects->new_objid, pre_str);

      LOG(LOG_INFO, "Going to write to object %s\n", pre_str);
      fprintf(file_info->outfd, "Writing file to object %s\n", pre_str);

      // Write data back to new object.  The PUT's own result must be
      // captured, otherwise the check below re-examines the last GET.
      s3_return = s3_put(nb,pre_str);
      check_S3_error(s3_return, nb, S3_PUT);

      aws_iobuf_reset_hard(nb);
      objects=objects->next;
   }
   return 0;
}
/* Seconds elapsed between two gettimeofday() samples. */
static float makeflow_s3_elapsed_seconds(const struct timeval *start, const struct timeval *end){
	return ((end->tv_sec*1000000 + end->tv_usec) - (start->tv_sec*1000000 + start->tv_usec)) / 1000000.0;
}

/*
 * Bring a task's archive entry from the s3 bucket into the local archive.
 *
 * If <task_path>/<id> is not readable locally, the object <id> is downloaded
 * to that path and unpacked with tar into <task_path>.  Then, for every
 * output file of <t>, the symlink <task_path>/output_files/<name> is read;
 * if its target's basename is not yet present under <a->dir>/files/<xx>/,
 * that object is downloaded too and, when it turns out to be a tarball,
 * unpacked in place of the downloaded file.
 *
 * Download time is accumulated into total_down_time.
 *
 * Returns 1 when the task was already present or everything was copied, and
 * 0 when a local file cannot be created, a download fails, or the task
 * tarball extraction command cannot be run.
 *
 * NOTE(review): object and path names are interpolated unquoted into shell
 * commands run through system(); names containing shell metacharacters
 * would be interpreted by the shell.
 */
static int makeflow_s3_archive_copy_task_files(struct archive_instance *a, char *id, char *task_path, struct batch_task *t){
	char *taskTarFile = string_format("%s/%s",task_path,id);
	// Check to see if the task is already in the local archive so it is not downloaded twice
	if(access(taskTarFile,R_OK) == 0){
		debug(D_MAKEFLOW_HOOK,"TASK already exist in local archive, not downloading from s3 bucket");
		free(taskTarFile);
		return 1;
	}

	struct timeval start_time;
	struct timeval end_time;
	float run_time;

	// Copy tar file from the s3 bucket
	FILE *taskFile = fopen(taskTarFile,"wb");
	if(!taskFile){
		debug(D_MAKEFLOW_HOOK,"Could not create %s to download %s from the s3 bucket",taskTarFile,id);
		free(taskTarFile);
		return 0;
	}
	gettimeofday(&start_time,NULL);
	int task_download_failed = (s3_get(taskFile,id) != 0);
	gettimeofday(&end_time,NULL);
	run_time = makeflow_s3_elapsed_seconds(&start_time,&end_time);
	total_down_time += run_time;
	fclose(taskFile);
	if(task_download_failed){
		debug(D_MAKEFLOW_HOOK," It took %f seconds for %s to fail downloading to %s",run_time, id, a->s3_dir);
		debug(D_MAKEFLOW_HOOK," The total download time is %f second(s)",total_down_time);
		// A truncated file left behind would satisfy the access() check
		// above on the next call and be mistaken for a finished download.
		remove(taskTarFile);
		free(taskTarFile);
		return 0;
	}
	printf("Download %s from %s/%s\n",id,a->s3_dir,id);
	debug(D_MAKEFLOW_HOOK," It took %f seconds for %s to download from %s",run_time, id, a->s3_dir);
	debug(D_MAKEFLOW_HOOK," The total download time is %f second(s)",total_down_time);

	char *extractTar = string_format("tar -xzvf %s/%s -C %s",task_path,id,task_path);
	int extract_status = system(extractTar);
	free(extractTar);
	if(extract_status == -1){
		free(taskTarFile);
		return 0;
	}

	int result = 1;
	struct batch_file *f;
	// NOTE(review): this cursor is never released; the list API's matching
	// destroy call should be added once it is confirmed to be in scope.
	struct list_cursor *cur = list_cursor_create(t->output_files);
	// Iterate through output files
	for(list_seek(cur, 0); list_get(cur, (void**)&f); list_next(cur)) {
		char *output_file_path = string_format("%s/output_files/%s", task_path,  basename(f->inner_name));
		char buf[1024];
		// Read what the symlink is actually pointing to
		ssize_t len = readlink(output_file_path, buf, sizeof(buf)-1);
		free(output_file_path);
		if(len == -1){
			// buf holds nothing usable, so there is no name to look up
			debug(D_MAKEFLOW_HOOK,"Could not read the link for output file %s, skipping it",f->inner_name);
			continue;
		}
		buf[len] = '\0';
		// Grabs the actual name of the file from the buffer
		char *file_name	= basename(buf);
		debug(D_MAKEFLOW_HOOK,"The FILE_NAME  is %s",file_name);
		// Check to see if the file was already copied to the /files/ directory
		char *filePath = string_format("%s/files/%.2s/%s",a->dir,file_name,file_name);
		char *fileDir = string_format("%s/files/%.2s",a->dir,file_name);
		if(access(filePath,R_OK) != 0){
			debug(D_MAKEFLOW_HOOK,"COPYING  %s to /files/ from the s3 bucket",file_name);
			// Copy the file to the local archive /files/ directory
			gettimeofday(&start_time,NULL);
			create_dir(fileDir,0777);
			FILE *fileLocal = fopen(filePath, "wb");
			if(!fileLocal){
				debug(D_MAKEFLOW_HOOK,"Could not create %s to download %s from the s3 bucket",filePath,file_name);
				free(fileDir);
				free(filePath);
				result = 0;
				break;
			}
			int file_download_failed = (s3_get(fileLocal, file_name) != 0);
			gettimeofday(&end_time,NULL);
			run_time = makeflow_s3_elapsed_seconds(&start_time,&end_time);
			total_down_time += run_time;
			fclose(fileLocal);
			if(file_download_failed){
				debug(D_MAKEFLOW_HOOK," It took %f seconds for %s to fail downloading from %s",run_time, id, a->s3_dir);
				debug(D_MAKEFLOW_HOOK," The total download time is %f second(s)",total_down_time);
				// same reasoning as for the task tarball: don't leave a
				// partial file that later passes the access() check
				remove(filePath);
				free(fileDir);
				free(filePath);
				result = 0;
				break;
			}
			printf("Download %s from %s/%s\n",file_name,a->s3_dir, file_name);
			debug(D_MAKEFLOW_HOOK," It took %f seconds for %s to download from %s",run_time, id, a->s3_dir);
			debug(D_MAKEFLOW_HOOK," The total download time is %f second(s)",total_down_time);

			//Extract the tar file of a directory (always run even if it isnt a tar file)
			char *makeDir = string_format("mkdir %s/foo",fileDir);
			system(makeDir);
			free(makeDir);
			char *extractDirTar = string_format("tar -xzvf %s -C %s/foo >&/dev/null",filePath,fileDir);
			int untar_status = system(extractDirTar);
			free(extractDirTar);
			if(untar_status != 0){
				// plain file (or unreadable tarball): keep it, drop the scratch dir
				debug(D_MAKEFLOW_HOOK,"%s is either a file or the tar file could not be extracted",file_name);
				char *removeFooDir = string_format("rm -rf %s/foo",fileDir);
				system(removeFooDir);
				free(removeFooDir);
			}
			else{
				// it was a directory tarball: replace it with the extracted tree
				char *removeTar = string_format("rm %s",filePath);
				system(removeTar);
				free(removeTar);
				char *renameFile = string_format("mv %s/foo %s", fileDir, filePath);
				system(renameFile);
				free(renameFile);
			}
		}
		free(fileDir);
		free(filePath);
	}
	free(taskTarFile);
	return result;
}