Esempio n. 1
0
// finish up getting directory metadata, and free up the download handle
// return 0 on success, and set *batch_id to this download's batch
//   *ret_num_children to the number of children downloaded, and *max_gen to be the largest generation number seen.
// return -ENOMEM on OOM 
static int ms_client_get_dir_metadata_end( struct ms_client* client, uint64_t parent_id, struct md_download_context* dlctx, ms_client_dir_listing* dir_listing, int64_t* batch_id, size_t* ret_num_children, int64_t* max_gen ) {
   
   int rc = 0;
   int listing_error = 0;
   struct md_entry* children = NULL;
   size_t num_children = 0;
   CURL* curl = NULL;
   
   int64_t biggest_generation = 0;
   
   struct ms_client_get_dir_download_state* dlstate = (struct ms_client_get_dir_download_state*)md_download_context_get_cls( dlctx );
   md_download_context_set_cls( dlctx, NULL );

   // download status?
   rc = ms_client_download_parse_errors( dlctx );
   
   if( rc != 0 ) {
      
      if( rc != -EAGAIN) {
         // fatal 
         SG_error("ms_client_download_parse_errors( %p ) rc = %d\n", dlctx, rc );
      }
      
      // TODO: connection pool
      md_download_context_unref_free( dlctx, &curl );
      if( curl != NULL ) {
          curl_easy_cleanup( curl );
      }

      ms_client_get_dir_download_state_free( dlstate );
      dlstate = NULL;
      
      return rc;
   }
   
   // collect the data 
   rc = ms_client_listing_read_entries( client, dlctx, &children, &num_children, &listing_error );
   
   // done with the download
   // TODO: connection pool
   md_download_context_unref_free( dlctx, &curl );
   if( curl != NULL ) {
      curl_easy_cleanup( curl );
   }

   ms_client_get_dir_download_state_free( dlstate );
   dlstate = NULL;
   
   // did we get valid data?
   if( rc != 0 ) {
      
      SG_error("ms_client_listing_read_entries(%p) rc = %d\n", dlctx, rc );
      return rc;
   }
   
   if( listing_error != MS_LISTING_NEW ) {
      
      // somehow we didn't get data.  shouldn't happen in listdir
      SG_error("BUG: failed to get listing data for %" PRIX64 ", listing_error = %d\n", parent_id, listing_error );
      return -ENODATA;
   }
   
   // merge children in 
   for( unsigned int i = 0; i < num_children; i++ ) {
      
      uint64_t file_id = children[i].file_id;
      
      SG_debug("%p: %" PRIX64 "\n", dlctx, file_id );
      
      if( dir_listing->count( file_id ) > 0 ) {
         
         SG_error("Duplicate child %" PRIX64 "\n", file_id );
         rc = -EBADMSG;
      }
      
      if( rc == 0 ) {
         
         try {
            
            (*dir_listing)[ file_id ] = children[i];
         }
         catch( bad_alloc& ba ) {
            rc = -ENOMEM;
            break;
         }
         
         // generation?
         if( children[i].generation > biggest_generation ) {
            
            biggest_generation = children[i].generation;
         }
      }
      
      if( rc != 0 ) {
         break;
      }
   }
   
   // NOTE: shallow free--we've copied the children into dir_listing
   SG_safe_free( children );
   
   *ret_num_children = num_children;
   *max_gen = biggest_generation;
   
   return 0;
}
// download metadata for a directory, in one of two ways:
// LISTDIR: fetch num_children entries in parallel by requesting disjoint ranges of them by index, in the range [0, dir_capacity].
// DIFFDIR: query by least unknown generation number until we have num_children entries, or the number of entries in a downloaded batch becomes 0 (i.e. no more entries known).
// in both cases, stop once the number of children is exceeded.
// if least_unknown_generation >= 0, then we will DIFFDIR.
// if dir_capacity >= 0, then we will LISTDIR.
// we can only do one or the other (both/neither are invalid arguments)
// return partial results, even on error 
// return 0 on success
// return -EINVAL for invalid arguments.
// return -ENOMEM on OOM
// return negative on download failure, or corruption
static int ms_client_get_dir_metadata( struct ms_client* client, uint64_t parent_id, int64_t num_children, int64_t least_unknown_generation, int64_t dir_capacity, struct ms_client_multi_result* results ) {
   
   int rc = 0;
   
   struct md_download_loop* dlloop = NULL;
   queue< int64_t > batch_queue;
   
   ms_client_dir_listing children;
   uint64_t num_children_downloaded = 0;
   
   int64_t max_known_generation = 0;
   
   struct md_download_context* dlctx = NULL;
   
   int64_t batch_id = 0;
   size_t num_children_fetched = 0;
   int64_t max_generation_fetched = 0;
   int query_count = 0;
   int num_downloads_finished = 0;
   CURL* curl = NULL;
   bool aborted = false;

   int i = 0;
   bool diffdir = false; 
   struct md_entry* ents = NULL;
   
   // sanity check 
   if( least_unknown_generation < 0 && dir_capacity < 0 ) {
      SG_error("Invalid args: %" PRId64 " < 0 and %" PRId64 " < 0\n", least_unknown_generation, dir_capacity );
      return -EINVAL;
   }
   
   if( least_unknown_generation >= 0 && dir_capacity >= 0 ) {
      SG_error("Invalid args: %" PRId64 " >= 0 and %" PRId64 " >= 0\n", least_unknown_generation, dir_capacity );
      return -EINVAL;
   }

   if( num_children < 0 && dir_capacity < 0 ) {
      SG_error("Invalid args: %" PRId64 " < 0 and %" PRId64 " < 0\n", num_children, dir_capacity);
      return -EINVAL;
   }

   memset( results, 0, sizeof(struct ms_client_multi_result) );
   
   SG_debug("listdir %" PRIX64 ", num_children = %" PRId64 ", l.u.g. = %" PRId64 ", dir_capacity = %" PRId64 "\n", parent_id, num_children, least_unknown_generation, dir_capacity );
   
   try {
      if( least_unknown_generation >= 0 ) {
         
         // download from a generation offset 
         batch_queue.push( least_unknown_generation );
      }
      else {
         
         // get all batches in parallel
         for( int64_t batch_id = 0; batch_id * client->page_size < num_children; batch_id++ ) {
            
            batch_queue.push( batch_id );
         }
      }
   }
   catch( bad_alloc& ba ) {
      return -ENOMEM;
   }
   
   // set up the md_download_loop
   dlloop = md_download_loop_new();
   if( dlloop == NULL ) {
      return -ENOMEM;
   }

   rc = md_download_loop_init( dlloop, client->dl, client->max_connections );
   if( rc != 0 ) {

      SG_safe_free( dlloop );
      return rc;
   }
   
   // run the downloads!
   do {
      
      while( batch_queue.size() > 0 ) {
         
         // next batch 
         int64_t next_batch = batch_queue.front();
         batch_queue.pop();
         
         query_count++;
         
         // next download 
         rc = md_download_loop_next( dlloop, &dlctx );
         if( rc != 0 ) {
            
            if( rc == -EAGAIN ) {
               // all downloads are running 
               rc = 0; 
               break;
            }
            
            SG_error("md_download_loop_next rc = %d\n", rc );
            break;
         }
         else { 
             // GOGOGO!
             rc = ms_client_get_dir_metadata_begin( client, parent_id, least_unknown_generation, next_batch, dlloop, dlctx );
             if( rc != 0 ) {
            
                SG_error("ms_client_get_dir_metadata_begin( LUG=%" PRId64 ", batch=%" PRId64 " ) rc = %d\n", least_unknown_generation, next_batch, rc );
                break;
             }
         }
      }
      
      if( rc != 0 ) {
         break;
      }
      
      // await next download 
      rc = md_download_loop_run( dlloop );
      if( rc != 0 ) {
         
         SG_error("md_download_loop_run rc = %d\n", rc );
         break;
      }
      
      num_downloads_finished = 0;

      // process all completed downloads 
      while( true ) {
         
         // next completed download 
         rc = md_download_loop_finished( dlloop, &dlctx );
         if( rc != 0 ) {
            
            // finished all downloads?
            if( rc == -EAGAIN ) {
              
               SG_debug("Finished %d downloads (rc = %d)\n", num_downloads_finished, rc); 
               rc = 0;
               break;
            }
            
            SG_error("md_download_loop_finish rc = %d\n", rc );
            break;
         }
         
         // process it 
         rc = ms_client_get_dir_metadata_end( client, parent_id, dlctx, &children, &batch_id, &num_children_fetched, &max_generation_fetched );
         if( rc != 0 ) {
            
            SG_error("ms_client_get_dir_metadata_end rc = %d\n", rc );
            break;
         }
         
         num_downloads_finished ++;
         num_children_downloaded += num_children_fetched;

         if( max_generation_fetched > 0 ) {
             // got at least one child
             max_known_generation = MAX( max_generation_fetched, max_known_generation );
         }

         // are we out of children to fetch?
         if( num_children_fetched == 0 ) {
           
            if( (unsigned)num_children_downloaded >= (unsigned)num_children || diffdir ) { 
                SG_debug("Out of children (%" PRIu64 " fetched total)\n", num_children_downloaded );
            
                rc = MD_DOWNLOAD_FINISH;
                break;
            }
         }

         SG_debug("Fetched %" PRIu64 " (%" PRIu64 " downloaded total)\n", num_children_fetched, num_children_downloaded );
         
         // do we need to switch over to DIFFDIR?
         if( batch_queue.size() == 0 && num_children > 0 && num_children_downloaded < (unsigned)num_children ) {
            
            // yup
            SG_debug("Downloaded %" PRIu64 " children (%" PRId64 " given by inode); l.u.g. is now %" PRIu64 "\n", num_children_downloaded, num_children, max_known_generation + 1 );
            least_unknown_generation = max_known_generation + 1;
            batch_queue.push( least_unknown_generation );
            diffdir = true;
         }
      }
      
      if( rc != 0 ) { 
         SG_debug("Breaking loop on rc = %d\n", rc );
         break;
      }
      
   } while( (batch_queue.size() > 0 || md_download_loop_running( dlloop )) && num_children_downloaded < (unsigned)num_children );
   
   if( rc != 0 ) {
      
      // download stopped prematurely
      // manually unref and free downloads.
      md_download_loop_abort( dlloop );
      aborted = true;
   } 
   
   // free all ms_client_get_dir_download_state
   i = 0;
   for( dlctx = md_download_loop_next_initialized( dlloop, &i ); dlctx != NULL; dlctx = md_download_loop_next_initialized( dlloop, &i ) ) {
      
      if( dlctx == NULL ) {
         break;
      }
      
      struct ms_client_get_dir_download_state* dlstate = (struct ms_client_get_dir_download_state*)md_download_context_get_cls( dlctx );
      md_download_context_set_cls( dlctx, NULL );
     
      if( dlstate != NULL ) { 
          ms_client_get_dir_download_state_free( dlstate );
          dlstate = NULL;
      }

      if( aborted ) {
         // unref downloads 
         md_download_context_unref_free( dlctx, &curl );
         if( curl != NULL ) {
             curl_easy_cleanup( curl );
         }
      }
   }
   
   md_download_loop_cleanup( dlloop, NULL, NULL );
   md_download_loop_free( dlloop );
   SG_safe_free( dlloop );
   
   if( rc == MD_DOWNLOAD_FINISH ) {
      rc = 0;
   }
   
   SG_debug("Downloaded %" PRId64 " children (out of %" PRId64 ")\n", num_children_downloaded, num_children );

   // coalesce what we have into results
   ents = SG_CALLOC( struct md_entry, children.size() );
   if( ents == NULL ) {
      
      if( rc == 0 ) {
         rc = -ENOMEM;
      }
      
      // preserve download error, if need be
      return rc;
   }
   
   i = 0;
   for( ms_client_dir_listing::iterator itr = children.begin(); itr != children.end(); itr++ ) {
      
      ents[i] = itr->second;
      i++;
   }
   
   // populate results 
   results->ents = ents;
   results->reply_error = 0;
   results->num_processed = query_count;
   results->num_ents = children.size();
   
   return rc;
}