// open a block in the cache // return a file descriptor >= 0 on success // return -ENOMEM if OOM // return negative on error int md_cache_open_block( struct md_syndicate_cache* cache, uint64_t file_id, int64_t file_version, uint64_t block_id, int64_t block_version, int flags ) { int rc = 0; int fd = 0; char* block_path = NULL; char* block_url = md_url_local_block_url( cache->conf->data_root, cache->conf->volume, file_id, file_version, block_id, block_version ); if( block_url == NULL ) { return -ENOMEM; } block_path = SG_URL_LOCAL_PATH( block_url ); // if we're creating the block, go ahead and create all the directories up to it. if( flags & O_CREAT ) { rc = md_cache_file_setup( cache, file_id, file_version, 0700 ); if( rc != 0 ) { SG_error("md_cache_file_setup( %" PRIX64 ".%" PRId64 " ) rc = %d\n", file_id, file_version, rc ); SG_safe_free( block_url ); return rc; } } fd = open( block_path, flags, 0600 ); if( fd < 0 ) { fd = -errno; SG_error("open(%s) rc = %d\n", block_path, fd ); } SG_safe_free( block_url ); return fd; }
// set up RG // return 0 on success // return -errno on failure (see SG_gateway_init) int RG_init( struct RG_core* rg, int argc, char** argv ) { int rc = 0; struct md_opts* overrides = md_opts_new( 1 ); if( overrides == NULL ) { return -ENOMEM; } md_opts_set_client( overrides, false ); md_opts_set_gateway_type( overrides, SYNDICATE_RG ); md_opts_set_driver_config( overrides, RG_DEFAULT_EXEC, RG_DRIVER_ROLES, RG_NUM_DRIVER_ROLES ); memset( rg, 0, sizeof(struct RG_core) ); rc = pthread_rwlock_init( &rg->lock, NULL ); if( rc != 0 ) { md_opts_free( overrides ); SG_safe_free( overrides ); return rc; } rg->gateway = SG_CALLOC( struct SG_gateway, 1 ); if( rg->gateway == NULL ) { pthread_rwlock_destroy( &rg->lock ); md_opts_free( overrides ); SG_safe_free( overrides ); return -ENOMEM; } // core gateway... rc = SG_gateway_init( rg->gateway, SYNDICATE_RG, argc, argv, overrides ); md_opts_free( overrides ); SG_safe_free( overrides ); if( rc != 0 ) { SG_error("SG_gateway_init rc = %d\n", rc ); SG_safe_free( rg->gateway ); pthread_rwlock_destroy( &rg->lock ); return rc; } // core methods... rc = RG_server_install_methods( rg->gateway, rg ); if( rc != 0 ) { SG_error("RG_server_install_methods rc = %d\n", rc ); SG_gateway_shutdown( rg->gateway ); SG_safe_free( rg->gateway ); pthread_rwlock_destroy( &rg->lock ); return rc; } return rc; }
// free download state // always succeeds static void ms_client_get_dir_download_state_free( struct ms_client_get_dir_download_state* dlstate ) { SG_safe_free( dlstate->url ); SG_safe_free( dlstate->auth_header ); SG_safe_free( dlstate ); return; }
// add a block to the cache, to be written asynchronously. // return a future that can be waited on // return NULL on error, and set *_rc to the error code // *_rc can be: // * -EAGAN if the cache is not running // * -ENOMEM if OOM // * negative if we failed to open the block (see md_cache_open_block) // NOTE: the given data will be referenced! Do NOT free it! struct md_cache_block_future* md_cache_write_block_async( struct md_syndicate_cache* cache, uint64_t file_id, int64_t file_version, uint64_t block_id, int64_t block_version, char* data, size_t data_len, bool detached, int* _rc ) { *_rc = 0; if( !cache->running ) { *_rc = -EAGAIN; return NULL; } // reserve the right to cache this block sem_wait( &cache->sem_write_hard_limit ); struct md_cache_block_future* f = SG_CALLOC( struct md_cache_block_future, 1 ); if( f == NULL ) { *_rc = -ENOMEM; return NULL; } // create the block to cache int block_fd = md_cache_open_block( cache, file_id, file_version, block_id, block_version, O_CREAT | O_RDWR | O_TRUNC ); if( block_fd < 0 ) { *_rc = block_fd; SG_error("md_cache_open_block( %" PRIX64 ".%" PRId64 "[%" PRIu64 ".%" PRId64 "] ) rc = %d\n", file_id, file_version, block_id, block_version, block_fd ); SG_safe_free( f ); return NULL; } md_cache_block_future_init( cache, f, file_id, file_version, block_id, block_version, block_fd, data, data_len, detached ); md_cache_pending_wlock( cache ); try { cache->pending->push_back( f ); } catch( bad_alloc& ba ) { SG_safe_free( f ); f = NULL; *_rc = -ENOMEM; } // wake up the thread--we have another block sem_post( &cache->sem_blocks_writing ); md_cache_pending_unlock( cache ); return f; }
// set up the AG // return a client on success // return NULL on error struct AG_state* AG_init( int argc, char** argv ) { int rc = 0; struct UG_state* ug = NULL; struct AG_state* ag = NULL; struct md_opts* overrides = md_opts_new( 1 ); if( overrides == NULL ) { return NULL; } md_opts_default( overrides ); md_opts_set_client( overrides, false ); md_opts_set_gateway_type( overrides, SYNDICATE_AG ); md_opts_set_driver_config( overrides, AG_DEFAULT_DRIVER_EXEC_STR, AG_DRIVER_ROLES, AG_DRIVER_NUM_ROLES ); ag = SG_CALLOC( struct AG_state, 1 ); if( ag == NULL ) { // OOM md_opts_free( overrides ); SG_safe_free( overrides ); return NULL; } // create UG core ug = UG_init_ex( argc, argv, overrides, ag ); md_opts_free( overrides ); SG_safe_free( overrides ); if( ug == NULL ) { SG_error("%s", "UG_init failed\n"); SG_safe_free( ag ); return NULL; } ag->ug_core = ug; rc = pthread_rwlock_init( &ag->lock, NULL ); if( rc != 0 ) { SG_error("pthread_rwlock_init rc = %d\n", rc ); UG_shutdown( ug ); SG_safe_free( ag ); return NULL; } // add AG server-side behaviors AG_server_install_methods( AG_state_gateway( ag ) ); return ag; }
/// Free a cert void ms_client_gateway_cert_free( struct ms_gateway_cert* cert ) { SG_safe_free( cert->hostname ); SG_safe_free( cert->name ); SG_safe_free( cert->driver_hash ); SG_safe_free( cert->driver_text ); SG_safe_delete( cert->pb ); SG_safe_delete( cert->user_pb ); if( cert->pubkey != NULL ) { EVP_PKEY_free( cert->pubkey ); cert->pubkey = NULL; } }
/**
 * @brief Initialize a gateway certificate.
 * @note cert takes ownership of ms_cert
 * @retval 0 Success
 * @retval -ENOMEM Out of Memory
 * @retval -EINVAL Invalid
 */
int ms_client_gateway_cert_init( struct ms_gateway_cert* cert, uint64_t my_gateway_id, ms::ms_gateway_cert* ms_cert ) {

   int rc = 0;

   // sanity check
   // NOTE(review): this comparison reads cert->gateway_id *before* it is assigned
   // from ms_cert below--confirm the caller pre-populates cert->gateway_id.
   if( my_gateway_id == cert->gateway_id && ms_cert->driver_hash().size() > 0 ) {
      if( ms_cert->driver_hash().size() != SHA256_DIGEST_LENGTH ) {
         SG_error("Invalid driver hash length: expected %d, got %zu\n", SHA256_DIGEST_LENGTH, ms_cert->driver_hash().size() );
         return -EINVAL;
      }
   }

   // copy identifying strings out of the protobuf
   cert->name = strdup( ms_cert->name().c_str() );
   cert->hostname = strdup( ms_cert->host().c_str() );

   if( cert->name == NULL || cert->hostname == NULL ) {
      // OOM
      SG_safe_free( cert->name );
      SG_safe_free( cert->hostname );
      return -ENOMEM;
   }

   // scalar fields from the protobuf
   cert->user_id = ms_cert->owner_id();
   cert->gateway_id = ms_cert->gateway_id();
   cert->gateway_type = ms_cert->gateway_type();
   cert->portnum = ms_cert->port();
   cert->version = ms_cert->version();
   cert->caps = ms_cert->caps();
   cert->volume_id = ms_cert->volume_id();

   // driver text is not carried in the cert
   cert->driver_text = NULL;
   cert->driver_text_len = 0;

   // take ownership of the protobuf (see @note)
   cert->pb = ms_cert;

   // store *our* driver hash
   if( my_gateway_id == cert->gateway_id && ms_cert->driver_hash().size() > 0 ) {

      cert->driver_hash_len = ms_cert->driver_hash().size();
      cert->driver_hash = SG_CALLOC( unsigned char, cert->driver_hash_len );
      if( cert->driver_hash == NULL ) {
         // OOM
         SG_safe_free( cert->name );
         SG_safe_free( cert->hostname );
         return -ENOMEM;
      }

      memcpy( cert->driver_hash, ms_cert->driver_hash().data(), cert->driver_hash_len );
   }
// tear down RG // return 0 on success // return -errno on failure int RG_shutdown( struct RG_core* rg ) { int rc = 0; if( rg->running ) { // ask the SG to die SG_gateway_signal_main( rg->gateway ); pthread_join( rg->thread, NULL ); rg->running = false; } // shut down the core gateway rc = SG_gateway_shutdown( rg->gateway ); if( rc != 0 ) { SG_error("SG_gateway_shutdown rc = %d\n", rc ); } SG_safe_free( rg->gateway ); pthread_rwlock_destroy( &rg->lock ); md_shutdown(); memset( rg, 0, sizeof(struct RG_core) ); return 0; }
// free a future // always succeeds int md_cache_block_future_free( struct md_cache_block_future* f ) { if( f != NULL ) { md_cache_block_future_clean( f ); SG_safe_free( f ); } return 0; }
// clean up a future // always succeeds int md_cache_block_future_clean( struct md_cache_block_future* f ) { if( f->block_fd >= 0 ) { fsync( f->block_fd ); close( f->block_fd ); f->block_fd = -1; } SG_safe_free( f->block_data ); SG_safe_free( f->aio.aio_sigevent.sigev_value.sival_ptr ); memset( &f->aio, 0, sizeof(f->aio) ); sem_destroy( &f->sem_ongoing ); return 0; }
// apply a function over a file's cached blocks // keep applying it even if the callback fails on some of them // return 0 on success // return -ENOMEM on OOM // return negative on opendir(2) failure // return non-zero if the block_func callback does not return 0 int md_cache_file_blocks_apply( char const* local_path, int (*block_func)( char const*, void* ), void* cls ) { struct dirent* result = NULL; char* block_path = NULL; int rc = 0; int worst_rc = 0; int dirent_sz = 0; DIR* dir = opendir( local_path ); if( dir == NULL ) { int rc = -errno; return rc; } dirent_sz = offsetof(struct dirent, d_name) + pathconf(local_path, _PC_NAME_MAX) + 1; struct dirent* dent = SG_CALLOC( struct dirent, dirent_sz ); if( dent == NULL ) { closedir( dir ); return -ENOMEM; } do { readdir_r( dir, dent, &result ); if( result != NULL ) { if( strcmp(result->d_name, ".") == 0 || strcmp(result->d_name, "..") == 0 ) { continue; } block_path = md_fullpath( local_path, result->d_name, NULL ); if( block_path == NULL ) { worst_rc = -ENOMEM; break; } rc = (*block_func)( block_path, cls ); if( rc != 0 ) { // could not unlink rc = -errno; SG_error( "block_func(%s) errno = %d\n", block_path, rc ); worst_rc = rc; } SG_safe_free( block_path ); } } while( result != NULL ); closedir( dir ); free( dent ); return worst_rc; }
// delete a block in the cache // return 0 on success // return -ENOMEM on OOM // return negative (from unlink) on error static int md_cache_evict_block_internal( struct md_syndicate_cache* cache, uint64_t file_id, int64_t file_version, uint64_t block_id, int64_t block_version ) { char* block_path = NULL; int rc = 0; char* block_url = NULL; char* local_file_url = NULL; char* local_file_path = NULL; block_url = md_url_local_block_url( cache->conf->data_root, cache->conf->volume, file_id, file_version, block_id, block_version ); if( block_url == NULL ) { return -ENOMEM; } block_path = SG_URL_LOCAL_PATH( block_url ); rc = unlink( block_path ); if( rc != 0 ) { rc = -errno; } if( rc == 0 || rc == -ENOENT ) { // let another block get queued sem_post( &cache->sem_write_hard_limit ); local_file_url = md_url_local_file_url( cache->conf->data_root, cache->conf->volume, file_id, file_version ); if( local_file_url == NULL ) { SG_safe_free( block_url ); return -ENOMEM; } local_file_path = SG_URL_LOCAL_PATH( local_file_url ); // remove the file's empty directories md_rmdirs( local_file_path ); SG_safe_free( local_file_url ); } SG_safe_free( block_url ); return rc; }
// evict a file from the cache // return 0 on success // return -ENOMEM on OOM // return negative if unlink(2) fails due to something besides -ENOENT int md_cache_evict_file( struct md_syndicate_cache* cache, uint64_t file_id, int64_t file_version ) { char* local_file_path = NULL; char* local_file_url = NULL; int rc = 0; struct local { // lambda function for deleting a block and evicting it static int cache_evict_block( char const* block_path, void* cls ) { struct md_syndicate_cache* c = (struct md_syndicate_cache*)cls; int rc = unlink( block_path ); if( rc != 0 ) { rc = -errno; } if( rc == 0 || rc == -ENOENT ) { // evicted! __sync_fetch_and_sub( &c->num_blocks_written, 1 ); // let another block get queued sem_post( &c->sem_write_hard_limit ); } else { // not evicted! SG_error("WARN: unlink( %s ) rc = %d\n", block_path, rc ); // nevertheless, try to evict as much as possible rc = 0; } return rc; } }; // path to the file... local_file_url = md_url_local_file_url( cache->conf->data_root, cache->conf->volume, file_id, file_version ); if( local_file_url == NULL ) { return -ENOMEM; } local_file_path = SG_URL_LOCAL_PATH( local_file_url ); rc = md_cache_file_blocks_apply( local_file_path, local::cache_evict_block, cache ); if( rc == 0 ) { // remove this file's directories md_rmdirs( local_file_path ); } SG_safe_free( local_file_url ); return rc; }
/// Free a cert bundle void ms_client_cert_bundle_free( ms_cert_bundle* bundle ) { for( ms_cert_bundle::iterator itr = bundle->begin(); itr != bundle->end(); itr++ ) { if( itr->second != NULL ) { ms_client_gateway_cert_free( itr->second ); SG_safe_free( itr->second ); } } bundle->clear(); }
// set up reads to existing but partially-written blocks, in a zero-copy manner. *dirty_blocks must NOT contain the partial block information yet. // A block is partially-written if the write buffer represented by (buf_len, offset) encompasses part of it. // return 0 on success // return -errno on failure // NOTE: inode->entry must be read-locked int UG_write_read_partial_setup( struct SG_gateway* gateway, char const* fs_path, struct UG_inode* inode, size_t buf_len, off_t offset, UG_dirty_block_map_t* dirty_blocks ) { int rc = 0; struct ms_client* ms = SG_gateway_ms( gateway ); uint64_t block_size = ms_client_get_volume_blocksize( ms ); uint64_t block_id = 0; char* buf = NULL; uint64_t first_affected_block = (offset) / block_size; uint64_t last_affected_block = (offset + buf_len) / block_size; bool first_affected_block_exists = (SG_manifest_block_lookup( UG_inode_manifest( inode ), first_affected_block ) != NULL ); bool last_affected_block_exists = (SG_manifest_block_lookup( UG_inode_manifest( inode ), last_affected_block ) != NULL ); // scratch area for fetching blocks UG_dirty_block_map_t partial_blocks; SG_debug("First affected block: %" PRIu64 " (exists = %d)\n", first_affected_block, first_affected_block_exists ); SG_debug("Last affected block: %" PRIu64 " (exists = %d)\n", last_affected_block, last_affected_block_exists ); // is the first block partial? 
// also, if the first block is the same as the last block, and the last block is partial, // then the first block is considered partial if( first_affected_block_exists && (( first_affected_block == last_affected_block && (offset % block_size != 0 || (offset + buf_len) % block_size) != 0) || ( first_affected_block < last_affected_block && (offset % block_size) != 0)) ) { // head is partial block_id = offset / block_size; // make a head buffer buf = SG_CALLOC( char, block_size ); if( buf == NULL ) { return -ENOMEM; } SG_debug("Read partial HEAD block %" PRIu64 "\n", block_id ); // set up the request rc = UG_write_setup_partial_block_buffer( inode, block_id, buf, block_size, &partial_blocks ); if( rc != 0 ) { SG_safe_free( buf ); UG_dirty_block_map_free( &partial_blocks ); return rc; } buf = NULL; }
// connect to the CDN // return 0 on success // return -ENOMEM on OOM static int UG_impl_connect_cache( struct SG_gateway* gateway, CURL* curl, char const* url, void* cls ) { int rc = 0; char* out_url = NULL; struct UG_state* ug = (struct UG_state*)SG_gateway_cls( gateway ); rc = UG_driver_cdn_url( ug, url, &out_url ); if( rc != 0 ) { return rc; } // set up the curl handle curl_easy_setopt( curl, CURLOPT_URL, out_url ); SG_safe_free( out_url ); return 0; }
// callback to apply over a file's blocks. // cls must be of type struct md_cache_cb_add_lru_args // return 0 on success // return -ENOMEM on OOM // return -EINVAL if we couldn't parse the block path static int md_cache_cb_add_lru( char const* block_path, void* cls ) { struct md_cache_cb_add_lru_args* args = (struct md_cache_cb_add_lru_args*)cls; md_cache_lru_t* cache_lru = args->cache_lru; uint64_t file_id = args->file_id; int64_t file_version = args->file_version; uint64_t block_id = 0; int64_t block_version = 0; // scan path for block ID and block version char* block_path_basename = md_basename( block_path, NULL ); if( block_path_basename == NULL ) { return -ENOMEM; } int rc = sscanf( block_path_basename, "%" PRIu64 ".%" PRId64, &block_id, &block_version ); if( rc != 2 ) { SG_error("Unparsable block name '%s'\n", block_path_basename ); rc = -EINVAL; } else { struct md_cache_entry_key lru_key; memset( &lru_key, 0, sizeof(lru_key) ); lru_key.file_id = file_id; lru_key.file_version = file_version; lru_key.block_id = block_id; lru_key.block_version = block_version; rc = 0; try { cache_lru->push_back( lru_key ); } catch( bad_alloc& ba ) { rc = -ENOMEM; } } SG_safe_free( block_path_basename ); return rc; }
// free a volume void ms_client_volume_free( struct ms_volume* vol ) { if( vol == NULL ) { return; } if( vol->volume_public_key != NULL ) { EVP_PKEY_free( vol->volume_public_key ); vol->volume_public_key = NULL; } if( vol->volume_md != NULL ) { SG_safe_delete( vol->volume_md ); vol->volume_md = NULL; } SG_safe_free( vol->name ); memset( vol, 0, sizeof(struct ms_volume) ); }
// entry point int main( int argc, char** argv ) { int rc = 0; int exit_code = 0; struct AG_state* ag = NULL; pthread_t crawl_thread; // setup... ag = AG_init( argc, argv ); if( ag == NULL ) { SG_error("%s", "AG_init failed\n" ); exit(1); } // start crawler rc = md_start_thread( &crawl_thread, AG_crawl_loop, ag, false ); if( rc != 0 ) { SG_error("md_start_thread rc = %d\n", rc ); exit(1); } // run gateway rc = AG_main( ag ); if( rc != 0 ) { SG_error("AG_main rc = %d\n", rc ); exit_code = 1; } // stop crawler g_running = false; pthread_cancel( crawl_thread ); pthread_join( crawl_thread, NULL ); // stop gateway rc = AG_shutdown( ag ); if( rc != 0 ) { SG_error("AG_shutdown rc = %d\n", rc ); } SG_safe_free( ag ); exit(exit_code); }
// set up a file's cache directory. // return 0 on success // return -ENOMEM on OOM // return negative if we failed to create the directory to hold the data static int md_cache_file_setup( struct md_syndicate_cache* cache, uint64_t file_id, int64_t version, mode_t mode ) { // it is possible for there to be a 0-sized non-directory here, to indicate the next version to be created. // if so, remove it int rc = 0; char* local_path = NULL; char* local_file_url = md_url_local_file_url( cache->conf->data_root, cache->conf->volume, file_id, version ); if( local_file_url == NULL ) { return -ENOMEM; } local_path = SG_URL_LOCAL_PATH( local_file_url ); rc = md_mkdirs3( local_path, mode | 0700 ); SG_safe_free( local_file_url ); return rc; }
// stat a block in the cache (system use only) // return 0 on success // return -ENOMEM if OOM // return negative (from stat(2) errno) on error int md_cache_stat_block_by_id( struct md_syndicate_cache* cache, uint64_t file_id, int64_t file_version, uint64_t block_id, int64_t block_version, struct stat* sb ) { char* stat_path = NULL; int rc = 0; char* block_url = NULL; block_url = md_url_local_block_url( cache->conf->data_root, cache->conf->volume, file_id, file_version, block_id, block_version ); if( block_url == NULL ) { return -ENOMEM; } stat_path = SG_URL_LOCAL_PATH( block_url ); rc = stat( stat_path, sb ); if( rc != 0 ) { rc = -errno; } SG_safe_free( block_url ); return rc; }
// reversion a file.
// move it into place, and then insert the new cache_entry_key records for it to the cache_lru list.
// don't bother removing the old cache_entry_key records; they will be removed from the cache_lru list automatically.
// NOTE: the corresponding fent structure should be write-locked for this, to make it atomic.
// return 0 on success
// return -ENOMEM on OOM
// return negative if stat(2) on the new path fails for some reason besides -ENOENT
int md_cache_reversion_file( struct md_syndicate_cache* cache, uint64_t file_id, int64_t old_file_version, int64_t new_file_version ) {

   char* cur_local_url = md_url_local_file_url( cache->conf->data_root, cache->conf->volume, file_id, old_file_version );
   if( cur_local_url == NULL ) {
      return -ENOMEM;
   }

   char* new_local_url = md_url_local_file_url( cache->conf->data_root, cache->conf->volume, file_id, new_file_version );
   if( new_local_url == NULL ) {
      SG_safe_free( cur_local_url );
      return -ENOMEM;
   }

   char* cur_local_path = SG_URL_LOCAL_PATH( cur_local_url );
   char* new_local_path = SG_URL_LOCAL_PATH( new_local_url );

   // new path shouldn't exist, but old path should
   struct stat old_sb;
   struct stat new_sb;

   int rc = 0;

   rc = stat( cur_local_path, &old_sb );
   if( rc != 0 ) {

      rc = -errno;

      // tolerate a missing old path (nothing cached for the old version);
      // any other stat failure is fatal
      if( rc != -ENOENT ) {
         // problem
         SG_error("Failed to stat %s, rc = %d\n", cur_local_path, rc );
         SG_safe_free( cur_local_url );
         SG_safe_free( new_local_url );
         return rc;
      }
   }

   rc = stat( new_local_path, &new_sb );
   if( rc == 0 ) {
      // the new path already exists--treat as a collision
      rc = -EEXIST;
   }
   else {
      rc = -errno;
   }

   // the only acceptable outcome is that the new path does not exist yet
   if( rc != -ENOENT ) {

      SG_error("Failed to stat %s, rc = %d\n", new_local_path, rc );
      SG_safe_free( cur_local_url );
      SG_safe_free( new_local_url );
      return rc;
   }

   // move the file data over
   rc = rename( cur_local_path, new_local_path );
   if( rc != 0 ) {

      rc = -errno;
      SG_error("rename(%s,%s) rc = %d\n", cur_local_path, new_local_path, rc );
      SG_safe_free( cur_local_url );
      SG_safe_free( new_local_url );
      return rc;
   }

   // insert the new records
   md_cache_lru_t lru;

   struct md_cache_cb_add_lru_args lru_args;
   lru_args.cache_lru = &lru;
   lru_args.file_id = file_id;
   lru_args.file_version = new_file_version;

   // enumerate the renamed blocks into lru
   rc = md_cache_file_blocks_apply( new_local_path, md_cache_cb_add_lru, &lru_args );

   if( rc == 0 ) {

      // promote these blocks in the cache
      md_cache_promotes_wlock( cache );

      for( md_cache_lru_t::iterator itr = lru.begin(); itr != lru.end(); itr++ ) {

         try {
            cache->promotes->push_back( *itr );
         }
         catch( bad_alloc& ba ) {
            rc = -ENOMEM;
            break;
         }
      }

      md_cache_promotes_unlock( cache );
   }

   SG_safe_free( cur_local_url );
   SG_safe_free( new_local_url );

   return rc;
}
// cache main loop. // * start new writes // * reap completed writes // * evict blocks after the soft size limit has been exceeded void* md_cache_main_loop( void* arg ) { struct md_syndicate_cache_thread_args* args = (struct md_syndicate_cache_thread_args*)arg; struct md_syndicate_cache* cache = args->cache; // cancel whenever by default pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL ); SG_debug("%s", "Cache writer thread strated\n" ); while( cache->running ) { // wait for there to be blocks, if there are none if( cache->ongoing_writes->size() == 0 ) { sem_wait( &cache->sem_blocks_writing ); } // waken up to die? if( !cache->running ) { break; } md_cache_lru_t new_writes; // don't get cancelled while doing this pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, NULL ); // begin all pending writes md_cache_begin_writes( cache ); // reap completed writes md_cache_complete_writes( cache, &new_writes ); // can get cancelled now if needed pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL ); // evict blocks md_cache_evict_blocks( cache, &new_writes ); } // wait for remaining writes to finish // TODO: aio cancellations while( cache->ongoing_writes->size() > 0 ) { SG_debug("Waiting for %zu blocks to sync...\n", cache->ongoing_writes->size() ); md_cache_lru_t new_writes; // don't get cancelled here pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, NULL ); // reap completed writes md_cache_complete_writes( cache, &new_writes ); // can get cancelled now if needed pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL ); // evict blocks md_cache_evict_blocks( cache, &new_writes ); sleep(1); } SG_safe_free( args ); SG_debug("%s", "Cache writer thread exited\n" ); return NULL; }
/** * @brief syndicate-removexattr entry point * */ int main( int argc, char** argv ) { int rc = 0; struct UG_state* ug = NULL; struct SG_gateway* gateway = NULL; char* path = NULL; char* xattr = NULL; int path_optind = 0; struct tool_opts opts; uint64_t* times = NULL; struct timespec ts_begin; struct timespec ts_end; memset( &opts, 0, sizeof(tool_opts) ); argc = parse_args( argc, argv, &opts ); if( argc < 0 ) { usage( argv[0], "path xattr [xattr...]" ); md_common_usage(); exit(1); } // setup... ug = UG_init( argc, argv ); if( ug == NULL ) { SG_error("%s", "UG_init failed\n" ); exit(1); } gateway = UG_state_gateway( ug ); // get the directory path path_optind = SG_gateway_first_arg_optind( gateway ); if( path_optind + 1 >= argc ) { usage( argv[0], "path xattr [xattr...]" ); md_common_usage(); UG_shutdown( ug ); exit(1); } if( opts.benchmark ) { times = SG_CALLOC( uint64_t, argc - path_optind + 1 ); if( times == NULL ) { UG_shutdown( ug ); SG_error("%s", "Out of memory\n"); exit(1); } } path = argv[path_optind]; for( int i = path_optind + 1; i < argc; i++ ) { xattr = argv[ i ]; // load up... clock_gettime( CLOCK_MONOTONIC, &ts_begin ); rc = UG_removexattr( ug, path, xattr ); if( rc < 0 ) { fprintf(stderr, "Failed to removexattr '%s' '%s': %s\n", path, xattr, strerror(abs(rc)) ); rc = 1; break; } clock_gettime( CLOCK_MONOTONIC, &ts_end ); if( times != NULL ) { times[i - path_optind] = md_timespec_diff( &ts_end, &ts_begin ); } } if( times != NULL ) { printf("@@@@@"); for( int i = path_optind; i < argc - 1; i++ ) { printf("%" PRIu64 ",", times[i - path_optind] ); } printf("%" PRIu64 "@@@@@\n", times[argc - 1 - path_optind] ); SG_safe_free( times ); } UG_shutdown( ug ); exit(rc); }
// destroy the cache // return 0 on success // return -EINVAL if the cache is still running int md_cache_destroy( struct md_syndicate_cache* cache ) { if( cache->running ) { // have to stop it first return -EINVAL; } cache->pending = NULL; cache->completed = NULL; md_cache_block_buffer_t* pendings[] = { cache->pending_1, cache->pending_2, NULL }; for( int i = 0; pendings[i] != NULL; i++ ) { for( md_cache_block_buffer_t::iterator itr = pendings[i]->begin(); itr != pendings[i]->end(); itr++ ) { if( *itr != NULL ) { SG_safe_free( *itr ); } } SG_safe_delete( pendings[i] ); } md_cache_completion_buffer_t* completeds[] = { cache->completed_1, cache->completed_2, NULL }; for( int i = 0; completeds[i] != NULL; i++ ) { for( md_cache_completion_buffer_t::iterator itr = completeds[i]->begin(); itr != completeds[i]->end(); itr++ ) { struct md_cache_block_future* f = *itr; md_cache_block_future_free( f ); } SG_safe_delete( completeds[i] ); } md_cache_lru_t* lrus[] = { cache->cache_lru, cache->promotes_1, cache->promotes_2, cache->evicts_1, cache->evicts_2, NULL }; for( int i = 0; lrus[i] != NULL; i++ ) { SG_safe_delete( lrus[i] ); } SG_safe_delete( cache->ongoing_writes ); pthread_rwlock_t* locks[] = { &cache->pending_lock, &cache->completed_lock, &cache->cache_lru_lock, &cache->promotes_lock, &cache->ongoing_writes_lock, NULL }; for( int i = 0; locks[i] != NULL; i++ ) { pthread_rwlock_destroy( locks[i] ); } sem_destroy( &cache->sem_blocks_writing ); sem_destroy( &cache->sem_write_hard_limit ); return 0; }
/** * @brief syndicate-get entry point * */ int main( int argc, char** argv ) { int rc = 0; struct UG_state* ug = NULL; struct SG_gateway* gateway = NULL; char* path = NULL; int path_optind = 0; char* file_path = NULL; int fd = 0; char* buf = NULL; ssize_t nr = 0; ssize_t total = 0; UG_handle_t* fh = NULL; int t = 0; struct timespec ts_begin; struct timespec ts_end; int64_t* times = NULL; mode_t um = umask(0); umask( um ); struct tool_opts opts; memset( &opts, 0, sizeof(tool_opts) ); argc = parse_args( argc, argv, &opts ); if( argc < 0 ) { usage( argv[0], "syndicate_file local_file [syndicate_file local_file...]" ); md_common_usage(); exit(1); } // setup... ug = UG_init( argc, argv ); if( ug == NULL ) { SG_error("%s", "UG_init failed\n" ); exit(1); } gateway = UG_state_gateway( ug ); // get the path... path_optind = SG_gateway_first_arg_optind( gateway ); if( path_optind == argc || ((argc - path_optind) % 2) != 0 ) { usage( argv[0], "syndicate_file local_file [syndicate_file local_file]" ); UG_shutdown( ug ); exit(1); } if( opts.benchmark ) { times = SG_CALLOC( int64_t, (argc - path_optind) / 2 + 1 ); if( times == NULL ) { UG_shutdown( ug ); SG_error("%s", "Out of memory\n"); exit(1); } } buf = SG_CALLOC( char, BUF_SIZE ); if( buf == NULL ) { UG_shutdown( ug ); SG_error("%s", "Out of memory\n"); exit(1); } for( int i = path_optind; i < argc; i += 2 ) { total = 0; // get the syndicate path... path = argv[i]; // get the file path... file_path = argv[i+1]; // open the file... 
fd = open( file_path, O_CREAT | O_EXCL | O_WRONLY, 0600 ); if( fd < 0 ) { rc = -errno; fprintf(stderr, "Failed to open '%s': %s\n", file_path, strerror(-rc)); rc = 1; goto get_end; } // try to open fh = UG_open( ug, path, O_RDONLY, &rc ); if( rc != 0 ) { fprintf(stderr, "Failed to open '%s': %d %s\n", path, rc, strerror( abs(rc) ) ); rc = 1; goto get_end; } clock_gettime( CLOCK_MONOTONIC, &ts_begin ); while( 1 ) { nr = UG_read( ug, buf, BUF_SIZE, fh ); if( nr == 0 ) { break; } if( nr < 0 ) { rc = nr; fprintf(stderr, "Failed to read '%s': %s\n", path, strerror(abs(rc))); break; } rc = write( fd, buf, nr ); if( rc < 0 ) { rc = -errno; fprintf(stderr, "Failed to write '%s': %d %s\n", file_path, rc, strerror(abs(rc))); break; } total += nr; } close( fd ); if( rc < 0 ) { rc = 1; goto get_end; } clock_gettime( CLOCK_MONOTONIC, &ts_end ); // close rc = UG_close( ug, fh ); if( rc != 0 ) { fprintf(stderr, "Failed to close '%s': %d %s\n", path, rc, strerror( abs(rc) ) ); rc = 1; goto get_end; } if( times != NULL ) { printf("\n%ld.%ld - %ld.%ld = %ld\n", ts_end.tv_sec, ts_end.tv_nsec, ts_begin.tv_sec, ts_begin.tv_nsec, md_timespec_diff_ms( &ts_end, &ts_begin )); times[t] = md_timespec_diff_ms( &ts_end, &ts_begin ); t++; } SG_debug("Read %zd bytes for %s\n", total, path ); } get_end: UG_shutdown( ug ); SG_safe_free( buf ); if( times != NULL ) { printf("@@@@@"); for( int i = 0; i < t - 1; i++ ) { printf("%" PRId64 ",", times[i] ); } printf("%" PRId64 "@@@@@\n", times[t-1] ); SG_safe_free( times ); } if( rc != 0 ) { exit(1); } else { exit(0); } }
// begin downloading metadata for a directory. // if least_unknown_generation >= 0, then use the generation number to generate the URL // otherwise, use the batch (page) number // return 0 on success // return -ENOMEM on OOM // return negative on failure to initialize or start the download static int ms_client_get_dir_metadata_begin( struct ms_client* client, uint64_t parent_id, int64_t least_unknown_generation, int64_t batch_id, struct md_download_loop* dlloop, struct md_download_context* dlctx ) { int rc = 0; CURL* curl = NULL; char* url = NULL; char* auth_header = NULL; uint64_t volume_id = ms_client_get_volume_id( client ); struct ms_client_get_dir_download_state* dlstate = NULL; if( least_unknown_generation > 0 ) { // least unknown generation url = ms_client_file_listdir_url( client->url, volume_id, ms_client_volume_version( client ), ms_client_cert_version( client ), parent_id, -1, least_unknown_generation ); } else { // page id url = ms_client_file_listdir_url( client->url, volume_id, ms_client_volume_version( client ), ms_client_cert_version( client ), parent_id, batch_id, -1 ); } if( url == NULL ) { return -ENOMEM; } // set up download state dlstate = SG_CALLOC( struct ms_client_get_dir_download_state, 1 ); if( dlstate == NULL ) { SG_safe_free( url ); return -ENOMEM; } // set up CURL // TODO: connection pool curl = curl_easy_init(); if( curl == NULL ) { SG_safe_free( dlstate ); SG_safe_free( url ); return -ENOMEM; } // generate auth header rc = ms_client_auth_header( client, url, &auth_header ); if( rc != 0 ) { // failed! 
curl_easy_cleanup( curl ); SG_safe_free( url ); SG_safe_free( dlstate ); return -ENOMEM; } ms_client_init_curl_handle( client, curl, url, auth_header ); // set up download rc = md_download_context_init( dlctx, curl, MS_MAX_MSG_SIZE, dlstate ); if( rc != 0 ) { SG_safe_free( dlstate ); SG_safe_free( url ); SG_safe_free( auth_header ); curl_easy_cleanup( curl ); return rc; } // watch the download rc = md_download_loop_watch( dlloop, dlctx ); if( rc != 0 ) { SG_error("md_download_loop_watch rc = %d\n", rc ); md_download_context_free( dlctx, NULL ); SG_safe_free( dlstate ); SG_safe_free( url ); SG_safe_free( auth_header ); curl_easy_cleanup( curl ); return rc; } // set up download state ms_client_get_dir_download_state_init( dlstate, batch_id, url, auth_header ); // start download rc = md_download_context_start( client->dl, dlctx ); if( rc != 0 ) { md_download_context_free( dlctx, NULL ); ms_client_get_dir_download_state_free( dlstate ); dlstate = NULL; curl_easy_cleanup( curl ); return rc; } return rc; }
// finish up getting directory metadata, and free up the download handle // return 0 on success, and set *batch_id to this download's batch // *ret_num_children to the number of children downloaded, and *max_gen to be the largest generation number seen. // return -ENOMEM on OOM static int ms_client_get_dir_metadata_end( struct ms_client* client, uint64_t parent_id, struct md_download_context* dlctx, ms_client_dir_listing* dir_listing, int64_t* batch_id, size_t* ret_num_children, int64_t* max_gen ) { int rc = 0; int listing_error = 0; struct md_entry* children = NULL; size_t num_children = 0; CURL* curl = NULL; int64_t biggest_generation = 0; struct ms_client_get_dir_download_state* dlstate = (struct ms_client_get_dir_download_state*)md_download_context_get_cls( dlctx ); md_download_context_set_cls( dlctx, NULL ); // download status? rc = ms_client_download_parse_errors( dlctx ); if( rc != 0 ) { if( rc != -EAGAIN) { // fatal SG_error("ms_client_download_parse_errors( %p ) rc = %d\n", dlctx, rc ); } // TODO: connection pool md_download_context_unref_free( dlctx, &curl ); if( curl != NULL ) { curl_easy_cleanup( curl ); } ms_client_get_dir_download_state_free( dlstate ); dlstate = NULL; return rc; } // collect the data rc = ms_client_listing_read_entries( client, dlctx, &children, &num_children, &listing_error ); // done with the download // TODO: connection pool md_download_context_unref_free( dlctx, &curl ); if( curl != NULL ) { curl_easy_cleanup( curl ); } ms_client_get_dir_download_state_free( dlstate ); dlstate = NULL; // did we get valid data? if( rc != 0 ) { SG_error("ms_client_listing_read_entries(%p) rc = %d\n", dlctx, rc ); return rc; } if( listing_error != MS_LISTING_NEW ) { // somehow we didn't get data. 
shouldn't happen in listdir SG_error("BUG: failed to get listing data for %" PRIX64 ", listing_error = %d\n", parent_id, listing_error ); return -ENODATA; } // merge children in for( unsigned int i = 0; i < num_children; i++ ) { uint64_t file_id = children[i].file_id; SG_debug("%p: %" PRIX64 "\n", dlctx, file_id ); if( dir_listing->count( file_id ) > 0 ) { SG_error("Duplicate child %" PRIX64 "\n", file_id ); rc = -EBADMSG; } if( rc == 0 ) { try { (*dir_listing)[ file_id ] = children[i]; } catch( bad_alloc& ba ) { rc = -ENOMEM; break; } // generation? if( children[i].generation > biggest_generation ) { biggest_generation = children[i].generation; } } if( rc != 0 ) { break; } } // NOTE: shallow free--we've copied the children into dir_listing SG_safe_free( children ); *ret_num_children = num_children; *max_gen = biggest_generation; return 0; }
// download metadata for a directory, in one of two ways: // LISTDIR: fetch num_children entries in parallel by requesting disjoint ranges of them by index, in the range [0, dir_capacity]. // DIFFDIR: query by least unknown generation number until we have num_children entries, or the number of entries in a downloaded batch becomes 0 (i.e. no more entries known). // in both cases, stop once the number of children is exceeded. // if least_unknown_generation >= 0, then we will DIFFDIR. // if dir_capacity >= 0, then we will LISTDIR. // we can only do one or the other (both/neither are invalid arguments) // return partial results, even on error // return 0 on success // return -EINVAL for invalid arguments. // return -ENOMEM on OOM // return negative on download failure, or corruption static int ms_client_get_dir_metadata( struct ms_client* client, uint64_t parent_id, int64_t num_children, int64_t least_unknown_generation, int64_t dir_capacity, struct ms_client_multi_result* results ) { int rc = 0; struct md_download_loop* dlloop = NULL; queue< int64_t > batch_queue; ms_client_dir_listing children; uint64_t num_children_downloaded = 0; int64_t max_known_generation = 0; struct md_download_context* dlctx = NULL; int64_t batch_id = 0; size_t num_children_fetched = 0; int64_t max_generation_fetched = 0; int query_count = 0; struct md_entry* ents = NULL; // sanity check if( least_unknown_generation < 0 && dir_capacity < 0 ) { return -EINVAL; } if( least_unknown_generation >= 0 && dir_capacity >= 0 ) { return -EINVAL; } memset( results, 0, sizeof(struct ms_client_multi_result) ); SG_debug("listdir %" PRIX64 ", num_children = %" PRId64 ", l.u.g. 
= %" PRId64 ", dir_capacity = %" PRId64 "\n", parent_id, num_children, least_unknown_generation, dir_capacity ); try { if( least_unknown_generation >= 0 ) { // download from a generation offset batch_queue.push( least_unknown_generation ); } else { // get all batches in parallel for( int64_t batch_id = 0; batch_id * client->page_size < dir_capacity; batch_id++ ) { batch_queue.push( batch_id ); } } } catch( bad_alloc& ba ) { return -ENOMEM; } // set up the md_download_loop dlloop = md_download_loop_new(); if( dlloop == NULL ) { return -ENOMEM; } rc = md_download_loop_init( dlloop, client->dl, client->max_connections ); if( rc != 0 ) { SG_safe_free( dlloop ); return rc; } // run the downloads! do { while( batch_queue.size() > 0 ) { // next batch int64_t next_batch = batch_queue.front(); batch_queue.pop(); query_count++; // next download rc = md_download_loop_next( dlloop, &dlctx ); if( rc != 0 ) { if( rc == -EAGAIN ) { // all downloads are running break; } SG_error("md_download_loop_next rc = %d\n", rc ); break; } // GOGOGO! rc = ms_client_get_dir_metadata_begin( client, parent_id, least_unknown_generation, next_batch, dlloop, dlctx ); if( rc != 0 ) { SG_error("ms_client_get_dir_metadata_begin( LUG=%" PRId64 ", batch=%" PRId64 " ) rc = %d\n", least_unknown_generation, next_batch, rc ); break; } } if( rc != 0 ) { break; } // await next download rc = md_download_loop_run( dlloop ); if( rc != 0 ) { SG_error("md_download_loop_run rc = %d\n", rc ); break; } // process all completed downloads while( true ) { // next completed download rc = md_download_loop_finished( dlloop, &dlctx ); if( rc != 0 ) { // out of downloads? 
if( rc == -EAGAIN ) { rc = 0; break; } SG_error("md_download_loop_finish rc = %d\n", rc ); break; } // process it rc = ms_client_get_dir_metadata_end( client, parent_id, dlctx, &children, &batch_id, &num_children_fetched, &max_generation_fetched ); if( rc != 0 ) { SG_error("ms_client_get_dir_metadata_end rc = %d\n", rc ); break; } num_children_downloaded += num_children_fetched; max_known_generation = MAX( max_generation_fetched, max_known_generation ); // are we out of children to fetch? if( num_children_fetched == 0 ) { if( (unsigned)num_children_downloaded >= (unsigned)num_children ) { SG_debug("Out of children (%" PRIu64 " fetched total)\n", num_children_downloaded ); rc = MD_DOWNLOAD_FINISH; break; } } // do we need to switch over to LISTDIR? if( batch_queue.size() == 0 && num_children_downloaded < (unsigned)num_children ) { // yup SG_debug("Fetched %" PRIu64 " children (%" PRId64 " total); l.u.g. is now %" PRIu64 "\n", num_children_downloaded, num_children, max_known_generation + 1 ); least_unknown_generation = max_known_generation + 1; batch_queue.push( least_unknown_generation ); } } if( rc != 0 ) { break; } } while( (batch_queue.size() > 0 || md_download_loop_running( dlloop )) && num_children_downloaded < (unsigned)num_children ); if( rc != 0 ) { // download stopped prematurely md_download_loop_abort( dlloop ); int i = 0; // free all ms_client_get_dir_download_state for( dlctx = md_download_loop_next_initialized( dlloop, &i ); dlctx != NULL; dlctx = md_download_loop_next_initialized( dlloop, &i ) ) { if( dlctx == NULL ) { break; } struct ms_client_get_dir_download_state* dlstate = (struct ms_client_get_dir_download_state*)md_download_context_get_cls( dlctx ); md_download_context_set_cls( dlctx, NULL ); if( dlstate != NULL ) { ms_client_get_dir_download_state_free( dlstate ); dlstate = NULL; } } } md_download_loop_cleanup( dlloop, NULL, NULL ); md_download_loop_free( dlloop ); SG_safe_free( dlloop ); if( rc == MD_DOWNLOAD_FINISH ) { rc = 0; } // coalesce 
what we have into results ents = SG_CALLOC( struct md_entry, children.size() ); if( ents == NULL ) { if( rc == 0 ) { rc = -ENOMEM; } // preserve download error, if need be return rc; } int i = 0; for( ms_client_dir_listing::iterator itr = children.begin(); itr != children.end(); itr++ ) { ents[i] = itr->second; i++; } // populate results results->ents = ents; results->reply_error = 0; results->num_processed = query_count; results->num_ents = children.size(); return rc; }