// detach a file from the filesystem // Only remove a directory if it is empty. int fs_entry_detach( struct fs_core* core, char const* path, uint64_t user, uint64_t vol ) { // resolve the parent of this child (and write-lock it) char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); int err = 0; struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err ); free( path_dirname ); if( parent == NULL ) { free( path_basename ); return err; } if( parent->ftype != FTYPE_DIR ) { // not a directory fs_entry_unlock( parent ); free( path_basename ); return -ENOTDIR; } if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // directory not searchable fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } // is parent writeable? if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // nope fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); if( child == NULL ) { // doesn't exist fs_entry_unlock( parent ); return -ENOENT; } int rc = fs_entry_detach_lowlevel( core, parent, child ); fs_entry_unlock( parent ); return rc; }
// attach a file to the filesystem (same as link()) // THIS METHOD ONLY UPDATES THE METADATA; IT DOES NOT TOUCH STABLE STORAGE int fs_entry_attach( struct fs_core* core, struct fs_entry* fent, char const* path, uint64_t user, uint64_t vol ) { // sanity check: path's basename should be fent's name char* path_base = md_basename( path, NULL ); if( strcmp( fent->name, path_base ) != 0 ) { free(path_base); return -EINVAL; // invalid entry } free( path_base ); int err = 0; char* dirname = md_dirname( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, dirname, user, vol, true, &err ); free( dirname ); if( parent == NULL ) { return err; } err = 0; fs_entry_wlock( fent ); if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // directory not searchable fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -EACCES; } if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not writable--cannot insert fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -EACCES; } if( fs_entry_set_find_name( parent->children, fent->name ) == NULL ) { fs_entry_attach_lowlevel( core, parent, fent ); } else { err = -EEXIST; } fs_entry_unlock( fent ); fs_entry_unlock( parent ); return err; }
// get the parent and child nodes on create/open, checking permissions along the way // write-lock the parent. // do NOT touch the child // if the child is not found, *child will be set to NULL // return 0 on success // return -ENOTDIR if a directory along the path wasn't a directory // return -EACCES on permission error int fs_entry_open_parent_and_child( struct fs_core* core, char const* path, uint64_t user, uint64_t vol, struct fs_entry** ret_parent, struct fs_entry** ret_child ) { // resolve the parent of this child (and write-lock it) int rc = 0; char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &rc ); if( parent == NULL ) { free( path_basename ); free( path_dirname ); return rc; } free( path_dirname ); if( parent->ftype != FTYPE_DIR ) { // parent is not a directory fs_entry_unlock( parent ); free( path_basename ); return -ENOTDIR; } // can parent be searched? if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // nope fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } // resolve the child struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); *ret_parent = parent; *ret_child = child; return 0; }
// callback to apply over a file's blocks. // cls must be of type struct md_cache_cb_add_lru_args // return 0 on success // return -ENOMEM on OOM // return -EINVAL if we couldn't parse the block path static int md_cache_cb_add_lru( char const* block_path, void* cls ) { struct md_cache_cb_add_lru_args* args = (struct md_cache_cb_add_lru_args*)cls; md_cache_lru_t* cache_lru = args->cache_lru; uint64_t file_id = args->file_id; int64_t file_version = args->file_version; uint64_t block_id = 0; int64_t block_version = 0; // scan path for block ID and block version char* block_path_basename = md_basename( block_path, NULL ); if( block_path_basename == NULL ) { return -ENOMEM; } int rc = sscanf( block_path_basename, "%" PRIu64 ".%" PRId64, &block_id, &block_version ); if( rc != 2 ) { SG_error("Unparsable block name '%s'\n", block_path_basename ); rc = -EINVAL; } else { struct md_cache_entry_key lru_key; memset( &lru_key, 0, sizeof(lru_key) ); lru_key.file_id = file_id; lru_key.file_version = file_version; lru_key.block_id = block_id; lru_key.block_version = block_version; rc = 0; try { cache_lru->push_back( lru_key ); } catch( bad_alloc& ba ) { rc = -ENOMEM; } } SG_safe_free( block_path_basename ); return rc; }
// unlink a file from the filesystem // pass -1 if the version is not known, or pass the known version to be unlinked // return -EUCLEAN if we failed to garbage-collect, but needed to (i.e. a manifest was missing) // return -EREMOTEIO for failure to revalidate metadata // return -ESTALE if the given information is out of date int fs_entry_versioned_unlink( struct fs_core* core, char const* path, uint64_t file_id, uint64_t coordinator_id, int64_t known_version, uint64_t owner, uint64_t volume, uint64_t gateway_id, bool check_file_id_and_coordinator_id ) { // can't modify state if anonymous if( core->gateway == SG_GATEWAY_ANON ) { SG_error("%s", "Writing is forbidden for anonymous gateways\n"); return -EPERM; } // get some info about this file first int rc = 0; int err = 0; bool no_manifest = false; // consistency check err = fs_entry_revalidate_path( core, path ); if( err != 0 ) { SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, err ); if( err == -ENOENT ) return -ENOENT; return -EREMOTEIO; } // look up the parent char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, owner, volume, true, &err ); free( path_dirname ); if( !parent || err ) { free( path_basename ); return err; } if( parent->ftype != FTYPE_DIR ) { fs_entry_unlock( parent ); free( path_basename ); return err; } // get the child struct fs_entry* fent = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); if( fent == NULL ) { fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ENOENT; } fs_entry_wlock( fent ); bool local = FS_ENTRY_LOCAL( core, fent ); int64_t version = fent->version; if( check_file_id_and_coordinator_id ) { if( fent->file_id != file_id ) { SG_error("Remote unlink to file %s ID %" PRIX64 ", expected %" PRIX64 "\n", path, file_id, fent->file_id ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } if( fent->coordinator != coordinator_id ) { SG_error("Remote unlink to file %s coordinator %" PRIu64 ", expected %" PRIu64 "\n", path, coordinator_id, fent->coordinator ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } } if( known_version > 0 && fent->version > 0 && fent->version != known_version ) { SG_error("Remote unlink to file %s version %" PRId64 ", expected %" PRId64 "\n", path, known_version, fent->version ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } // make sure the manifest is fresh, so we delete every block // only need to worry about this if file has > 0 size if( fent->size > 0 ) { // try to get it err = fs_entry_revalidate_manifest( core, path, fent ); if( err != 0 ) { SG_error( "fs_entry_revalidate_manifest(%s) rc = %d\n", path, err ); if( err == -ENOENT ) { // continue without a manifest no_manifest = true; SG_error("WARN: no manifest found for %s %" PRIX64 ". Assuming data is already vacuumed.\n", path, fent->file_id ); } else { // some other problem fs_entry_unlock( fent ); fs_entry_unlock( parent ); return err; } } } // tell the driver we're deleting int driver_rc = driver_delete_file( core, core->closure, path, fent ); if( driver_rc != 0 ) { SG_error("driver_delete_file(%s %" PRIX64 ") rc = %d\n", path, fent->file_id, driver_rc ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return driver_rc; } rc = 0; if( !local ) { // this is someone else's file; tell them to unlink Serialization::WriteMsg* detach_request = new Serialization::WriteMsg(); fs_entry_init_write_message( detach_request, core, Serialization::WriteMsg::DETACH ); fs_entry_prepare_detach_message( detach_request, path, fent, version ); Serialization::WriteMsg* detach_ack = new Serialization::WriteMsg(); // send the write message, or become the coordinator rc = fs_entry_send_write_or_coordinate( core, path, fent, detach_request, detach_ack ); if( rc < 0 ) { SG_error( "fs_entry_send_write_or_coordinate(%s) rc = %d\n", path, rc ); } else if( rc == 0 ) { // successfully sent if( detach_ack->type() != Serialization::WriteMsg::ACCEPTED ) { if( detach_ack->type() == Serialization::WriteMsg::ERROR ) { // could not detach on the remote end SG_error( "remote unlink error = %d (%s)\n", detach_ack->errorcode(), detach_ack->errortxt().c_str() ); rc = detach_ack->errorcode(); } else { // unknown message SG_error( "remote unlink invalid message %d\n", detach_ack->type() ); rc = -EIO; } } } else { // we're now the coordinator. local = true; } delete detach_ack; delete detach_request; } if( local ) { // we're responsible for this file // mark the file as deleted, so it won't show up again in any listing fent->deletion_in_progress = true; // safe to unlock parent--it won't be empty (in a rmdir-able sense) until fent is fully garbage-collected, but fent won't be listed either fs_entry_unlock( parent ); // garbage-collect, then unlink on the MS. Loop this until we succeed in unlinking on the MS (which can only happen // once all of fent's data has been garbage-collected). while( true ) { if( !no_manifest ) { // if we got the latest manifest, garbage-collect all writes on the file rc = fs_entry_vacuumer_file( core, path, fent ); if( rc != 0 ) { SG_error("fs_entry_vacuumer_vacuum_file( %s %" PRIX64 " ) rc = %d\n", path, fent->file_id, rc ); // failed to garbage-collect...need to un-delete fent fent->deletion_in_progress = false; fs_entry_unlock( fent ); return -EREMOTEIO; } } // tell the metadata server we just unlinked // preserve the entry information so we can issue a deletion struct md_entry ent; fs_entry_to_md_entry( core, &ent, fent, parent->file_id, parent->name ); rc = ms_client_delete( core->ms, &ent ); md_entry_free( &ent ); if( rc != 0 ) { SG_error( "ms_client_delete(%s) rc = %d\n", path, rc ); if( rc == -EAGAIN ) { if( !no_manifest ) { // try vacuuming again--some write got added in between our garbage-collection and our unlink request rc = 0; continue; } else { // there are un-garbage-collected writes, but we have no manifest, so we can't vacuum in order to proceed with the delete. SG_error("MEMORY LEAK DETECTED: No manifest for %" PRIX64 " available; unable to vacuum!\n", fent->file_id ); // failed to garbage-collect...need to un-delete fent fent->deletion_in_progress = false; fs_entry_unlock( fent ); return -EUCLEAN; } } else { // something more serious rc = -EREMOTEIO; fent->deletion_in_progress = false; fs_entry_unlock( fent ); return rc; } } else { // success! break; } } // re-lock the parent--it's guaranteed to exist, since it's not empty fs_entry_wlock( parent ); // unlock fent--we're done with it fs_entry_unlock( fent ); // detatch fent from parent rc = fs_entry_detach_lowlevel( core, parent, fent ); if( rc != 0 ) { SG_error("fs_entry_detach_lowlevel(%" PRIX64 ") rc = %d\n", fent->file_id, rc ); fs_entry_unlock( parent ); return rc; } fs_entry_unlock( parent ); } return rc; }
// make a node (regular files only at this time) int fs_entry_mknod( struct fs_core* core, char const* path, mode_t mode, dev_t dev, uint64_t user, uint64_t vol ) { // only regular files at this time... if( ! ( S_ISREG( mode ) || S_ISFIFO( mode ) ) ) { return -ENOTSUP; } // revalidate this path int rc = fs_entry_revalidate_path( core, path ); if( rc != 0 && rc != -ENOENT ) { // consistency cannot be guaranteed SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc ); return rc; } int err = 0; // get the parent directory and lock it char* path_dirname = md_dirname( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err ); free( path_dirname ); if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not searchable fs_entry_unlock( parent ); return -EACCES; } if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not writeable fs_entry_unlock( parent ); return -EACCES; } uint64_t parent_id = parent->file_id; char* parent_name = strdup( parent->name ); char* path_basename = md_basename( path, NULL ); // make sure it doesn't exist already (or isn't in the process of being deleted, since we might have to re-create it if deleting it fails) if( fs_entry_set_find_name( parent->children, path_basename ) != NULL ) { free( path_basename ); fs_entry_unlock( parent ); free( parent_name ); return -EEXIST; } struct fs_entry* child = (struct fs_entry*)calloc( sizeof(struct fs_entry), 1 ); struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); int mmode = 0; if (S_ISFIFO(mode)) { mmode = ( mode & 0777 ) | S_IFIFO; err = fs_entry_init_fifo( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); } if (S_ISREG(mode)) { mmode = ( mode & 0777 ); err = fs_entry_init_file( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); } if( err == 0 ) { // mark it as created in this session child->created_in_session = true; // we're creating, so this manifest is initialized (to zero blocks) child->manifest->initialize_empty( child->version ); fs_entry_wlock( child ); // call the driver err = driver_create_file( core, core->closure, path, child ); if( err != 0 ) { // undo SG_error("driver_create_file(%s) rc = %d\n", path, err ); child->open_count = 0; fs_entry_unlock( child ); fs_entry_destroy( child, false ); free( child ); } else { // attach the file fs_entry_attach_lowlevel( core, parent, child ); struct md_entry data; fs_entry_to_md_entry( core, &data, child, parent_id, parent_name ); // create the child on the MS, obtaining its file ID and write nonce err = ms_client_create( core->ms, &child->file_id, &child->write_nonce, &data ); md_entry_free( &data ); if( err != 0 ) { SG_error( "ms_client_create(%s) rc = %d\n", path, err ); err = -EREMOTEIO; child->open_count = 0; fs_entry_unlock( child ); fs_entry_detach_lowlevel( core, parent, child ); free( child ); } else { fs_entry_unlock( child ); } } } fs_entry_unlock( parent ); free( parent_name ); free( path_basename ); return err; }
// carry out the create locally. // check permissions, initialize the child, and add it as a child of parent. // return the initialized child (which will NOT be locked) via *ret_child // return 0 on success // return -EACCES on permission failure // parent MUST be write locked int fs_entry_do_create( struct fs_core* core, char const* path, struct fs_entry* parent, struct fs_entry** ret_child, uint64_t user, uint64_t vol, mode_t mode ) { int rc = 0; struct fs_entry* child = NULL; if( !IS_WRITEABLE(parent->mode, parent->owner, parent->volume, user, vol) ) { // can't create return -EACCES; } else { struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); // can create--initialize the child child = SG_CALLOC( struct fs_entry, 1 ); char* path_basename = md_basename( path, NULL ); rc = fs_entry_init_file( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); free( path_basename ); if( rc != 0 ) { SG_error("fs_entry_init_file(%s) rc = %d\n", path, rc ); fs_entry_destroy( child, false ); free( child ); return rc; } else { // mark it as created in this session child->created_in_session = true; // we're creating, so this manifest is initialized (to zero blocks) child->manifest->initialize_empty( child->version ); // run the driver int driver_rc = driver_create_file( core, core->closure, path, child ); if( driver_rc != 0 ) { SG_error("driver_create_file(%s) rc = %d\n", path, driver_rc ); fs_entry_destroy( child, false ); free( child ); return driver_rc; } // insert it into the filesystem fs_entry_wlock( child ); // open it child->open_count++; fs_entry_setup_working_data( core, child ); fs_entry_attach_lowlevel( core, parent, child ); fs_entry_unlock( child ); *ret_child = child; } } return 0; }