// get the manifest mod time in its entirety int fs_entry_get_manifest_mod_time( struct fs_core* core, char const* fs_path, struct timespec* t ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } if( fent->manifest == NULL ) { errorf("BUG: %" PRIX64 " (%s) not initialized\n", fent->file_id, fent->name ); exit(1); } if( !fent->manifest->is_initialized() ) { fs_entry_unlock( fent ); return -ENODATA; } fent->manifest->get_modtime( t ); fs_entry_unlock( fent ); return 0; }
// get the gateway coordinator of a file uint64_t fs_entry_get_block_host( struct fs_core* core, char* fs_path, uint64_t block_id ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } if( fent->manifest == NULL ) { errorf("BUG: %" PRIX64 " (%s) not initialized\n", fent->file_id, fent->name ); exit(1); } if( !fent->manifest->is_initialized() ) { fs_entry_unlock( fent ); return -ENODATA; } uint64_t ret = fent->manifest->get_block_host( core, block_id ); fs_entry_unlock( fent ); return ret; }
// detach a file from the filesystem // Only remove a directory if it is empty. int fs_entry_detach( struct fs_core* core, char const* path, uint64_t user, uint64_t vol ) { // resolve the parent of this child (and write-lock it) char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); int err = 0; struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err ); free( path_dirname ); if( parent == NULL ) { free( path_basename ); return err; } if( parent->ftype != FTYPE_DIR ) { // not a directory fs_entry_unlock( parent ); free( path_basename ); return -ENOTDIR; } if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // directory not searchable fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } // is parent writeable? if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // nope fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); if( child == NULL ) { // doesn't exist fs_entry_unlock( parent ); return -ENOENT; } int rc = fs_entry_detach_lowlevel( core, parent, child ); fs_entry_unlock( parent ); return rc; }
// utime int fs_entry_utime( struct fs_core* core, char const* path, struct utimbuf* tb, uint64_t user, uint64_t volume ) { int err = 0; uint64_t parent_id = 0; char* parent_name = NULL; struct fs_entry* fent = fs_entry_resolve_path_and_parent_info( core, path, user, volume, true, &err, &parent_id, &parent_name ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } // check permissions if( tb == NULL && !IS_WRITEABLE( fent->mode, fent->owner, fent->volume, user, volume ) ) { fs_entry_unlock( fent ); return -EACCES; } if( tb != NULL && fent->owner != user ) { fs_entry_unlock( fent ); return -EACCES; } if( tb != NULL ) { fent->mtime_sec = tb->modtime; fent->atime = tb->actime; } else { struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); fent->mtime_sec = ts.tv_sec; fent->mtime_nsec = ts.tv_nsec; fent->atime = fent->mtime_sec; } fent->atime = currentTimeSeconds(); // post update struct md_entry up; fs_entry_to_md_entry( core, &up, fent, parent_id, parent_name ); int rc = ms_client_update( core->ms, &fent->write_nonce, &up ); if( rc != 0 ) { errorf("ms_client_update(%s) rc = %d\n", path, rc ); } md_entry_free( &up ); fs_entry_unlock( fent ); return rc; }
// access int fs_entry_access( struct fs_core* core, char const* path, int mode, uint64_t user, uint64_t volume ) { // make sure this path exists int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } // F_OK implicitly satisfied if( (mode & R_OK) && !IS_READABLE( fent->mode, fent->owner, fent->volume, user, volume ) ) { err = -EACCES; } else if( (mode & W_OK) && !IS_WRITEABLE( fent->mode, fent->owner, fent->volume, user, volume ) ) { err = -EACCES; } else if( (mode & X_OK) && !IS_EXECUTABLE( fent->mode, fent->owner, fent->volume, user, volume ) ) { err = -EACCES; } fs_entry_unlock( fent ); return err; }
// statfs int fs_entry_statfs( struct fs_core* core, char const* path, struct statvfs *statv, uint64_t user, uint64_t vol ) { // make sure this path refers to a path in the FS int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, path, user, vol, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } uint64_t num_files = ms_client_get_num_files( core->ms ); // populate the statv struct statv->f_bsize = core->blocking_factor; statv->f_blocks = 0; statv->f_bfree = 0; statv->f_bavail = 0; statv->f_files = num_files; statv->f_ffree = 0; statv->f_fsid = SYNDICATEFS_MAGIC; statv->f_namemax = 256; // might as well keep it limited to what ext2/ext3/ext4 can handle statv->f_frsize = 0; statv->f_flag = ST_NODEV | ST_NOSUID; fs_entry_unlock( fent ); return 0; }
// fstat int fs_entry_fstat( struct fs_core* core, struct fs_file_handle* fh, struct stat* sb ) { int rc = fs_file_handle_rlock( fh ); if( rc != 0 ) { errorf("fs_file_handle_rlock rc = %d\n", rc ); return -EBADF; } // revalidate rc = fs_entry_revalidate_path( core, fh->volume, fh->path ); if( rc != 0 ) { errorf("fs_entry_revalidate_path(%s) rc = %d\n", fh->path, rc ); fs_file_handle_unlock( fh ); if( rc == -ENOENT ) { // file no longer exists return -EBADF; } return -EREMOTEIO; } rc = fs_entry_rlock( fh->fent ); if( rc != 0 ) { errorf("fs_entry_rlock rc = %d\n", rc ); fs_file_handle_unlock( fh ); return -EBADF; } fs_entry_do_stat( core, fh->fent, sb ); fs_entry_unlock( fh->fent ); fs_file_handle_unlock( fh ); return 0; }
// get the parent and child nodes on create/open, checking permissions along the way // write-lock the parent. // do NOT touch the child // if the child is not found, *child will be set to NULL // return 0 on success // return -ENOTDIR if a directory along the path wasn't a directory // return -EACCES on permission error int fs_entry_open_parent_and_child( struct fs_core* core, char const* path, uint64_t user, uint64_t vol, struct fs_entry** ret_parent, struct fs_entry** ret_child ) { // resolve the parent of this child (and write-lock it) int rc = 0; char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &rc ); if( parent == NULL ) { free( path_basename ); free( path_dirname ); return rc; } free( path_dirname ); if( parent->ftype != FTYPE_DIR ) { // parent is not a directory fs_entry_unlock( parent ); free( path_basename ); return -ENOTDIR; } // can parent be searched? if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // nope fs_entry_unlock( parent ); free( path_basename ); return -EACCES; } // resolve the child struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); *ret_parent = parent; *ret_child = child; return 0; }
// open a directory, but fail-fast if we can't get path metadata struct fs_dir_handle* fs_entry_opendir( struct fs_core* core, char const* fs_path, uint64_t user, uint64_t vol, int* err ) { // ensure path ends in / char path[PATH_MAX]; strcpy( path, fs_path ); md_sanitize_path( path ); int rc = fs_entry_revalidate_path( core, path ); if( rc != 0 ) { SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc ); return NULL; } uint64_t parent_id = 0; char* parent_name = NULL; struct fs_entry* dir = fs_entry_resolve_path_and_parent_info( core, path, user, vol, true, err, &parent_id, &parent_name ); if( dir == NULL ) { return NULL; } // make sure it's a directory if( dir->ftype != FTYPE_DIR ) { *err = -ENOTDIR; fs_entry_unlock( dir ); return NULL; } // open this directory dir->open_count++; struct fs_dir_handle* dirh = fs_dir_handle_create( dir, path, parent_id, parent_name ); rc = fs_dir_handle_open( dirh ); if( rc != 0 ) { fs_dir_handle_destroy( dirh ); free( dirh ); dirh = NULL; *err = rc; } // release the directory fs_entry_unlock( dir ); free( parent_name ); return dirh; }
// revalidate on open (not create) int fs_entry_open_revalidate( struct fs_core* core, char const* path, uint64_t user, uint64_t vol ) { int rc = 0; struct fs_entry* fent = NULL; // see that the entry still exists rc = fs_entry_revalidate_path( core, path ); if( rc != 0 ) { SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc ); return rc; } // find the entry fent = fs_entry_resolve_path( core, path, user, vol, true, &rc ); if( fent == NULL || rc != 0 ) { SG_error("fs_entry_resolve_path(%s) rc = %d\n", path, rc ); return rc; } // temporarily mark this entry as referenced, so it won't be unlinked while we revalidate fent->link_count++; fs_entry_unlock( fent ); // revalidate the entry's path and manifest rc = fs_entry_revalidate_metadata( core, path, fent, NULL ); fs_entry_wlock( fent ); fent->link_count--; fs_entry_unlock( fent ); if( rc != 0 ) { SG_error("fs_entry_revalidate_metadata(%s) rc = %d\n", path, rc ); } return rc; }
// get a file manifest as a string char* fs_entry_get_manifest_str( struct fs_core* core, char* fs_path ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { return NULL; } char* ret = fent->manifest->serialize_str(); fs_entry_unlock( fent ); return ret; }
// is a file local? bool fs_entry_is_local( struct fs_core* core, char const* path, uint64_t user, uint64_t volume, int* err ) { struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, err ); if( !fent || *err ) { if( !*err ) *err = -ENOMEM; return false; } bool rc = FS_ENTRY_LOCAL( core, fent ); fs_entry_unlock( fent ); return rc; }
// get a file manifest as a serialized protobuf ssize_t fs_entry_serialize_manifest( struct fs_core* core, char* fs_path, char** manifest_bits, bool sign ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { *manifest_bits = NULL; return err; } ssize_t ret = fs_entry_serialize_manifest( core, fent, manifest_bits, sign ); fs_entry_unlock( fent ); return ret; }
// chmod int fs_entry_chmod( struct fs_core* core, char const* path, uint64_t user, uint64_t volume, mode_t mode ) { int err = 0; uint64_t parent_id = 0; char* parent_name = NULL; struct fs_entry* fent = fs_entry_resolve_path_and_parent_info( core, path, user, volume, true, &err, &parent_id, &parent_name ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } // can't chmod unless we own the file if( fent->owner != user ) { fs_entry_unlock( fent ); free( parent_name ); return -EPERM; } fent->mode = mode; // post update struct md_entry up; fs_entry_to_md_entry( core, &up, fent, parent_id, parent_name ); int rc = ms_client_update( core->ms, &fent->write_nonce, &up ); if( rc != 0 ) { errorf("ms_client_update(%s) rc = %d\n", path, rc ); } md_entry_free( &up ); fs_entry_unlock( fent ); free( parent_name ); return rc; }
// get the in-memory version of a file int64_t fs_entry_get_version( struct fs_core* core, char const* fs_path ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } int64_t ret = fent->version; fs_entry_unlock( fent ); return ret; }
// set the mod time (at the nanosecond resolution) int fs_entry_set_mod_time( struct fs_core* core, char const* fs_path, struct timespec* t ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, true, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } fent->mtime_sec = t->tv_sec; fent->mtime_nsec = t->tv_nsec; fs_entry_unlock( fent ); return 0; }
// is this local? That is, is the block hosted here? bool fs_entry_is_block_local( struct fs_core* core, char const* path, uint64_t user, uint64_t volume, uint64_t block_id ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } bool rc = fent->manifest->is_block_local( core, block_id ); fs_entry_unlock( fent ); return rc; }
// get the actual creation time // get the mod time in its entirety int fs_entry_get_creation_time( struct fs_core* core, char const* fs_path, struct timespec* t ) { int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } t->tv_sec = fent->ctime_sec; t->tv_nsec = fent->ctime_nsec; fs_entry_unlock( fent ); return 0; }
// undo a local create // parent and child must be write-locked // this will unlock and free the child int fs_entry_undo_create( struct fs_core* core, char const* path, struct fs_entry* parent, struct fs_entry* child ) { // revert child->link_count--; child->open_count--; if( child->open_count == 0 ) { fs_entry_free_working_data( child ); } fs_entry_unlock( child ); // NOTE: parent will still exist--we can't remove a non-empty directory fs_entry_detach_lowlevel( core, parent, child ); return 0; }
// stat int fs_entry_stat_extended( struct fs_core* core, char const* path, struct stat* sb, bool* is_local, int64_t* version, uint64_t* coordinator_id, uint64_t user, uint64_t volume, bool revalidate ) { int rc = 0; if( revalidate ) { // revalidate rc = fs_entry_revalidate_path( core, volume, path ); if( rc != 0 ) { errorf("fs_entry_revalidate_path(%s) rc = %d\n", path, rc ); return rc; } } int err = 0; struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, &err ); if( !fent || err ) { if( !err ) err = -ENOMEM; return err; } memset( sb, 0, sizeof(struct stat) ); // have entry read-locked fs_entry_do_stat( core, fent, sb ); if( is_local ) { *is_local = FS_ENTRY_LOCAL( core, fent ); } if( version ) { *version = fent->version; } if( coordinator_id ) { *coordinator_id = fent->coordinator; } fs_entry_unlock( fent ); return 0; }
// attach a file to the filesystem (same as link()) // THIS METHOD ONLY UPDATES THE METADATA; IT DOES NOT TOUCH STABLE STORAGE int fs_entry_attach( struct fs_core* core, struct fs_entry* fent, char const* path, uint64_t user, uint64_t vol ) { // sanity check: path's basename should be fent's name char* path_base = md_basename( path, NULL ); if( strcmp( fent->name, path_base ) != 0 ) { free(path_base); return -EINVAL; // invalid entry } free( path_base ); int err = 0; char* dirname = md_dirname( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, dirname, user, vol, true, &err ); free( dirname ); if( parent == NULL ) { return err; } err = 0; fs_entry_wlock( fent ); if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // directory not searchable fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -EACCES; } if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not writable--cannot insert fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -EACCES; } if( fs_entry_set_find_name( parent->children, fent->name ) == NULL ) { fs_entry_attach_lowlevel( core, parent, fent ); } else { err = -EEXIST; } fs_entry_unlock( fent ); fs_entry_unlock( parent ); return err; }
// fstat, with directory int fs_entry_fstat_dir( struct fs_core* core, struct fs_dir_handle* dh, struct stat* sb ) { if( fs_dir_handle_rlock( dh ) != 0 ) { return -EBADF; } // revalidate int rc = fs_entry_revalidate_path( core, dh->volume, dh->path ); if( rc != 0 ) { errorf("fs_entry_revalidate_path(%s) rc = %d\n", dh->path, rc ); fs_dir_handle_unlock( dh ); return -EREMOTEIO; } if( fs_entry_rlock( dh->dent ) != 0 ) { fs_dir_handle_unlock( dh ); return -EBADF; } fs_entry_do_stat( core, dh->dent, sb ); fs_entry_unlock( dh->dent ); fs_dir_handle_unlock( dh ); return 0; }
// remove a directory, if it is empty int fs_entry_rmdir( struct fs_core* core, char const* path, uint64_t user, uint64_t volume ) { if( core->gateway == GATEWAY_ANON ) { errorf("%s", "Removing directories is forbidden for anonymous gateways\n"); return -EPERM; } // get some info about this directory first int rc = 0; char* fpath = strdup( path ); md_sanitize_path( fpath ); // revalidate this path rc = fs_entry_revalidate_path( core, volume, fpath ); if( rc != 0 && rc != -ENOENT ) { // consistency cannot be guaranteed errorf("fs_entry_revalidate_path(%s) rc = %d\n", fpath, rc ); free( fpath ); return rc; } free( fpath ); int err = 0; struct fs_entry* dent = fs_entry_resolve_path( core, path, user, volume, false, &err ); if( !dent || err ) { return err; } if( dent->ftype != FTYPE_DIR ) { fs_entry_unlock( dent ); return -ENOTDIR; } char* path_dirname = md_dirname( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, volume, true, &err ); free( path_dirname ); if( !parent || err ) { fs_entry_unlock( dent ); return err; } // IS THE PARENT EMPTY? if( fs_entry_set_count( dent->children ) > 2 ) { // nope fs_entry_unlock( dent ); fs_entry_unlock( parent ); return -ENOTEMPTY; } struct md_entry ent; fs_entry_to_md_entry( core, &ent, dent, parent->file_id, parent->name ); // tell the MS that this directory should go away rc = ms_client_delete( core->ms, &ent ); md_entry_free( &ent ); if( rc != 0 ) { errorf( "ms_client_delete(%s) rc = %d\n", path, rc ); rc = -EREMOTEIO; fs_entry_unlock( dent ); fs_entry_unlock( parent ); } else { fs_entry_unlock( dent ); // detach from the filesystem rc = fs_entry_detach_lowlevel( core, parent, dent ); if( rc != 0 ) { errorf("fs_entry_detach_lowlevel(%s) rc = %d\n", path, rc ); } fs_entry_unlock( parent ); } return rc; }
// lowlevel unlink operation--given an fs_entry and the name of an entry // parent must be write-locked! // child must NOT be locked! int fs_entry_detach_lowlevel( struct fs_core* core, struct fs_entry* parent, struct fs_entry* child ) { if( parent == child ) { // tried to detach . return -ENOTEMPTY; } if( child == NULL ) { // no entry found return -ENOENT; } fs_entry_wlock( child ); if( child->link_count == 0 ) { // child is invalid fs_entry_unlock( child ); return -ENOENT; } // if the child is a directory, and it's not empty, then don't proceed if( child->ftype == FTYPE_DIR && fs_entry_set_count( child->children ) > 2 ) { // not empty fs_entry_unlock( child ); return -ENOTEMPTY; } // unlink fs_entry_set_remove( parent->children, child->name ); struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); parent->mtime_sec = ts.tv_sec; parent->mtime_nsec = ts.tv_nsec; int rc = 0; if( child->open_count == 0 ) { // evict blocks, if there is a file to begin with if( child->ftype == FTYPE_FILE && child->file_id != 0 ) { rc = md_cache_evict_file( core->cache, child->file_id, child->version ); if( rc == -ENOENT ) { // not a problem rc = 0; } } if( rc == 0 ) { fs_entry_destroy( child, false ); free( child ); child = NULL; } else { fs_entry_unlock( child ); } } else { fs_entry_unlock( child ); } if( rc == 0 && child ) { child->link_count = 0; } return rc; }
// unlink a file from the filesystem // pass -1 if the version is not known, or pass the known version to be unlinked // return -EUCLEAN if we failed to garbage-collect, but needed to (i.e. a manifest was missing) // return -EREMOTEIO for failure to revalidate metadata // return -ESTALE if the given information is out of date int fs_entry_versioned_unlink( struct fs_core* core, char const* path, uint64_t file_id, uint64_t coordinator_id, int64_t known_version, uint64_t owner, uint64_t volume, uint64_t gateway_id, bool check_file_id_and_coordinator_id ) { // can't modify state if anonymous if( core->gateway == SG_GATEWAY_ANON ) { SG_error("%s", "Writing is forbidden for anonymous gateways\n"); return -EPERM; } // get some info about this file first int rc = 0; int err = 0; bool no_manifest = false; // consistency check err = fs_entry_revalidate_path( core, path ); if( err != 0 ) { SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, err ); if( err == -ENOENT ) return -ENOENT; return -EREMOTEIO; } // look up the parent char* path_dirname = md_dirname( path, NULL ); char* path_basename = md_basename( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, owner, volume, true, &err ); free( path_dirname ); if( !parent || err ) { free( path_basename ); return err; } if( parent->ftype != FTYPE_DIR ) { fs_entry_unlock( parent ); free( path_basename ); return err; } // get the child struct fs_entry* fent = fs_entry_set_find_name( parent->children, path_basename ); free( path_basename ); if( fent == NULL ) { fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ENOENT; } fs_entry_wlock( fent ); bool local = FS_ENTRY_LOCAL( core, fent ); int64_t version = fent->version; if( check_file_id_and_coordinator_id ) { if( fent->file_id != file_id ) { SG_error("Remote unlink to file %s ID %" PRIX64 ", expected %" PRIX64 "\n", path, file_id, fent->file_id ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } if( fent->coordinator != coordinator_id ) { SG_error("Remote unlink to file %s coordinator %" PRIu64 ", expected %" PRIu64 "\n", path, coordinator_id, fent->coordinator ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } } if( known_version > 0 && fent->version > 0 && fent->version != known_version ) { SG_error("Remote unlink to file %s version %" PRId64 ", expected %" PRId64 "\n", path, known_version, fent->version ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return -ESTALE; } // make sure the manifest is fresh, so we delete every block // only need to worry about this if file has > 0 size if( fent->size > 0 ) { // try to get it err = fs_entry_revalidate_manifest( core, path, fent ); if( err != 0 ) { SG_error( "fs_entry_revalidate_manifest(%s) rc = %d\n", path, err ); if( err == -ENOENT ) { // continue without a manifest no_manifest = true; SG_error("WARN: no manifest found for %s %" PRIX64 ". Assuming data is already vacuumed.\n", path, fent->file_id ); } else { // some other problem fs_entry_unlock( fent ); fs_entry_unlock( parent ); return err; } } } // tell the driver we're deleting int driver_rc = driver_delete_file( core, core->closure, path, fent ); if( driver_rc != 0 ) { SG_error("driver_delete_file(%s %" PRIX64 ") rc = %d\n", path, fent->file_id, driver_rc ); fs_entry_unlock( fent ); fs_entry_unlock( parent ); return driver_rc; } rc = 0; if( !local ) { // this is someone else's file; tell them to unlink Serialization::WriteMsg* detach_request = new Serialization::WriteMsg(); fs_entry_init_write_message( detach_request, core, Serialization::WriteMsg::DETACH ); fs_entry_prepare_detach_message( detach_request, path, fent, version ); Serialization::WriteMsg* detach_ack = new Serialization::WriteMsg(); // send the write message, or become the coordinator rc = fs_entry_send_write_or_coordinate( core, path, fent, detach_request, detach_ack ); if( rc < 0 ) { SG_error( "fs_entry_send_write_or_coordinate(%s) rc = %d\n", path, rc ); } else if( rc == 0 ) { // successfully sent if( detach_ack->type() != Serialization::WriteMsg::ACCEPTED ) { if( detach_ack->type() == Serialization::WriteMsg::ERROR ) { // could not detach on the remote end SG_error( "remote unlink error = %d (%s)\n", detach_ack->errorcode(), detach_ack->errortxt().c_str() ); rc = detach_ack->errorcode(); } else { // unknown message SG_error( "remote unlink invalid message %d\n", detach_ack->type() ); rc = -EIO; } } } else { // we're now the coordinator. local = true; } delete detach_ack; delete detach_request; } if( local ) { // we're responsible for this file // mark the file as deleted, so it won't show up again in any listing fent->deletion_in_progress = true; // safe to unlock parent--it won't be empty (in a rmdir-able sense) until fent is fully garbage-collected, but fent won't be listed either fs_entry_unlock( parent ); // garbage-collect, then unlink on the MS. Loop this until we succeed in unlinking on the MS (which can only happen // once all of fent's data has been garbage-collected). while( true ) { if( !no_manifest ) { // if we got the latest manifest, garbage-collect all writes on the file rc = fs_entry_vacuumer_file( core, path, fent ); if( rc != 0 ) { SG_error("fs_entry_vacuumer_vacuum_file( %s %" PRIX64 " ) rc = %d\n", path, fent->file_id, rc ); // failed to garbage-collect...need to un-delete fent fent->deletion_in_progress = false; fs_entry_unlock( fent ); return -EREMOTEIO; } } // tell the metadata server we just unlinked // preserve the entry information so we can issue a deletion struct md_entry ent; fs_entry_to_md_entry( core, &ent, fent, parent->file_id, parent->name ); rc = ms_client_delete( core->ms, &ent ); md_entry_free( &ent ); if( rc != 0 ) { SG_error( "ms_client_delete(%s) rc = %d\n", path, rc ); if( rc == -EAGAIN ) { if( !no_manifest ) { // try vacuuming again--some write got added in between our garbage-collection and our unlink request rc = 0; continue; } else { // there are un-garbage-collected writes, but we have no manifest, so we can't vacuum in order to proceed with the delete. SG_error("MEMORY LEAK DETECTED: No manifest for %" PRIX64 " available; unable to vacuum!\n", fent->file_id ); // failed to garbage-collect...need to un-delete fent fent->deletion_in_progress = false; fs_entry_unlock( fent ); return -EUCLEAN; } } else { // something more serious rc = -EREMOTEIO; fent->deletion_in_progress = false; fs_entry_unlock( fent ); return rc; } } else { // success! break; } } // re-lock the parent--it's guaranteed to exist, since it's not empty fs_entry_wlock( parent ); // unlock fent--we're done with it fs_entry_unlock( fent ); // detatch fent from parent rc = fs_entry_detach_lowlevel( core, parent, fent ); if( rc != 0 ) { SG_error("fs_entry_detach_lowlevel(%" PRIX64 ") rc = %d\n", fent->file_id, rc ); fs_entry_unlock( parent ); return rc; } fs_entry_unlock( parent ); } return rc; }
// make a node (regular files only at this time) int fs_entry_mknod( struct fs_core* core, char const* path, mode_t mode, dev_t dev, uint64_t user, uint64_t vol ) { // only regular files at this time... if( ! ( S_ISREG( mode ) || S_ISFIFO( mode ) ) ) { return -ENOTSUP; } // revalidate this path int rc = fs_entry_revalidate_path( core, path ); if( rc != 0 && rc != -ENOENT ) { // consistency cannot be guaranteed SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc ); return rc; } int err = 0; // get the parent directory and lock it char* path_dirname = md_dirname( path, NULL ); struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err ); free( path_dirname ); if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not searchable fs_entry_unlock( parent ); return -EACCES; } if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) { // not writeable fs_entry_unlock( parent ); return -EACCES; } uint64_t parent_id = parent->file_id; char* parent_name = strdup( parent->name ); char* path_basename = md_basename( path, NULL ); // make sure it doesn't exist already (or isn't in the process of being deleted, since we might have to re-create it if deleting it fails) if( fs_entry_set_find_name( parent->children, path_basename ) != NULL ) { free( path_basename ); fs_entry_unlock( parent ); free( parent_name ); return -EEXIST; } struct fs_entry* child = (struct fs_entry*)calloc( sizeof(struct fs_entry), 1 ); struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); int mmode = 0; if (S_ISFIFO(mode)) { mmode = ( mode & 0777 ) | S_IFIFO; err = fs_entry_init_fifo( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); } if (S_ISREG(mode)) { mmode = ( mode & 0777 ); err = fs_entry_init_file( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); } if( err == 0 ) { // mark it as created in this session child->created_in_session = true; // we're creating, so this manifest is initialized (to zero blocks) child->manifest->initialize_empty( child->version ); fs_entry_wlock( child ); // call the driver err = driver_create_file( core, core->closure, path, child ); if( err != 0 ) { // undo SG_error("driver_create_file(%s) rc = %d\n", path, err ); child->open_count = 0; fs_entry_unlock( child ); fs_entry_destroy( child, false ); free( child ); } else { // attach the file fs_entry_attach_lowlevel( core, parent, child ); struct md_entry data; fs_entry_to_md_entry( core, &data, child, parent_id, parent_name ); // create the child on the MS, obtaining its file ID and write nonce err = ms_client_create( core->ms, &child->file_id, &child->write_nonce, &data ); md_entry_free( &data ); if( err != 0 ) { SG_error( "ms_client_create(%s) rc = %d\n", path, err ); err = -EREMOTEIO; child->open_count = 0; fs_entry_unlock( child ); fs_entry_detach_lowlevel( core, parent, child ); free( child ); } else { fs_entry_unlock( child ); } } } fs_entry_unlock( parent ); free( parent_name ); free( path_basename ); return err; }
// Try to open a file, but fail-fast on error. It behaves as close to POSIX-open as possible, with the following differences: // * return -EREMOTEIO if the UG could not contact the MS, or if it could not obtain a fresh manifest. // * return -EUCLEAN if the UG was unable to merge metadata from the MS into its metadata hierarchy (usually indicates a bug) // * return a driver-specific, non-zero error code given by the driver's create_file() method // Side-effects: // * re-downloads and updates metadata for all entries along the path that are stale. // * re-downloads the manifest for the i-node if it is stale. struct fs_file_handle* fs_entry_open( struct fs_core* core, char const* _path, uint64_t user, uint64_t vol, int flags, mode_t mode, int* err ) { // sanity check if( (flags & O_RDONLY) == 0 && (flags & O_RDWR) != 0 && (flags & O_WRONLY) != 0 ) { *err = -EINVAL; return NULL; } if( (flags & O_RDONLY) != 0 && (flags & O_WRONLY) != 0 ) { *err = -EINVAL; return NULL; } // sanity check: check open mode vs whether or not we're a client and/or have read-only caps if( core->gateway == SG_GATEWAY_ANON ) { // no authentication; we're read-only if( flags & (O_CREAT | O_RDWR | O_WRONLY | O_TRUNC | O_EXCL) ) { SG_error("%s", "Opening to create, write, or truncate is forbidden for anonymous gateways\n"); *err = -EPERM; return NULL; } } int rc = 0; char* parent_name = NULL; uint64_t parent_id = 0; struct fs_entry* child = NULL; struct fs_entry* parent = NULL; char const* reval_method = NULL; struct fs_file_handle* ret = NULL; // make sure path is sane char* path = strdup(_path); md_sanitize_path( path ); // revalidate metadata if( flags & O_CREAT ) { reval_method = "fs_entry_create_revalidate"; rc = fs_entry_create_revalidate( core, path, user, vol ); } else { reval_method = "fs_entry_open_revalidate"; rc = fs_entry_open_revalidate( core, path, user, vol ); } if( rc != 0 ) { SG_error("%s(%s) rc = %d\n", reval_method, path, rc ); *err = rc; free( path ); return NULL; } // get the parent and child // NOTE: parent will be write-locked; child will not be rc = fs_entry_open_parent_and_child( core, path, user, vol, &parent, &child ); if( rc != 0 ) { SG_error("fs_entry_open_parent_and_child( %s ) rc = %d\n", path, rc ); *err = rc; free( path ); return NULL; } if( flags & O_CREAT ) { // creating... if( child != NULL ) { // can't create--child exists *err = -EEXIST; free( path ); return NULL; } // carry out the local create rc = fs_entry_do_create( core, path, parent, &child, user, vol, mode ); if( rc != 0 ) { SG_error("fs_entry_do_create( %s ) rc = %d\n", path, rc ); *err = rc; free( path ); return NULL; } // preserve these before unlocking, since we'll need them for the file handle parent_id = parent->file_id; parent_name = strdup( parent->name ); fs_entry_wlock( child ); fs_entry_unlock( parent ); // carry out the remote create rc = fs_entry_do_MS_create( core, path, child, parent_id, parent_name ); if( rc != 0 ) { SG_error("fs_entry_do_MS_create(%s) rc = %d\n", path, rc ); if( rc == -EAGAIN ) { *err = rc; } else { *err = -EREMOTEIO; } // NOTE: parent is guaranteed to exist, since child is attached to it and is write-locked (so it can't be unlinked) fs_entry_wlock( parent ); fs_entry_undo_create( core, path, parent, child ); fs_entry_unlock( parent ); child = NULL; free( path ); free( parent_name ); return NULL; } } else { // opening... if( child == NULL ) { fs_entry_unlock( parent ); // can't open--child doesn't exist *err = -ENOENT; free( path ); return NULL; } // preserve these before unlocking, since we'll need them for the file handle parent_id = parent->file_id; parent_name = strdup( parent->name ); fs_entry_wlock( child ); fs_entry_unlock( parent ); // carry out the open rc = fs_entry_do_open( core, path, child, user, vol, flags ); if( rc != 0 ) { fs_entry_unlock( child ); SG_error("fs_entry_do_open(%s) rc = %d\n", path, rc ); *err = rc; free( path ); free( parent_name ); return NULL; } // if we're truncating, do so as well if( flags & O_TRUNC ) { rc = fs_entry_open_truncate( core, path, child, parent_id, parent_name ); if( rc != 0 ) { fs_entry_unlock( child ); SG_error("fs_entry_open_truncate(%s) rc = %d\n", path, rc ); *err = rc; free( path ); free( parent_name ); return NULL; } } } // success! child->atime = md_current_time_seconds(); // give back a file handle ret = fs_file_handle_create( core, child, path, parent_id, parent_name ); fs_file_handle_open( ret, flags, mode ); fs_entry_unlock( child ); free( path ); free( parent_name ); return ret; }
// carry out the create locally. // check permissions, initialize the child, and add it as a child of parent. // return the initialized child (which will NOT be locked) via *ret_child // return 0 on success // return -EACCES on permission failure // parent MUST be write locked int fs_entry_do_create( struct fs_core* core, char const* path, struct fs_entry* parent, struct fs_entry** ret_child, uint64_t user, uint64_t vol, mode_t mode ) { int rc = 0; struct fs_entry* child = NULL; if( !IS_WRITEABLE(parent->mode, parent->owner, parent->volume, user, vol) ) { // can't create return -EACCES; } else { struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ); // can create--initialize the child child = SG_CALLOC( struct fs_entry, 1 ); char* path_basename = md_basename( path, NULL ); rc = fs_entry_init_file( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 ); free( path_basename ); if( rc != 0 ) { SG_error("fs_entry_init_file(%s) rc = %d\n", path, rc ); fs_entry_destroy( child, false ); free( child ); return rc; } else { // mark it as created in this session child->created_in_session = true; // we're creating, so this manifest is initialized (to zero blocks) child->manifest->initialize_empty( child->version ); // run the driver int driver_rc = driver_create_file( core, core->closure, path, child ); if( driver_rc != 0 ) { SG_error("driver_create_file(%s) rc = %d\n", path, driver_rc ); fs_entry_destroy( child, false ); free( child ); return driver_rc; } // insert it into the filesystem fs_entry_wlock( child ); // open it child->open_count++; fs_entry_setup_working_data( core, child ); fs_entry_attach_lowlevel( core, parent, child ); fs_entry_unlock( child ); *ret_child = child; } } return 0; }
int main(int argc, char** argv) { md_debug(1); md_error(1); SG_debug("%s\n", "starting up debugging"); SG_error("%s\n", "starting up errors"); int c; char* config_file = (char*)CLIENT_DEFAULT_CONFIG; int portnum = 0; struct md_HTTP syndicate_http; char* username = NULL; char* password = NULL; char* volume_name = NULL; char* volume_secret = NULL; char* ms_url = NULL; int read_count = 1; static struct option syndicate_options[] = { {"config-file", required_argument, 0, 'c'}, {"volume-name", required_argument, 0, 'v'}, {"volume-secret", required_argument, 0, 's'}, {"username", required_argument, 0, 'u'}, {"password", required_argument, 0, 'p'}, {"port", required_argument, 0, 'P'}, {"MS", required_argument, 0, 'm'}, {"read-count", required_argument, 0, 'R'}, {0, 0, 0, 0} }; int opt_index = 0; while((c = getopt_long(argc, argv, "c:v:s:u:p:P:fm:R:", syndicate_options, &opt_index)) != -1) { switch( c ) { case 'R': { read_count = strtol(optarg, NULL, 10); break; } case 'v': { volume_name = optarg; break; } case 'c': { config_file = optarg; break; } case 's': { volume_secret = optarg; break; } case 'u': { username = optarg; break; } case 'p': { password = optarg; break; } case 'P': { portnum = strtol(optarg, NULL, 10); break; } case 'm': { ms_url = optarg; break; } default: { break; } } } int rc = syndicate_init( config_file, &syndicate_http, portnum, ms_url, volume_name, volume_secret, username, password ); if( rc != 0 ) exit(1); struct md_syndicate_conf* conf = syndicate_get_conf(); if( portnum == 0 ) portnum = conf->httpd_portnum; struct syndicate_state* state = syndicate_get_state(); // synchronous everything conf->default_write_freshness = 0; conf->default_read_freshness = 0; char file[PATH_MAX]; memset(file, 0, PATH_MAX); strcpy( file, READ_FILE ); struct fs_file_handle* fh = NULL; ssize_t nw = 0; ssize_t file_size = conf->blocking_factor * 100; char* buf = SG_CALLOC( char, file_size ); char fill = rand() % 26 + 'A'; memset( buf, fill, file_size ); struct timespec ts, ts2; DATA_BLOCK("open"); SG_BEGIN_TIMING_DATA( ts ); // create the file fh = fs_entry_open( state->core, file, NULL, conf->owner, conf->volume, O_SYNC | O_RDWR, 0666, &rc ); if( rc != 0 ) { SG_error("fs_entry_open(%s) rc = %d\n", file, rc ); exit(1); } SG_END_TIMING_DATA( ts, ts2, "open + MS revalidate + manifest refresh" ); DATA_BLOCK("remote read miss"); // mark the file as stale fs_entry_wlock( fh->fent ); fs_entry_mark_read_stale( fh->fent ); fs_entry_unlock( fh->fent ); char const* key = "remote read miss"; char const* hit = "remote read hit"; for( int i = 0; i < read_count; i++ ) { // read the file SG_BEGIN_TIMING_DATA( ts ); nw = fs_entry_read( state->core, fh, buf, file_size, 0 ); if( nw != file_size ) { SG_error("fs_entry_read(%s) rc = %ld\n", file, nw ); exit(1); } SG_END_TIMING_DATA( ts, ts2, key ); key = hit; char buf[100]; sprintf(buf, "remote read hit %d", i ); DATA_BLOCK(buf); } SG_BEGIN_TIMING_DATA( ts ); // close rc = fs_entry_close( state->core, fh ); if( rc != 0 ) { SG_error("fs_entry_close(%s) rc = %d\n", file, rc ); exit(1); } SG_END_TIMING_DATA( ts, ts2, "close" ); DATA_BLOCK(""); free( fh ); syndicate_destroy(); free( buf ); return 0; }