예제 #1
0
// Look up which gateway hosts one block of a file.
//
// The path is resolved as SYS_USER on volume 0 (write-lock flag off), and the entry's
// manifest is asked for the host of block_id.
//
// Returns the manifest's answer on success.  On failure a negative errno value is returned
// *converted to uint64_t*: the resolver's error code, -ENOMEM if the resolver produced
// neither an entry nor an error, or -ENODATA if the manifest is not initialized yet.
// A NULL manifest is treated as a programming error and terminates the process.
uint64_t fs_entry_get_block_host( struct fs_core* core, char* fs_path, uint64_t block_id ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &rc );

   if( entry == NULL || rc != 0 ) {
      // no entry and no error code is reported as -ENOMEM
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   if( entry->manifest == NULL ) {
      errorf("BUG: %" PRIX64 " (%s) not initialized\n", entry->file_id, entry->name );
      exit(1);
   }

   // default to "no data" unless the manifest can actually answer
   uint64_t host = (uint64_t)(-ENODATA);
   if( entry->manifest->is_initialized() ) {
      host = entry->manifest->get_block_host( core, block_id );
   }

   fs_entry_unlock( entry );
   return host;
}
예제 #2
0
// statfs
int fs_entry_statfs( struct fs_core* core, char const* path, struct statvfs *statv, uint64_t user, uint64_t vol ) {
   // make sure this path refers to a path in the FS
   int err = 0;
   struct fs_entry* fent = fs_entry_resolve_path( core, path, user, vol, false, &err );
   if( !fent || err ) {
      if( !err )
         err = -ENOMEM;

      return err;
   }

   uint64_t num_files = ms_client_get_num_files( core->ms );

   // populate the statv struct
   statv->f_bsize = core->blocking_factor;
   statv->f_blocks = 0;
   statv->f_bfree = 0;
   statv->f_bavail = 0;
   statv->f_files = num_files;
   statv->f_ffree = 0;
   statv->f_fsid = SYNDICATEFS_MAGIC;
   statv->f_namemax = 256;    // might as well keep it limited to what ext2/ext3/ext4 can handle
   statv->f_frsize = 0;
   statv->f_flag = ST_NODEV | ST_NOSUID;
   
   fs_entry_unlock( fent );

   return 0;
}
예제 #3
0
// Copy the full modification time of a file's manifest into *t.
//
// The path is resolved as SYS_USER on volume 0 (write-lock flag off).
//
// Returns 0 on success, the resolver's negative errno on failure, -ENOMEM if the resolver
// produced neither an entry nor an error, or -ENODATA if the manifest is not initialized yet.
// A NULL manifest is treated as a programming error and terminates the process.
int fs_entry_get_manifest_mod_time( struct fs_core* core, char const* fs_path, struct timespec* t ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &rc );

   if( entry == NULL || rc != 0 ) {
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   if( entry->manifest == NULL ) {
      errorf("BUG: %" PRIX64 " (%s) not initialized\n", entry->file_id, entry->name );
      exit(1);
   }

   int result = -ENODATA;
   if( entry->manifest->is_initialized() ) {
      entry->manifest->get_modtime( t );
      result = 0;
   }

   fs_entry_unlock( entry );
   return result;
}
예제 #4
0
// access: check whether `user` in `volume` may access `path` in the ways requested by `mode`
// (a mask of R_OK, W_OK and X_OK; F_OK is satisfied as soon as the path resolves).
//
// Returns 0 if every requested permission is granted, -EACCES if any is denied, the
// resolver's negative errno if the path does not resolve, or -ENOMEM if the resolver
// produced neither an entry nor an error.
int fs_entry_access( struct fs_core* core, char const* path, int mode, uint64_t user, uint64_t volume ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, path, user, volume, false, &rc );

   if( entry == NULL || rc != 0 ) {
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   // a permission is "denied" only if it was both requested and not granted;
   // evaluation stops at the first denial
   bool denied =
         ( (mode & R_OK) && !IS_READABLE( entry->mode, entry->owner, entry->volume, user, volume ) )
      || ( (mode & W_OK) && !IS_WRITEABLE( entry->mode, entry->owner, entry->volume, user, volume ) )
      || ( (mode & X_OK) && !IS_EXECUTABLE( entry->mode, entry->owner, entry->volume, user, volume ) );

   fs_entry_unlock( entry );

   return denied ? -EACCES : 0;
}
예제 #5
0
// Serialize a file's manifest to a string.
//
// The path is resolved as SYS_USER on volume 0 (write-lock flag off).
// Returns whatever the manifest's serialize_str() produces, or NULL if the path could not
// be resolved.
// NOTE(review): the returned buffer is presumably owned by the caller -- confirm against
// serialize_str().
char* fs_entry_get_manifest_str( struct fs_core* core, char* fs_path ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &rc );

   if( entry == NULL || rc != 0 ) {
      return NULL;
   }

   char* serialized = entry->manifest->serialize_str();

   fs_entry_unlock( entry );
   return serialized;
}
예제 #6
0
// is a file local?
bool fs_entry_is_local( struct fs_core* core, char const* path, uint64_t user, uint64_t volume, int* err ) {
   struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, err );
   if( !fent || *err ) {
      if( !*err )
         *err = -ENOMEM;

      return false;
   }

   bool rc = FS_ENTRY_LOCAL( core, fent );
   fs_entry_unlock( fent );
   return rc;
}
예제 #7
0
// detach a file from the filesystem
// Only remove a directory if it is empty.
int fs_entry_detach( struct fs_core* core, char const* path, uint64_t user, uint64_t vol ) {

   // resolve the parent of this child (and write-lock it)
   char* path_dirname = md_dirname( path, NULL );
   char* path_basename = md_basename( path, NULL );
   int err = 0;
   struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err );
   free( path_dirname );

   if( parent == NULL ) {
      free( path_basename );
      return err;
   }
   if( parent->ftype != FTYPE_DIR ) {
      // not a directory
      fs_entry_unlock( parent );
      free( path_basename );
      return -ENOTDIR;
   }


   if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
      // directory not searchable
      fs_entry_unlock( parent );
      free( path_basename );
      return -EACCES;
   }
   
   // is parent writeable?
   if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
      // nope
      fs_entry_unlock( parent );
      free( path_basename );
      return -EACCES;
   }

   struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename );
   free( path_basename );

   if( child == NULL ) {
      // doesn't exist
      fs_entry_unlock( parent );
      return -ENOENT;
   }

   int rc = fs_entry_detach_lowlevel( core, parent, child );

   fs_entry_unlock( parent );

   return rc;
}
예제 #8
0
// Get a file's manifest as a serialized protobuf, looked up by path.
//
// The path is resolved as SYS_USER on volume 0 (write-lock flag off) and the work is
// delegated to the fs_entry-based overload of fs_entry_serialize_manifest.
//
// On success, returns whatever that overload returns and *manifest_bits is set by it.
// On failure to resolve the path, *manifest_bits is set to NULL and a negative errno is
// returned: the resolver's error, or -ENOMEM if the resolver produced neither an entry nor
// an error (so that a failed lookup can never be mistaken for a 0-byte success).
ssize_t fs_entry_serialize_manifest( struct fs_core* core, char* fs_path, char** manifest_bits, bool sign ) {
   int err = 0;
   struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err );
   if( !fent || err ) {
      if( !err ) {
         // same convention as the other path-based accessors in this file
         err = -ENOMEM;
      }

      *manifest_bits = NULL;
      return err;
   }

   ssize_t ret = fs_entry_serialize_manifest( core, fent, manifest_bits, sign );

   fs_entry_unlock( fent );

   return ret;
}
예제 #9
0
// get the in-memory version of a file
int64_t fs_entry_get_version( struct fs_core* core, char const* fs_path ) {
   int err = 0;
   struct fs_entry* fent = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &err );
   if( !fent || err ) {
      if( !err )
         err = -ENOMEM;

      return err;
   }

   int64_t ret = fent->version;

   fs_entry_unlock( fent );
   return ret;
}
예제 #10
0
// Overwrite a file's modification time with *t, at nanosecond resolution.
//
// The path is resolved as SYS_USER on volume 0 with the write-lock flag set.
// Returns 0 on success; otherwise the resolver's negative errno, or -ENOMEM if the resolver
// produced neither an entry nor an error.
int fs_entry_set_mod_time( struct fs_core* core, char const* fs_path, struct timespec* t ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, true, &rc );

   if( entry == NULL || rc != 0 ) {
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   // seconds and nanoseconds are stored as separate fields on the entry
   entry->mtime_sec = t->tv_sec;
   entry->mtime_nsec = t->tv_nsec;

   fs_entry_unlock( entry );
   return 0;
}
예제 #11
0
// Copy a file's creation time (seconds and nanoseconds) into *t.
//
// The path is resolved as SYS_USER on volume 0 (write-lock flag off).
// Returns 0 on success; otherwise the resolver's negative errno, or -ENOMEM if the resolver
// produced neither an entry nor an error.
int fs_entry_get_creation_time( struct fs_core* core, char const* fs_path, struct timespec* t ) {
   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, fs_path, SYS_USER, 0, false, &rc );

   if( entry == NULL || rc != 0 ) {
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   t->tv_sec = entry->ctime_sec;
   t->tv_nsec = entry->ctime_nsec;

   fs_entry_unlock( entry );
   return 0;
}
예제 #12
0
// Is the given block of the file at `path` hosted by this gateway?
//
// The path is resolved for (user, volume) with the write-lock flag off, and the entry's
// manifest answers via is_block_local().
//
// Returns the manifest's answer, or false if the path cannot be resolved.  The function has
// no channel for reporting the resolver's error code, so a failed lookup is indistinguishable
// from "not local".
bool fs_entry_is_block_local( struct fs_core* core, char const* path, uint64_t user, uint64_t volume, uint64_t block_id ) {
   int err = 0;
   struct fs_entry* fent = fs_entry_resolve_path( core, path, user, volume, false, &err );
   if( !fent || err ) {
      // Do NOT return err here: a negative errno converts to `true`, which would claim
      // that a block of an unresolvable file is hosted locally.
      return false;
   }

   bool rc = fent->manifest->is_block_local( core, block_id );

   fs_entry_unlock( fent );

   return rc;
}
예제 #13
0
// attach a file to the filesystem (same as link())
// THIS METHOD ONLY UPDATES THE METADATA; IT DOES NOT TOUCH STABLE STORAGE
int fs_entry_attach( struct fs_core* core, struct fs_entry* fent, char const* path, uint64_t user, uint64_t vol ) {
   // sanity check: path's basename should be fent's name
   char* path_base = md_basename( path, NULL );
   if( strcmp( fent->name, path_base ) != 0 ) {
      free(path_base);
      return -EINVAL;      // invalid entry
   }
   free( path_base );

   int err = 0;
   char* dirname = md_dirname( path, NULL );
   struct fs_entry* parent = fs_entry_resolve_path( core, dirname, user, vol, true, &err );
   free( dirname );
   if( parent == NULL ) {
      return err;
   }

   err = 0;

   fs_entry_wlock( fent );

   if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
      // directory not searchable
      fs_entry_unlock( fent );
      fs_entry_unlock( parent );
      return -EACCES;
   }
   if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
      // not writable--cannot insert
      fs_entry_unlock( fent );
      fs_entry_unlock( parent );
      return -EACCES;
   }
   if( fs_entry_set_find_name( parent->children, fent->name ) == NULL ) {
      fs_entry_attach_lowlevel( core, parent, fent );
   }
   else {
      err = -EEXIST;
   }

   fs_entry_unlock( fent );

   fs_entry_unlock( parent );

   return err;
}
예제 #14
0
// get the parent and child nodes on create/open, checking permissions along the way
// write-lock the parent.
// do NOT touch the child
// if the child is not found, *child will be set to NULL
// return 0 on success
// return -ENOTDIR if a directory along the path wasn't a directory
// return -EACCES on permission error
int fs_entry_open_parent_and_child( struct fs_core* core, char const* path, uint64_t user, uint64_t vol, struct fs_entry** ret_parent, struct fs_entry** ret_child ) {

    // resolve the parent of this child (and write-lock it)
    int rc = 0;
    char* path_dirname = md_dirname( path, NULL );
    char* path_basename = md_basename( path, NULL );

    struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &rc );

    if( parent == NULL ) {

        free( path_basename );
        free( path_dirname );

        return rc;
    }

    free( path_dirname );

    if( parent->ftype != FTYPE_DIR ) {
        // parent is not a directory
        fs_entry_unlock( parent );
        free( path_basename );

        return -ENOTDIR;
    }

    // can parent be searched?
    if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
        // nope
        fs_entry_unlock( parent );
        free( path_basename );

        return -EACCES;
    }

    // resolve the child
    struct fs_entry* child = fs_entry_set_find_name( parent->children, path_basename );

    free( path_basename );

    *ret_parent = parent;
    *ret_child = child;

    return 0;
}
예제 #15
0
// stat, with optional extra outputs.
//
// If `revalidate` is set, the path is revalidated first and any failure is returned as-is.
// The entry is then resolved for (user, volume) with the write-lock flag off, *sb is zeroed
// and filled in by fs_entry_do_stat, and each of is_local, version and coordinator_id is
// written only if the corresponding pointer is non-NULL.
//
// Returns 0 on success; otherwise the revalidation error, the resolver's negative errno, or
// -ENOMEM if the resolver produced neither an entry nor an error.
int fs_entry_stat_extended( struct fs_core* core, char const* path, struct stat* sb, bool* is_local, int64_t* version, uint64_t* coordinator_id, uint64_t user, uint64_t volume, bool revalidate ) {

   if( revalidate ) {
      int reval_rc = fs_entry_revalidate_path( core, volume, path );
      if( reval_rc != 0 ) {
         errorf("fs_entry_revalidate_path(%s) rc = %d\n", path, reval_rc );
         return reval_rc;
      }
   }

   int rc = 0;
   struct fs_entry* entry = fs_entry_resolve_path( core, path, user, volume, false, &rc );

   if( entry == NULL || rc != 0 ) {
      return ( rc != 0 ) ? rc : -ENOMEM;
   }

   // entry is held (read-locked) from here until the unlock below
   memset( sb, 0, sizeof(struct stat) );
   fs_entry_do_stat( core, entry, sb );

   // optional outputs
   if( is_local != NULL ) {
      *is_local = FS_ENTRY_LOCAL( core, entry );
   }

   if( version != NULL ) {
      *version = entry->version;
   }

   if( coordinator_id != NULL ) {
      *coordinator_id = entry->coordinator;
   }

   fs_entry_unlock( entry );

   return 0;
}
예제 #16
0
// Revalidate an existing entry when it is opened (not created).
//
// First the path is revalidated, then the entry is resolved with the write-lock flag set,
// and finally its metadata is revalidated via fs_entry_revalidate_metadata.  The entry's
// link_count is bumped for the duration of the metadata revalidation, because the entry's
// lock is released while that call runs.
//
// Returns 0 on success, or the first non-zero code from fs_entry_revalidate_path,
// fs_entry_resolve_path or fs_entry_revalidate_metadata.
// NOTE(review): if the resolver returns NULL without setting an error code, this returns 0
// without having revalidated anything -- confirm the resolver never does that.
int fs_entry_open_revalidate( struct fs_core* core, char const* path, uint64_t user, uint64_t vol ) {

    int rc = 0;
    struct fs_entry* fent = NULL;

    // see that the entry still exists
    rc = fs_entry_revalidate_path( core, path );
    if( rc != 0 ) {
        SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc );

        return rc;
    }

    // find the entry
    fent = fs_entry_resolve_path( core, path, user, vol, true, &rc );
    if( fent == NULL || rc != 0 ) {

        SG_error("fs_entry_resolve_path(%s) rc = %d\n", path, rc );
        return rc;
    }

    // temporarily mark this entry as referenced, so it won't be unlinked while we revalidate
    fent->link_count++;

    // drop the lock before the metadata revalidation; the extra link_count is what keeps
    // fent in place while the lock is not held
    fs_entry_unlock( fent );

    // revalidate the entry's path and manifest
    rc = fs_entry_revalidate_metadata( core, path, fent, NULL );

    // re-acquire the write lock just to undo the temporary reference taken above
    fs_entry_wlock( fent );

    fent->link_count--;

    fs_entry_unlock( fent );

    // the revalidation result is logged only after the reference has been dropped
    if( rc != 0 ) {
        SG_error("fs_entry_revalidate_metadata(%s) rc = %d\n", path, rc );
    }

    return rc;
}
예제 #17
0
// Remove the directory at `path`, if it is empty.
//
// The path is revalidated first (a revalidation result of -ENOENT is tolerated and left for
// the resolver to report).  The directory is then resolved with the write-lock flag off and
// its parent with the write-lock flag set; the deletion is sent to the MS, and only if the
// MS accepts it is the directory detached locally.
//
// Returns 0 on success, or a negative errno:
//   -EPERM      this gateway is anonymous
//   -ENOMEM     out of memory, or the resolver produced neither an entry nor an error
//   -ENOTDIR    path does not name a directory
//   -ENOTEMPTY  the directory still has entries
//   -EREMOTEIO  the MS refused or failed the delete
//   other       errors from revalidation, resolution or fs_entry_detach_lowlevel
int fs_entry_rmdir( struct fs_core* core, char const* path, uint64_t user, uint64_t volume ) {

   if( core->gateway == GATEWAY_ANON ) {
      errorf("%s", "Removing directories is forbidden for anonymous gateways\n");
      return -EPERM;
   }
   
   // get some info about this directory first
   int rc = 0;
   
   char* fpath = strdup( path );
   if( fpath == NULL ) {
      // strdup failed; md_sanitize_path must not be handed a NULL path
      return -ENOMEM;
   }
   md_sanitize_path( fpath );
   
   // revalidate this path
   rc = fs_entry_revalidate_path( core, volume, fpath );
   if( rc != 0 && rc != -ENOENT ) {
      // consistency cannot be guaranteed
      errorf("fs_entry_revalidate_path(%s) rc = %d\n", fpath, rc );
      free( fpath );
      return rc;
   }
   
   free( fpath );

   int err = 0;
   struct fs_entry* dent = fs_entry_resolve_path( core, path, user, volume, false, &err );
   if( !dent || err ) {
      if( !err ) {
         // never report success when nothing was removed
         err = -ENOMEM;
      }

      return err;
   }

   if( dent->ftype != FTYPE_DIR ) {
      fs_entry_unlock( dent );
      return -ENOTDIR;
   }

   char* path_dirname = md_dirname( path, NULL );

   struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, volume, true, &err );

   free( path_dirname );

   if( !parent || err ) {
      if( !err ) {
         err = -ENOMEM;
      }

      fs_entry_unlock( dent );

      return err;
   }

   // is the directory being removed empty?
   // (more than 2 children means "not empty"; presumably the two allowed entries are
   // "." and ".." -- confirm against how the children set is populated)
   if( fs_entry_set_count( dent->children ) > 2 ) {
      // nope
      fs_entry_unlock( dent );
      fs_entry_unlock( parent );

      return -ENOTEMPTY;
   }


   struct md_entry ent;
   fs_entry_to_md_entry( core, &ent, dent, parent->file_id, parent->name );

   // tell the MS that this directory should go away
   rc = ms_client_delete( core->ms, &ent );
   md_entry_free( &ent );

   if( rc != 0 ) {
      errorf( "ms_client_delete(%s) rc = %d\n", path, rc );
      rc = -EREMOTEIO;

      fs_entry_unlock( dent );
      fs_entry_unlock( parent );
   }
   else {

      // release the directory's own lock before detaching it; the parent stays
      // write-locked across the detach
      fs_entry_unlock( dent );

      // detach from the filesystem 
      rc = fs_entry_detach_lowlevel( core, parent, dent );
      if( rc != 0 ) {
         errorf("fs_entry_detach_lowlevel(%s) rc = %d\n", path, rc );
      }
      
      fs_entry_unlock( parent );
   }
   
   return rc;
}
예제 #18
0
// Unlink a file from the filesystem.
//
// Pass known_version = -1 if the version is not known, or pass the known version to be
// unlinked.  If check_file_id_and_coordinator_id is set, file_id and coordinator_id must
// match the entry found at `path`.  gateway_id is not used by this function.
//
// If the file is coordinated by another gateway, a DETACH message is sent to it; if this
// gateway ends up as the coordinator (or already was), the file's data is vacuumed and the
// entry is deleted on the MS and detached locally.
//
// Returns 0 on success, or a negative errno:
//   -EPERM      this gateway is anonymous
//   -ENOENT     the path failed to revalidate with -ENOENT, or the parent has no such child
//   -ENOTDIR    the parent of path is not a directory
//   -ESTALE     the given file ID, coordinator ID or version is out of date
//   -EUCLEAN    we failed to garbage-collect, but needed to (i.e. a manifest was missing)
//   -EREMOTEIO  failure to revalidate the path, to vacuum, or to delete on the MS
//   other       errors from manifest revalidation, the driver, the remote coordinator,
//               or fs_entry_detach_lowlevel
int fs_entry_versioned_unlink( struct fs_core* core, char const* path, uint64_t file_id, uint64_t coordinator_id, int64_t known_version, uint64_t owner, uint64_t volume, uint64_t gateway_id, 
                               bool check_file_id_and_coordinator_id ) {
   
   // can't modify state if anonymous
   if( core->gateway == SG_GATEWAY_ANON ) {
      SG_error("%s", "Writing is forbidden for anonymous gateways\n");
      return -EPERM;
   }
   
   // get some info about this file first
   int rc = 0;
   int err = 0;
   bool no_manifest = false;
   
   // consistency check
   err = fs_entry_revalidate_path( core, path );
   if( err != 0 ) {
      SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, err );
      
      if( err == -ENOENT )
         return -ENOENT;
      
      return -EREMOTEIO;
   }
   
   // look up the parent
   char* path_dirname = md_dirname( path, NULL );
   char* path_basename = md_basename( path, NULL );
   
   struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, owner, volume, true, &err );

   free( path_dirname );

   if( !parent || err ) {

      free( path_basename );
      return err;
   }
   
   if( parent->ftype != FTYPE_DIR ) {
      // err is 0 at this point, so it must not be used as the return value
      fs_entry_unlock( parent );
      free( path_basename );
      return -ENOTDIR;
   }
   
   // get the child
   struct fs_entry* fent = fs_entry_set_find_name( parent->children, path_basename );
   
   free( path_basename );
   
   if( fent == NULL ) {
      // nothing to unlock for the child: it does not exist and was never locked
      fs_entry_unlock( parent );
      return -ENOENT;
   }
   
   fs_entry_wlock( fent );
   
   bool local = FS_ENTRY_LOCAL( core, fent );
   int64_t version = fent->version;
   
   if( check_file_id_and_coordinator_id ) {
      if( fent->file_id != file_id ) {
         SG_error("Remote unlink to file %s ID %" PRIX64 ", expected %" PRIX64 "\n", path, file_id, fent->file_id );
         fs_entry_unlock( fent );
         fs_entry_unlock( parent );
         return -ESTALE;
      }
      
      if( fent->coordinator != coordinator_id ) {
         SG_error("Remote unlink to file %s coordinator %" PRIu64 ", expected %" PRIu64 "\n", path, coordinator_id, fent->coordinator );
         fs_entry_unlock( fent );
         fs_entry_unlock( parent );
         return -ESTALE;
      }
   }
   
   // versions are only compared when both are positive
   if( known_version > 0 && fent->version > 0 && fent->version != known_version ) {
      SG_error("Remote unlink to file %s version %" PRId64 ", expected %" PRId64 "\n", path, known_version, fent->version );
      fs_entry_unlock( fent );
      fs_entry_unlock( parent );
      return -ESTALE;
   }
   
   // make sure the manifest is fresh, so we delete every block
   // only need to worry about this if file has > 0 size
   if( fent->size > 0 ) {
      
      // try to get it
      err = fs_entry_revalidate_manifest( core, path, fent );
            
      if( err != 0 ) {
         SG_error( "fs_entry_revalidate_manifest(%s) rc = %d\n", path, err );
         
         if( err == -ENOENT ) {
            // continue without a manifest 
            no_manifest = true;
            SG_error("WARN: no manifest found for %s %" PRIX64 ".  Assuming data is already vacuumed.\n", path, fent->file_id );
         }
         else {
            // some other problem
            fs_entry_unlock( fent );
            fs_entry_unlock( parent );
            return err;
         }
      }
   }
   
   // tell the driver we're deleting 
   int driver_rc = driver_delete_file( core, core->closure, path, fent );
   if( driver_rc != 0 ) {
      SG_error("driver_delete_file(%s %" PRIX64 ") rc = %d\n", path, fent->file_id, driver_rc );
      
      fs_entry_unlock( fent );
      fs_entry_unlock( parent );
      return driver_rc;
   }
   
   rc = 0;
   
   if( !local ) {
      // this is someone else's file; tell them to unlink
      Serialization::WriteMsg* detach_request = new Serialization::WriteMsg();

      fs_entry_init_write_message( detach_request, core, Serialization::WriteMsg::DETACH );
      
      fs_entry_prepare_detach_message( detach_request, path, fent, version );

      Serialization::WriteMsg* detach_ack = new Serialization::WriteMsg();
      
      // send the write message, or become the coordinator
      // (rc < 0: failure; rc == 0: message sent, inspect the ack; rc > 0: we now coordinate)
      rc = fs_entry_send_write_or_coordinate( core, path, fent, detach_request, detach_ack );
      
      if( rc < 0 ) {
         SG_error( "fs_entry_send_write_or_coordinate(%s) rc = %d\n", path, rc );
      }
      else if( rc == 0 ) {
         // successfully sent
         if( detach_ack->type() != Serialization::WriteMsg::ACCEPTED ) {
            if( detach_ack->type() == Serialization::WriteMsg::ERROR ) {
               // could not detach on the remote end
               SG_error( "remote unlink error = %d (%s)\n", detach_ack->errorcode(), detach_ack->errortxt().c_str() );
               rc = detach_ack->errorcode();
            }
            else {
               // unknown message
               SG_error( "remote unlink invalid message %d\n", detach_ack->type() );
               rc = -EIO;
            }
         }
      }
      else {
         // we're now the coordinator.          
         local = true;
      }

      delete detach_ack;
      delete detach_request;
   }

   if( local ) {
      // we're responsible for this file
      // mark the file as deleted, so it won't show up again in any listing 
      fent->deletion_in_progress = true;
      
      // safe to unlock parent--it won't be empty (in a rmdir-able sense) until fent is fully garbage-collected, but fent won't be listed either
      // NOTE(review): parent->file_id and parent->name are still read inside the loop below
      // while the parent is unlocked -- confirm that neither can change in the meantime.
      fs_entry_unlock( parent );
      
      // garbage-collect, then unlink on the MS.  Loop this until we succeed in unlinking on the MS (which can only happen 
      // once all of fent's data has been garbage-collected).
      while( true ) {
         
         if( !no_manifest ) {
            // if we got the latest manifest, garbage-collect all writes on the file 
            rc = fs_entry_vacuumer_file( core, path, fent );
            
            if( rc != 0 ) {
               SG_error("fs_entry_vacuumer_vacuum_file( %s %" PRIX64 " ) rc = %d\n", path, fent->file_id, rc );
               
               // failed to garbage-collect...need to un-delete fent
               fent->deletion_in_progress = false;
               fs_entry_unlock( fent );
               
               return -EREMOTEIO;
            }
         }
         
         // tell the metadata server we just unlinked
         // preserve the entry information so we can issue a deletion
         struct md_entry ent;
         fs_entry_to_md_entry( core, &ent, fent, parent->file_id, parent->name );

         rc = ms_client_delete( core->ms, &ent );
         md_entry_free( &ent );
            
         if( rc != 0 ) {
            SG_error( "ms_client_delete(%s) rc = %d\n", path, rc );
            
            if( rc == -EAGAIN ) {
               if( !no_manifest ) {
                  // try vacuuming again--some write got added in between our garbage-collection and our unlink request
                  rc = 0;
                  continue;
               }
               else {
                  // there are un-garbage-collected writes, but we have no manifest, so we can't vacuum in order to proceed with the delete.
                  SG_error("MEMORY LEAK DETECTED: No manifest for %" PRIX64 " available; unable to vacuum!\n", fent->file_id );
                  
                  // failed to garbage-collect...need to un-delete fent
                  fent->deletion_in_progress = false;
                  fs_entry_unlock( fent );
                  
                  return -EUCLEAN;
               }
            }
            else {
               
               // something more serious 
               rc = -EREMOTEIO;
               
               fent->deletion_in_progress = false;
               fs_entry_unlock( fent );
               return rc;
            }
         }
         else {
            // success!
            break;
         }
      }
      
      // re-lock the parent--it's guaranteed to exist, since it's not empty 
      fs_entry_wlock( parent );
      
      // unlock fent--we're done with it 
      fs_entry_unlock( fent );
      
      // detach fent from parent
      rc = fs_entry_detach_lowlevel( core, parent, fent );
      if( rc != 0 ) {
         SG_error("fs_entry_detach_lowlevel(%" PRIX64 ") rc = %d\n", fent->file_id, rc );
         fs_entry_unlock( parent );
         
         return rc;
      }
      
      fs_entry_unlock( parent );
   }
   else {
      // the file is still coordinated elsewhere (the remote end handled the request, or the
      // request failed).  Nothing more to do locally, but both locks taken above are still
      // held and must be released before returning.
      fs_entry_unlock( fent );
      fs_entry_unlock( parent );
   }
   
   return rc;
}
예제 #19
0
// Make a node at `path`.  Only regular files and FIFOs are supported; `dev` is not used.
//
// The path is revalidated (a result of -ENOENT is expected and tolerated), the parent
// directory is resolved with the write-lock flag set and checked for search and write
// permission, and a new entry is initialized, handed to the driver, attached to the parent
// and finally created on the MS.  If the driver or the MS fails, the new entry is torn down
// again.
//
// Returns 0 on success, or a negative errno:
//   -ENOTSUP    mode is neither a regular file nor a FIFO
//   -ENOMEM     out of memory, or the resolver produced neither an entry nor an error
//   -EACCES     the parent is not searchable or not writeable by (user, vol)
//   -EEXIST     the parent already has a child with this name
//   -EREMOTEIO  the MS failed to create the entry
//   other       errors from revalidation, resolution, entry initialization or the driver
int fs_entry_mknod( struct fs_core* core, char const* path, mode_t mode, dev_t dev, uint64_t user, uint64_t vol ) {
    // only regular files and FIFOs at this time...
    if( ! ( S_ISREG( mode ) || S_ISFIFO( mode ) ) ) {
        return -ENOTSUP;
    }

    // revalidate this path
    int rc = fs_entry_revalidate_path( core, path );
    if( rc != 0 && rc != -ENOENT ) {
        // consistency cannot be guaranteed
        SG_error("fs_entry_revalidate_path(%s) rc = %d\n", path, rc );
        return rc;
    }

    int err = 0;

    // get the parent directory and lock it
    char* path_dirname = md_dirname( path, NULL );
    struct fs_entry* parent = fs_entry_resolve_path( core, path_dirname, user, vol, true, &err );
    free( path_dirname );

    if( !parent || err ) {
        // the parent could not be resolved; it must not be dereferenced below
        if( !err ) {
            err = -ENOMEM;
        }

        return err;
    }

    if( !IS_DIR_READABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
        // not searchable
        fs_entry_unlock( parent );
        return -EACCES;
    }

    if( !IS_WRITEABLE( parent->mode, parent->owner, parent->volume, user, vol ) ) {
        // not writeable
        fs_entry_unlock( parent );
        return -EACCES;
    }

    uint64_t parent_id = parent->file_id;
    char* parent_name = strdup( parent->name );

    char* path_basename = md_basename( path, NULL );

    // make sure it doesn't exist already (or isn't in the process of being deleted, since we might have to re-create it if deleting it fails)
    if( fs_entry_set_find_name( parent->children, path_basename ) != NULL ) {
        free( path_basename );
        fs_entry_unlock( parent );
        free( parent_name );
        return -EEXIST;
    }

    struct fs_entry* child = (struct fs_entry*)calloc( 1, sizeof(struct fs_entry) );
    if( child == NULL ) {
        // out of memory: undo everything acquired so far
        free( path_basename );
        fs_entry_unlock( parent );
        free( parent_name );
        return -ENOMEM;
    }

    struct timespec ts;
    clock_gettime( CLOCK_REALTIME, &ts );
    int mmode = 0;
    if (S_ISFIFO(mode)) {
        // keep the permission bits and tag the entry as a FIFO
        mmode = ( mode & 0777 ) | S_IFIFO;
        err = fs_entry_init_fifo( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 );
    }
    if (S_ISREG(mode)) {
        // regular files carry only the permission bits
        mmode = ( mode & 0777 );
        err = fs_entry_init_file( core, child, path_basename, 0, fs_entry_next_file_version(), user, core->gateway, vol, mmode, 0, ts.tv_sec, ts.tv_nsec, 0, 0 );
    }

    if( err == 0 ) {

        // mark it as created in this session
        child->created_in_session = true;

        // we're creating, so this manifest is initialized (to zero blocks)
        child->manifest->initialize_empty( child->version );

        fs_entry_wlock( child );

        // call the driver
        err = driver_create_file( core, core->closure, path, child );

        if( err != 0 ) {
            // undo
            SG_error("driver_create_file(%s) rc = %d\n", path, err );

            child->open_count = 0;

            fs_entry_unlock( child );
            fs_entry_destroy( child, false );
            free( child );
        }

        else {

            // attach the file
            fs_entry_attach_lowlevel( core, parent, child );

            struct md_entry data;
            fs_entry_to_md_entry( core, &data, child, parent_id, parent_name );

            // create the child on the MS, obtaining its file ID and write nonce
            err = ms_client_create( core->ms, &child->file_id, &child->write_nonce, &data );

            md_entry_free( &data );

            if( err != 0 ) {
                SG_error( "ms_client_create(%s) rc = %d\n", path, err );
                err = -EREMOTEIO;

                // undo the attach; the parent is still write-locked here
                child->open_count = 0;
                fs_entry_unlock( child );
                fs_entry_detach_lowlevel( core, parent, child );
                free( child );
            }
            else {
                fs_entry_unlock( child );
            }
        }
    }

    fs_entry_unlock( parent );

    free( parent_name );
    free( path_basename );

    return err;
}