MarFS_Namespace* push_namespace(MarFS_Namespace* dummy, MarFS_Repo* repo) {
   if (! dummy) {
      LOG(LOG_ERR, "NULL namespace\n");
      exit(1);
   }
   if (!IS_ROOT_NS(dummy) && !repo) {
      LOG(LOG_ERR, "NULL repo\n");
      exit(1);
   }
   MarFS_Namespace* ns = (MarFS_Namespace*)malloc(sizeof(MarFS_Namespace));
   if (! ns) {
      LOG(LOG_ERR, "alloc failed for '%s'\n", dummy->name);
      exit(1);
   }
   if (_ns_count >= _ns_max) {
      LOG(LOG_ERR, "No room for namespace '%s'\n", dummy->name);
      exit(1);
   }

   LOG(LOG_INFO, "namespace: %-16s (repo: %s)\n", dummy->name, repo->name);
   *ns = *dummy;

   // use <repo> for everything.
   RangeList* ranges = (RangeList*) malloc(sizeof(RangeList));
   *ranges = (RangeList) {
      .min  = 0,
      .max  = -1,
      .repo = repo
   };
   ns->range_list  = ranges;
   ns->iwrite_repo = repo;

   // these make it quicker to parse parts of the paths
   ns->name_len     = strlen(ns->name);
   ns->mnt_path_len = strlen(ns->mnt_path);
   ns->md_path_len  = strlen(ns->md_path);

   _ns[_ns_count++] = ns;
   return ns;
}


int validate_config();          // fwd-decl

int read_config(const char* config_fname) {

   ///   // config_fname is ignored, for now, but will eventually hold everything
   ///   if (! config_fname)
   ///      config_fname = CONFIG_DEFAULT;
   ///
   ///   MarFS_mnt_top     = "/marfs";
   ///   MarFS_mnt_top_len = strlen(MarFS_mnt_top);

   _marfs_config.version_major = 0;
   _marfs_config.version_minor = 1;

   _marfs_config.mnt_top      = "/marfs";
   _marfs_config.mnt_top_len  = strlen(_marfs_config.mnt_top);

   _marfs_config.name         = "static";
   _marfs_config.name_len     = strlen(_marfs_config.name);

   // ...........................................................................
   // hard-coded repositories
   //
   //     For sproxyd, repo.name must match an existing fast-cgi path
   //     For S3,      repo.name must match an existing bucket
   //
   // ...........................................................................

   _repo_max = 64;              /* the number we're about to allocate */
   _repo     = (MarFS_Repo**) malloc(_repo_max * sizeof(MarFS_Repo*));

   MarFS_Repo r_dummy;

   r_dummy = (MarFS_Repo) {
      .name          = "proxy",       // repo is sproxyd: this must match fastcgi-path
      .host          = "10.135.0.%d:81",
      .host_offset   = 30,
      .host_count    = 4,
      .access_method = ACCESSMETHOD_SPROXYD,
      .chunk_size    = (1024 * 1024 * 1028), /* max MarFS object (tune to match storage) */
      .is_online     = 1,
      .auth          = AUTH_S3_AWS_MASTER,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000)
   };
   push_repo(&r_dummy);

   // tiny, so detailed debugging (where we watch every char go over the line)
   // won't be overwhelming at the scale needed for Multi.
   r_dummy = (MarFS_Repo) {
      .name          = "sproxyd_2k",  // repo is sproxyd: this must match fastcgi-path
      .host          = "10.135.0.21:81",
      .access_method = ACCESSMETHOD_SPROXYD,
      .chunk_size    = (2048),        /* i.e. max MarFS object (small for debugging) */
      .is_online     = 1,
      .auth          = AUTH_S3_AWS_MASTER,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000),
   };
   push_repo(&r_dummy);

   // For Brett, unit-testing, small enough to make it easy to create MULTIs
   r_dummy = (MarFS_Repo) {
      .name          = "sproxyd_1M",  // repo is sproxyd: this must match fastcgi-path
      .host          = "10.135.0.22:81",
      .access_method = ACCESSMETHOD_SPROXYD,
      .chunk_size    = (1024 * 1024 * 1), /* max MarFS object (tune to match storage) */
      .is_online     = 1,
      .auth          = AUTH_S3_AWS_MASTER,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000)
   };
   push_repo(&r_dummy);

   // @@@-HTTPS: For Brett, unit-testing, small enough to make it easy to create MULTIs
   r_dummy = (MarFS_Repo) {
      .name          = "sproxyd_1M_https", // repo is sproxyd: this must match fastcgi-path
      .host          = "10.135.0.22:444",
      .access_method = ACCESSMETHOD_SPROXYD,
      .chunk_size    = (1024 * 1024 * 1), /* max MarFS object (tune to match storage) */
      .is_online     = 1,
      .auth          = AUTH_S3_AWS_MASTER,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000)
   };
   push_repo(&r_dummy);

   // S3 on EMC ECS
   r_dummy = (MarFS_Repo) {
      .name          = "emc_s3",      // repo is s3: this must match existing bucket
      .host          = "10.140.0.15:9020", //"10.143.0.1:80",
      .access_method = ACCESSMETHOD_S3_EMC,
      .chunk_size    = (1024 * 1024 * 256), /* max MarFS object (tune to match storage) */
      .is_online     = 1,
      .auth          = AUTH_S3_AWS_MASTER,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000),
   };
   push_repo(&r_dummy);

#if TBD
   // semi-direct experiment
   r_dummy = (MarFS_Repo) {
      .name          = "semi",
      .host          = "/gpfs/marfs-gpfs/fuse/semi", //"10.143.0.1:443",
      .access_method = ACCESSMETHOD_SEMI_DIRECT,
      .chunk_size    = (1024 * 1024 * 1), /* max MarFS object (tune to match storage) */
      .is_online     = 1,
      .auth          = AUTH_NONE,
      .compression   = COMPRESS_NONE,
      .correction    = CORRECT_NONE,
      .encryption    = ENCRYPT_NONE,
      .latency_ms    = (10 * 1000),
   };
   push_repo(&r_dummy);
#endif

   // ...........................................................................
   // hard-coded namespaces
   //
   //     For sproxyd, namespace.name must match an existing sproxyd driver-alias
   //     For S3,      namespace.name is just part of the object-id
   //
   // NOTE: Two namespaces should not have the same mount-suffix, because
   //       Fuse will use this to look-up namespaces.  Two NSes also
   //       shouldn't have the same name, in case someone wants to lookup
   //       by-name.
   // ...........................................................................

   _ns_max = 64;                /* the number we're about to allocate */
   _ns     = (MarFS_Namespace**) malloc(_ns_max * sizeof(MarFS_Namespace*));

   MarFS_Namespace ns_dummy;

   // Brett, unit
   ns_dummy = (MarFS_Namespace) {
      .name           = "brettk",
      .mnt_path       = "/brettk",   // "<mnt_top>/brettk" comes here
      .md_path        = "/gpfs/marfs-gpfs/brettk/mdfs",
      .fsinfo_path    = "/gpfs/marfs-gpfs/brettk/fsinfo", /* a file */
      .trash_md_path  = "/gpfs/marfs-gpfs/trash", // NOT NEC IN THE SAME FILESET!

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = -1,          /* no limit */
      .quota_names = -1,          /* no limit */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("sproxyd_1M"));

   // @@@-HTTPS: Brett, unit
   ns_dummy = (MarFS_Namespace) {
      .name           = "brettk_https",
      .mnt_path       = "/brettk_https", // "<mnt_top>/brettk_https" comes here
      .md_path        = "/gpfs/marfs-gpfs/brettk_https/mdfs",
      .fsinfo_path    = "/gpfs/marfs-gpfs/brettk_https/fsinfo", /* a file */
      .trash_md_path  = "/gpfs/marfs-gpfs/trash", // NOT NEC IN THE SAME FILESET!

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = -1,          /* no limit */
      .quota_names = -1,          /* no limit */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("sproxyd_1M_https"));

   // jti testing
   ns_dummy = (MarFS_Namespace) {
      .name           = "jti",
      .mnt_path       = "/jti",      // "<mnt_top>/jti" comes here
      .md_path        = "/gpfs/marfs-gpfs/jti/mdfs",
      .fsinfo_path    = "/gpfs/marfs-gpfs/jti/fsinfo", /* a file */
      .trash_md_path  = "/gpfs/marfs-gpfs/trash", // NOT NEC IN THE SAME FILESET!

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = (1024L * 1024 * 1024), /* 1 GB of data */
      .quota_names = 32,                    /* 32 names */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("proxy"));

   // EMC ECS install (with S3)
   ns_dummy = (MarFS_Namespace) {
      .name           = "s3",
      .mnt_path       = "/s3",       // "<mnt_top>/s3" comes here
      .md_path        = "/gpfs/fs2/s3/mdfs",
      .fsinfo_path    = "/gpfs/fs2/s3/fsinfo", /* a file */
      .trash_md_path  = "/gpfs/fs2/trash", // NOT NEC IN THE SAME FILESET!

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = (1024L * 1024 * 1024), /* 1 GB of data */
      .quota_names = 32,                    /* 32 names */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("emc_s3"));

   // jti testing on machine without GPFS
   ns_dummy = (MarFS_Namespace) {
      .name           = "ext4",
      .mnt_path       = "/ext4",     // "<mnt_top>/ext4" comes here
      .md_path        = "/non_gpfs/mdfs",
      .trash_md_path  = "/non_gpfs/trash",
      .fsinfo_path    = "/non_gpfs/fsinfo",

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = (1024L * 1024 * 1024), /* 1 GB of data */
      .quota_names = 32,                    /* 32 names */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("proxy"));

#ifdef TBD
   // jti testing semi-direct
   ns_dummy = (MarFS_Namespace) {
      .name           = "semi",
      .mnt_path       = "/semi",
      .md_path        = "/gpfs/marfs-gpfs/semi/mdfs",
      .trash_md_path  = "/gpfs/marfs-gpfs/semi/trash",
      .fsinfo_path    = "/gpfs/marfs-gpfs/semi/fsinfo",

      .iperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),
      .bperms         = ( R_META | W_META | R_DATA | W_DATA | T_DATA | U_DATA ),

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = (1024L * 1024 * 1024), /* 1 GB of data */
      .quota_names = 32,                    /* 32 names */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("semi"));
#endif

   // "root" is a special path
   //
   // NOTE: find_namespace_by_path() will only return this namespace if its
   //       <path> matches our <mnt_path> exactly.  That's because our
   //       mnt_path is (necessarily) a suffix of all paths.
   ns_dummy = (MarFS_Namespace) {
      .name           = "root",
      .mnt_path       = "/",
      .md_path        = "should_never_be_used",
      .trash_md_path  = "should_never_be_used",
      .fsinfo_path    = "should_never_be_used",

      .iperms         = ( R_META ), /* marfs_getattr() does manual stuff */
      .bperms         = 0,

      .dirty_pack_percent   =  0,
      .dirty_pack_threshold = 75,

      .quota_space = -1,          /* no limit */
      .quota_names = -1,          /* no limit */

      .shard_path  = NULL,
      .shard_count = 0,
   };
   push_namespace(&ns_dummy, find_repo_by_name("sproxyd_1M"));

   return 0;                    /* success */
}


// ...........................................................................
// NAMESPACES
// ...........................................................................

// Find the namespace corresponding to the mnt_suffix in a Namespace struct,
// which corresponds with a "namespace" managed by fuse.  We might
// potentially have many namespaces (should be cheap to have as many as
// you want), and this lookup is done for every fuse call (and in parallel
// from pftool).  Also done every time we parse an object-ID xattr!  Thus,
// this should eventually be made efficient.
//
// One way to make this fast would be to look through all the namespaces
// and identify the places where a path diverges for different namespaces.
// This becomes a series of hardcoded substring-ops on the path.  Each one
// identifies the next suffix in a suffix tree.  (Attractive Chaos has an
// open-source suffix-array impl.)  The leaves would be pointers to
// Namespaces.
//
// NOTE: If the fuse mount-point is "/A/B", and you provide a path like
//       "/A/B/C", then the "path" seen by fuse callbacks is "/C".  In
//       other words, we should never see MarFS_mnt_top as part of the
//       incoming path.
//
// For a quick first-cut, there's only one namespace.  Your path is either
// in it or fails.

MarFS_Namespace* find_namespace_by_name(const char* name) {
   int    i;
   size_t name_len = strlen( name );

   for (i=0; i<_ns_count; ++i) {
      MarFS_Namespace* ns = _ns[i];

      // We want to compare the whole <name> against ns->name, not just the
      // first ns->name_len characters.  Comparing only a prefix would
      // incorrectly match namespace names that are prefixes of <name>.
      //
      //    if (! strncmp(ns->name, name, ns->name_len))
      if (( ns->name_len == name_len ) && ( ! strcmp( ns->name, name ))) {
         return ns;
      }
   }
   return NULL;
}

/*
 * @@@-HTTPS:
 * The path that is passed into this function always starts with the
 * "/" character.  That character and any others up to the next "/"
 * character are the namespace's mnt_path.  A namespace's mnt_path
 * must begin with the "/" character and, by definition, not contain
 * any other "/" characters after the initial one.  It is the FUSE
 * mount point and we'll always use a one-level mount point.
 */
MarFS_Namespace* find_namespace_by_mnt_path(const char* path) {
   int    i;
   char  *path_dup;
   char  *path_dup_token;
   size_t path_dup_len;

   path_dup       = strdup( path );
   path_dup_token = strtok( path_dup, "/" );
   path_dup_len   = strlen( path_dup );

   /*
    * At this point path_dup will include the leading "/" and any other
    * characters up to, but not including, the next "/" character in
    * path.  This includes path_dup being able to be "/" (the root
    * namespace).
    */
   for (i=0; i<_ns_count; ++i) {
      MarFS_Namespace* ns = _ns[i];

      if (( ns->mnt_path_len == path_dup_len ) &&
          ( !strcmp( ns->mnt_path, path_dup ))) {
         free( path_dup );
         return ns;
      }
   }

   free( path_dup );
   return NULL;
}

// Let others traverse namespaces, without knowing how they are stored
NSIterator namespace_iterator() {
   return (NSIterator){ .pos = 0 };
}

MarFS_Namespace* namespace_next(NSIterator* it) {
   if (it->pos >= _ns_count)
      return NULL;
   else
      return _ns[it->pos++];
}


// ...........................................................................
// REPOS
// ...........................................................................

MarFS_Repo* find_repo(MarFS_Namespace* ns,
                      size_t           file_size,
                      int              interactive_write) { // bool
   if (interactive_write)
      return ns->iwrite_repo;
   else
      return find_in_range(ns->range_list, file_size);
}

// later, _repo will be a B-tree, or something, associating repo-names with
// repos.
MarFS_Repo* find_repo_by_name(const char* repo_name) {
   int i;
   for (i=0; i<_repo_count; ++i) {
      MarFS_Repo* repo = _repo[i];
      if (!strcmp(repo_name, repo->name))
         return repo;
   }
   return NULL;
}

MarFS_Repo* find_repo_by_range (MarFS_Namespace* ns,
                                size_t           file_size) {
   RangeList* range_list;

   if (ns) {
      for (range_list=ns->range_list; range_list; range_list=range_list->next) {
         if (   (file_size >= range_list->min)
             && ((file_size <= range_list->max) || (range_list->max == -1))) {
            return range_list->repo;
         }
      }
   }
   return NULL;
}

// Let others traverse repos, without knowing how they are stored
RepoIterator repo_iterator() {
   return (RepoIterator){ .pos = 0 };
}

MarFS_Repo* repo_next(RepoIterator* it) {
   if (it->pos >= _repo_count)
      return NULL;
   else
      return _repo[it->pos++];
}


// Give us a pointer to your list-pointer.  Your list-pointer should start
// out having a value of NULL.  We maintain the list of repos ASCENDING by
// min file-size handled.  Return non-zero in case of conflicts.  Conflicts
// include overlapping ranges, or gaps in ranges.  Call with <max>==-1 to
// make a range from <min> to infinity.
int insert_in_range(RangeList**  list,
                    size_t       min,
                    size_t       max,
                    MarFS_Repo*  repo) {

   RangeList** insert = list;   // ptr to place to store ptr to new element

   // leave <insert> pointing at the position where the new element goes
   RangeList*  this;
   for (this=*list; this; this=this->next) {

      if (min < this->min) {    // insert before <this>

         if (max == -1) {
            LOG(LOG_ERR, "range [%ld, -1] includes range [%ld, %ld]\n",
                min, this->min, this->max);
            return -1;
         }
         if (max < this->min) {
            LOG(LOG_ERR, "gap between range [%ld, %ld] and [%ld, %ld]\n",
                min, max, this->min, this->max);
            return -1;
         }
         if (max > this->min) {
            LOG(LOG_ERR, "overlap in range [%ld, %ld] and [%ld, %ld]\n",
                min, max, this->min, this->max);
            return -1;
         }

         // do the insert
         break;
      }
      insert = &this->next;
   }

   RangeList* elt = (RangeList*)malloc(sizeof(RangeList));
   elt->min  = min;
   elt->max  = max;
   elt->repo = repo;
   elt->next = *insert;
   *insert   = elt;
   return 0;                    /* success */
}

// given a file-size, find the corresponding element in a RangeList, and
// return the corresponding repo.  insert_in_range() maintains repos in
// descending order of the block-sizes they handle, to make this as quick
// as possible.
MarFS_Repo* find_in_range(RangeList*  list,
                          size_t      block_size) {
   while (list) {
      if (block_size >= list->min)
         return list->repo;
      list = list->next;
   }
   return NULL;
}
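
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the original source).  This
// shows how the lookup helpers above are intended to be combined by a caller
// such as the fuse layer: resolve a namespace from the mount sub-path, then
// pick the repo that handles a file of a given size.  The function name
// example_pick_repo() is hypothetical.
// ---------------------------------------------------------------------------
#if 0
static MarFS_Repo* example_pick_repo(const char* sub_path, size_t file_size) {

   // e.g. sub_path == "/brettk/some/file" selects the "brettk" namespace
   MarFS_Namespace* ns = find_namespace_by_mnt_path(sub_path);
   if (! ns)
      return NULL;

   // non-interactive write: choose by file-size from the namespace's ranges
   return find_repo(ns, file_size, 0);
}
#endif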
void match_node_xyz(RegionVector &part_mesh, double tolerance,
                    std::vector<INT> &global_node_map,
                    std::vector<INT> &local_node_map)
{
  // See if any omitted element blocks...
  bool has_omissions = false;
  for (auto &elem : part_mesh) {
    if (elem->get_property("block_omission_count").get_int() > 0) {
      has_omissions = true;
      break;
    }
  }

  if (!has_omissions) {
    for (size_t i = 0; i < local_node_map.size(); i++) {
      local_node_map[i] = i;
    }
  }
  else {
    std::vector<INT> dummy;
    eliminate_omitted_nodes(part_mesh, dummy, local_node_map);

    // The local_node_map is not quite in the correct format after the
    // call to 'eliminate_omitted_nodes'.  We need all non-omitted
    // nodes to have local_node_map[i] == i.
    for (size_t i = 0; i < local_node_map.size(); i++) {
      if (local_node_map[i] >= 0)
        local_node_map[i] = i;
    }
  }

  size_t part_count = part_mesh.size();

  enum { X = 0, Y = 1, Z = 2 };

  for (size_t ip = 0; ip < part_count; ip++) {
    vector3d i_max;
    vector3d i_min;
    std::vector<double> i_coord;
    Ioss::NodeBlock *inb = part_mesh[ip]->get_node_blocks()[0];
    inb->get_field_data("mesh_model_coordinates", i_coord);
    find_range(i_coord, i_min, i_max);

    size_t i_offset = part_mesh[ip]->get_property("node_offset").get_int();

    for (size_t jp = ip + 1; jp < part_count; jp++) {
      vector3d j_max;
      vector3d j_min;
      std::vector<double> j_coord;
      Ioss::NodeBlock *jnb = part_mesh[jp]->get_node_blocks()[0];
      jnb->get_field_data("mesh_model_coordinates", j_coord);
      find_range(j_coord, j_min, j_max);

      size_t j_offset = part_mesh[jp]->get_property("node_offset").get_int();

      // See if the ranges overlap...
      vector3d max;
      vector3d min;
      max.x = std::min(i_max.x, j_max.x);
      max.y = std::min(i_max.y, j_max.y);
      max.z = std::min(i_max.z, j_max.z);

      min.x = std::max(i_min.x, j_min.x);
      min.y = std::max(i_min.y, j_min.y);
      min.z = std::max(i_min.z, j_min.z);

      double delta[3];
      int XYZ = X;
      delta[XYZ] = max.x - min.x;
      delta[Y] = max.y - min.y;
      if (delta[Y] > delta[XYZ])
        XYZ = Y;
      delta[Z] = max.z - min.z;
      if (delta[Z] > delta[XYZ])
        XYZ = Z;

      double epsilon = (delta[X] + delta[Y] + delta[Z]) / 1.0e3;
      if (epsilon < 0.0) {
        std::cout << "Parts " << ip << " and " << jp << " do not overlap.\n";
        continue;
      }

      min -= epsilon;
      max += epsilon;

      if (tolerance >= 0.0)
        epsilon = tolerance;

      std::vector<INT> j_inrange;
      std::vector<INT> i_inrange;

      find_in_range(j_coord, min, max, j_inrange);
      find_in_range(i_coord, min, max, i_inrange);

      // Index sort all nodes on the coordinate range with the maximum delta.
      index_coord_sort(i_coord, i_inrange, XYZ);
      index_coord_sort(j_coord, j_inrange, XYZ);

      if (i_inrange.size() < j_inrange.size()) {
        do_matching(i_inrange, i_coord, i_offset,
                    j_inrange, j_coord, j_offset,
                    epsilon, XYZ, local_node_map);
      }
      else {
        do_matching(j_inrange, j_coord, j_offset,
                    i_inrange, i_coord, i_offset,
                    epsilon, XYZ, local_node_map);
      }
    }
  }

  // Build the global and local maps...
  size_t j = 1;
  for (size_t i = 0; i < local_node_map.size(); i++) {
    if (local_node_map[i] == (INT)i) {
      global_node_map.push_back(j);
      local_node_map[i] = j - 1;
      j++;
    }
    else if (local_node_map[i] >= 0) {
      local_node_map[i] = local_node_map[local_node_map[i]];
    }
  }
}
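
/* ---------------------------------------------------------------------------
 * Illustrative sketch (not part of the original source): one plausible way
 * index_coord_sort() above could order node indices by their coordinate on
 * the dominant axis.  It assumes 'coord' holds interleaved x,y,z triples, as
 * get_field_data("mesh_model_coordinates", ...) fills them in.  A simple
 * insertion sort keeps the sketch self-contained; the real routine is not
 * shown here and may differ.
 * ------------------------------------------------------------------------ */
#if 0
static void example_index_coord_sort(const double *coord,   /* x0,y0,z0, x1,y1,z1, ... */
                                     long         *inrange, /* node indices to order   */
                                     size_t        count,   /* number of indices       */
                                     int           axis)    /* 0=X, 1=Y, 2=Z           */
{
  for (size_t i = 1; i < count; i++) {
    long   idx = inrange[i];
    double key = coord[3 * idx + axis];
    size_t j   = i;
    /* shift larger keys right, then drop idx into place */
    while (j > 0 && coord[3 * inrange[j - 1] + axis] > key) {
      inrange[j] = inrange[j - 1];
      j--;
    }
    inrange[j] = idx;
  }
}
#endif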
void *mf_map(mf_handle_t mf, off_t offset, size_t size,
             mf_mapmem_handle_t *mapmem_handle)
{
    int res_code = 0;

    // Validate arguments before touching *mapmem_handle.
    if ((mf == MF_OPEN_FAILED) || (mapmem_handle == NULL)) {
        errno = EINVAL;
        if (mapmem_handle != NULL)
            *mapmem_handle = MF_MAP_FAILED;
        write_log_to_file(Error, "mf_map: invalid input!\n");
        return NULL;
    }

    ch_pool_t *ch_pool = (ch_pool_t *)mf;
    sem_wait(&(ch_pool->lock));

    chunk_t **chunk = (chunk_t **)mapmem_handle;

    off_t  file_size      = ch_pool->file_size;
    size_t chunk_size_min = ch_pool->chunk_size_min;

    if ((offset > file_size) || (offset < 0)) {
        errno = EINVAL;
        *mapmem_handle = MF_MAP_FAILED;
        write_log_to_file(Error, "mf_map: invalid input!\n");
        sem_post(&(ch_pool->lock));
        return NULL;
    }

    // Clamp a mapping that would run past the end of the file.
    if ((offset + size > file_size) && (offset < file_size)) {
        size = file_size - offset;
    }

    if (size == 0) {
        write_log_to_file(Error, "mf_map: size of mapping = 0!\n");
        sem_post(&(ch_pool->lock));
        return NULL;
    }

    // Translate (offset, size) into a range of chunk_size_min-sized chunks.
    off_t index  = offset / chunk_size_min;
    off_t length = 0;
    if ((offset + size) % chunk_size_min != 0) {
        length = (offset + size) / chunk_size_min - index + 1;
    }
    else {
        length = (offset + size) / chunk_size_min - index;
    }

    if (ch_pool->flag == 0) {
        *chunk = take_value_ptr(ch_pool->h_table, index, length);
    }
    else {
        *chunk = find_in_range(ch_pool, offset, size);
    }

    if ((*chunk) == NULL) {
        res_code = ch_init(index, length, ch_pool);
        if (res_code) {
            write_log_to_file(Error, "mf_map: initialization of chunk failed!\n");
            *mapmem_handle = MF_MAP_FAILED;
            sem_post(&(ch_pool->lock));
            return NULL;
        }
        *chunk = take_value_ptr(ch_pool->h_table, index, length);
    }

    void *ptr = ((*chunk)->data) + offset - ((*chunk)->index) * chunk_size_min;
    if (ptr == MF_MAP_FAILED) {
        write_log_to_file(Error, "mf_map: mapped pointer is invalid!\n");
        sem_post(&(ch_pool->lock));
        return MF_MAP_FAILED;
    }

    sem_post(&(ch_pool->lock));
    return ptr;
}
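
/* ---------------------------------------------------------------------------
 * Illustrative sketch (not part of the original source): the chunk-range
 * arithmetic used in mf_map() above, pulled out into a standalone helper so
 * the offset -> (index, length) mapping is easy to check.  The names
 * example_chunk_range() and example_chunk_range_check() are hypothetical.
 * ------------------------------------------------------------------------ */
#if 0
#include <assert.h>

static void example_chunk_range(off_t offset, size_t size, size_t chunk_size_min,
                                off_t *index, off_t *length)
{
    *index = offset / chunk_size_min;           /* first chunk touched */
    if ((offset + size) % chunk_size_min != 0)
        *length = (offset + size) / chunk_size_min - *index + 1;
    else
        *length = (offset + size) / chunk_size_min - *index;
}

static void example_chunk_range_check(void)
{
    off_t index, length;

    /* bytes [5000, 14999] with 4096-byte chunks touch chunks 1..3 */
    example_chunk_range(5000, 10000, 4096, &index, &length);
    assert(index == 1 && length == 3);

    /* an exactly chunk-aligned range touches exactly size/chunk chunks */
    example_chunk_range(4096, 8192, 4096, &index, &length);
    assert(index == 1 && length == 2);
}
#endif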