// this init's the library if it hasn't been done yet bool plfs_init() { static pthread_mutex_t confmutex = PTHREAD_MUTEX_INITIALIZER; static PlfsConf *pconf = NULL; bool ret = true; if ( ! pconf ) { // not yet initialized. Try to do so. pthread_mutex_lock(&confmutex); // who should initialize? if (pconf) { // someone beat us in race. they will initialize. ret = true; } else { // we won race. we need to initialize. LogMessage::init(); pconf = get_plfs_conf(); if ( !pconf ) { ret = false; // something failed } else { ret = plfs_warm_path_resolution(pconf); if ( !ret ) { mlog(MLOG_WARN, "Unable to warm path resolution\n"); } } } pthread_mutex_unlock(&confmutex); } return ret; }
/**
 * plfs_get_filetype: report the file type of the mount that holds a path.
 *
 * @param path the path to look up
 * @return the file_type of the mount returned by find_mount_point(), or
 *         PFT_UNKNOWN if no mount was found
 */
plfs_filetype plfs_get_filetype(const char *path) {
    bool hit = false;
    PlfsConf *conf = get_plfs_conf();
    PlfsMount *mnt = find_mount_point(conf, path, hit);

    if (!hit || !mnt) {
        return PFT_UNKNOWN;
    }
    return mnt->file_type;
}
/**
 * ByteRangeIndex::index_add: record one write in the writeable index.
 * The whole operation runs with bri_mutex held.
 *
 * @param cof the open file (unused)
 * @param nbytes number of bytes we wrote
 * @param offset the logical offset of the record
 * @param pid the pid doing the writing
 * @param physoffset the physical offset in the data dropping of the data
 * @param begin the start timestamp
 * @param end the end timestamp
 * @return PLFS_SUCCESS, PLFS_EINVAL if the index is not open, or the
 *         result of flush_writebuf() when a flush is triggered
 */
plfs_error_t
ByteRangeIndex::index_add(Container_OpenFile * /* cof */, size_t nbytes,
                          off_t offset, pid_t pid, off_t physoffset,
                          double begin, double end) {
    plfs_error_t rv = PLFS_SUCCESS;
    HostEntry rec;

    Util::MutexLock(&this->bri_mutex, __FUNCTION__);

    if (this->isopen) {
        /*
         * with compress_contiguous on, a write by the same pid that
         * starts exactly where the newest buffered entry ends is folded
         * into that entry instead of getting a record of its own.
         */
        bool extended = false;
        if (get_plfs_conf()->compress_contiguous &&
            !this->writebuf.empty() &&
            this->writebuf.back().id == pid &&
            this->writebuf.back().logical_offset +
                (off_t)this->writebuf.back().length == offset) {
            this->writebuf.back().end_timestamp = end;
            this->writebuf.back().length += nbytes;
            extended = true;
        }

        if (!extended) {
            /* no merge possible: buffer a fresh entry */
            rec.logical_offset = offset;
            rec.physical_offset = physoffset;
            rec.length = nbytes;
            rec.begin_timestamp = begin;
            rec.end_timestamp = end;
            rec.id = pid;
            this->writebuf.push_back(rec);
        }

        this->write_count++;
        this->write_bytes += nbytes;

        /* push the tracked EOF forward if this write ends beyond it */
        const off_t write_end = offset + (off_t)nbytes;
        if (this->eof_tracker < write_end) {
            this->eof_tracker = write_end;
        }

        /* XXX: carried over hardwired 1024 from old code */
        if ((this->write_count % 1024) == 0) {
            rv = this->flush_writebuf();
        }
    } else {
        /* shouldn't be possible */
        mlog(IDX_CRIT, "index_add: but not open?!");
        rv = PLFS_EINVAL;
    }

    Util::MutexUnlock(&this->bri_mutex, __FUNCTION__);
    return(rv);
}
/**
 * plfs_get_logical_fs: get the logical filesystem object for a path.
 *
 * @param path the path to look up
 * @return the fs_ptr of the mount returned by find_mount_point(), or
 *         NULL if no mount was found
 */
LogicalFileSystem *
plfs_get_logical_fs(const char *path) {
    mlog(PLFS_DBG, "ENTER %s: %s\n", __FUNCTION__,path);

    bool hit = false;
    PlfsConf *conf = get_plfs_conf();
    PlfsMount *mnt = find_mount_point(conf, path, hit);

    LogicalFileSystem *fs = NULL;
    if (hit && mnt != NULL) {
        fs = mnt->fs_ptr;
    }
    return fs;
}
/*
 * plfs_attach: attach a filesystem.  the pmnt iostore data must be
 * protected with a mutex, so all of the work below is done while
 * holding a function-local one.
 *
 * @param pmnt the mount to attach to
 * @return PLFS_SUCCESS if attached (or if it already was), PLFS_E* if
 *         attaching one of the backends failed
 */
plfs_error_t plfs_attach(PlfsMount *pmnt) {
    static pthread_mutex_t attachmutex = PTHREAD_MUTEX_INITIALIZER;
    plfs_error_t status = PLFS_SUCCESS;

    pthread_mutex_lock(&attachmutex);

    /* if we lost a race, someone else has already attached: that is ok */
    if (!pmnt->attached) {

        /* special case: global_summary_dir, attached once per config */
        PlfsConf *conf = get_plfs_conf();
        if (conf->global_summary_dir != NULL &&
            conf->global_sum_io.store == NULL) {
            /*
             * XXX: this results in the bpath to the dir going in
             * the global_sum_io.bmpoint string.
             */
            const bool gs_attached =
                (plfs_iostore_factory(pmnt, &conf->global_sum_io)
                 == PLFS_SUCCESS);
            if (!gs_attached) {
                mlog(INT_WARN, "global_summary_dir %s: failed to attach!",
                     conf->global_summary_dir);
            } else if (!Util::isDirectory(
                           conf->global_sum_io.bmpoint.c_str(),
                           conf->global_sum_io.store)) {
                /* but keep it configured in, in case operator fixes it */
                mlog(INT_WARN, "global_summary_dir %s is not a directory!",
                     conf->global_summary_dir);
            }
        }

        /*
         * special case: statfs.  a failure is only logged and does not
         * fail the attach.
         * NOTE(review): unlike the other iostores in this function, there
         * is no "store already set" check here, so a retry after a partly
         * failed attach calls the factory again for statfs_io -- confirm
         * that is intended.
         */
        if (pmnt->statfs != NULL &&
            plfs_iostore_factory(pmnt, &pmnt->statfs_io) != PLFS_SUCCESS) {
            mlog(INT_WARN, "statfs %s: %s: failed to attach!",
                 pmnt->mnt_pt.c_str(), (*pmnt->statfs).c_str());
        }

        /* be careful about partly attached mounts: skip finished backends */
        for (int idx = 0 ; idx < pmnt->nback ; idx++) {
            if (pmnt->backends[idx]->store != NULL) {
                continue;   /* this one already done, should be ok */
            }
            status = plfs_iostore_factory(pmnt, pmnt->backends[idx]);
            if (status != PLFS_SUCCESS) {
                break;      /* stop at the first backend that fails */
            }
        }

        if (status == PLFS_SUCCESS) {
            pmnt->attached = 1;
        }
    }

    pthread_mutex_unlock(&attachmutex);
    return(status);
}
/**
 * plfs_phys_backlookup: lookup a physical path's backend info.  the
 * behavior of the search varies depending on bpathout.  if bpathout
 * is NULL, then we expect phys to be a backspec from a metalink and
 * we look for an exact match on bmpoint.  if bpathout is !NULL, then
 * we expect the phys to contain a full physical path with a prefix,
 * bmpoint, and bnode (so we need a front end match on bmpoint).
 * bpathout will be NULL for Metalinks, non-NULL for Index chunk_map.
 *
 * phys is parsed into a prefix and a backend path.  two shorthand
 * forms carry no prefix (prelen 0): a plain absolute path ("/...") and
 * a path tagged with a leading "posix:" (the tag is skipped).  anything
 * else must look like "<scheme>://<host>/<path>": the prefix is
 * everything before the first '/' that follows the "://".
 *
 * @param phys the physical path string (from index, metalink, etc...)
 * @param pmnt the logical mount to look in (if null: global search)
 * @param backout where we place the result
 * @param bpathout also put bpath here if !NULL
 * @return PLFS_SUCCESS on success, PLFS_E* on failure (PLFS_EINVAL for
 *         an unparsable phys or a missing config)
 */
plfs_error_t
plfs_phys_backlookup(const char *phys, PlfsMount *pmnt,
                     struct plfs_backend **backout, string *bpathout) {
    static const char posix_tag[] = "posix:";
    const size_t posix_taglen = sizeof(posix_tag) - 1;
    const char *prefix;
    int prelen;
    plfs_error_t rv = PLFS_SUCCESS;
    const char *bpath;
    PlfsConf *pconf;
    map<string,PlfsMount *>::iterator itr;

    prefix = phys;

    /* parse, w/special common shorthand cases */
    if (prefix[0] == '/' ||
        strncmp(prefix, posix_tag, posix_taglen) == 0) {
        /*
         * the "posix:" test must be a prefix compare: a whole-string
         * strcmp() only matches the bare tag with no path after it,
         * which sends every real "posix:/path" to the URL branch below
         * where it is rejected for lacking "://".
         */
        prelen = 0;
        if (*prefix == 'p') {
            prefix = prefix + posix_taglen;
        }
        bpath = prefix;
    } else {
        bpath = strstr(prefix, "://");
        if (bpath) {
            bpath = strchr(bpath+(sizeof("://")-1), '/');
        }
        if (bpath == NULL) {
            mlog(CON_INFO, "plfs_phys_backlookup: bad phys %s", phys);
            return(PLFS_EINVAL);
        }
        prelen = bpath - prefix;
    }

    /* narrow the search if we can... */
    if (pmnt) {
        rv = plfs_phys_backlookup_mnt(prefix, prelen, bpath, pmnt,
                                      backout, bpathout);
        return(rv);
    }

    /* no mount provided, do a global search */
    pconf = get_plfs_conf();
    if (!pconf) {
        mlog(CON_CRIT, "plfs_phys_backlookup: no config found");
        return(PLFS_EINVAL);
    }

    /*
     * start from "not found" so that an empty mount map cannot report
     * success without backout/bpathout ever having been filled in.
     */
    rv = PLFS_ENOENT;
    for (itr = pconf->mnt_pts.begin() ; itr != pconf->mnt_pts.end() ;
         ++itr) {
        rv = plfs_phys_backlookup_mnt(prefix, prelen, bpath, itr->second,
                                      backout, bpathout);
        if (rv == PLFS_SUCCESS) {
            break;      /* first mount that knows this backend wins */
        }
    }

    return(rv);
}
/**
 * find_best_mount_point: find the best matching mount point (e.g.
 * choose /mnt/a/b/c over /mnt/a because it is a longer match), and
 * attach it if it is not attached yet.
 *
 * @param cleanlogical a cleaned version of the logical path
 * @param mpp pointer to the mount we found
 * @param mntlen length of the mount point string
 * @return PLFS_SUCCESS, PLFS_ENOENT if no mount point matches (or there
 *         is no config), or the error from plfs_attach()
 */
plfs_error_t
find_best_mount_point(const char *cleanlogical, PlfsMount **mpp,
                      int *mntlen) {
    /*
     * XXX: the old expandPath() used a static to cache the PlfsConf
     * (prob to avoid the mutex lock in get_plfs_conf()).  we
     * replicate that here.
     */
    static PlfsConf *pconf = get_plfs_conf();

    PlfsMount *best = NULL;
    unsigned int bestlen = 0;

    if (pconf != NULL) {
        map<string,PlfsMount *>::iterator it;
        for (it = pconf->mnt_pts.begin(); it != pconf->mnt_pts.end();
             ++it) {
            PlfsMount *cand = it->second;
            const size_t candlen = cand->mnt_pt.length();

            /* a shorter mount point cannot beat what we already have */
            if (candlen < bestlen) {
                continue;
            }

            /* the mount point must be a leading part of the path ... */
            if (strncmp(cleanlogical, cand->mnt_pt.c_str(), candlen) != 0) {
                continue;
            }

            /* ... ending on a component boundary (/mnt must not match /mntx) */
            const char boundary = cleanlogical[candlen];
            if (boundary != '\0' && boundary != '/') {
                continue;
            }

            best = cand;
            bestlen = candlen;
        }
    }

    if (best == NULL) {
        return(PLFS_ENOENT);
    }

    /* make sure it is attached before handing it out */
    if (best->attached == 0) {
        const plfs_error_t arv = plfs_attach(best);
        if (arv != PLFS_SUCCESS) {
            return(arv);
        }
    }

    *mpp = best;
    *mntlen = bestlen;
    return(PLFS_SUCCESS);
}
/**
 * plfs_query: query an open file for its writer/reader counts and bytes
 * written, and optionally report whether lazy stat applies to it.
 *
 * @param fd the open file; must not be NULL
 * @param writers passed through to fd->query()
 * @param readers passed through to fd->query()
 * @param bytes_written passed through to fd->query()
 * @param lazy_stat if non-NULL, set to nonzero only when the config
 *        enables lazy_stat and fd->query() did not report a reopen
 * @return the value returned by fd->query()
 */
int plfs_query(Plfs_fd *fd, size_t *writers, size_t *readers,
               size_t *bytes_written, int *lazy_stat) {
    /* validate fd before its first use (debug_enter dereferences it) */
    assert(fd != NULL);
    debug_enter(__FUNCTION__,fd->getPath());

    /*
     * give reopen a defined value so that the lazy_stat computation
     * below never reads an indeterminate bool.
     * NOTE(review): false is used on the assumption that "no reopen
     * reported" is the right default if query() leaves it untouched --
     * confirm against the Plfs_fd implementations.
     */
    bool reopen = false;
    int ret = fd->query(writers, readers, bytes_written, &reopen);

    if (lazy_stat) {
        PlfsConf *pconf = get_plfs_conf();
        /* with no config available, report lazy stat as disabled */
        *lazy_stat = (pconf != NULL) && pconf->lazy_stat && !reopen;
        mlog(MLOG_DBG, "plfs_query lazy_stat: %d.\n", *lazy_stat);
    }

    debug_exit(__FUNCTION__,fd->getPath(),ret);
    return ret;
}
bool plfs_is_mnt_ancestor(const char *path){ // this might be the weird thing where user has path /mnt/plfs/file // and they are calling container_access(/mnt) // AND they are on a machine // without FUSE and therefore /mnt doesn't actually exist // calls to /mnt/plfs/file will be resolved by plfs because that is // a virtual PLFS path that PLFS knows how to resolve but /mnt is // not a virtual PLFS path. So the really correct thing to do // would be to return a semantic error like EDEVICE which means // cross-device error. But code team is a whiner who doesn't want // to write code. So the second best thing to do is to check /mnt // for whether it is a substring of any of our valid mount points PlfsConf *pconf = get_plfs_conf(); map<string,PlfsMount *>::iterator itr; bool match = true; for(itr=pconf->mnt_pts.begin(); itr!=pconf->mnt_pts.end(); itr++) { // ok, check to see if the request target matches a mount point // can't just do a substring bec maybe a mount point is /mnt // and they're asking for /m. So tokenize and compare tokens string this_mnt = itr->first; vector<string> mnt_tokens; vector<string> target_tokens; Util::tokenize(this_mnt,"/",mnt_tokens); Util::tokenize(path,"/",target_tokens); vector<string> token_itr; match = true; for(size_t i=0; i<target_tokens.size(); i++) { if (i>=mnt_tokens.size()) { break; // no good } mlog(INT_DCOMMON, "%s: compare %s and %s", __FUNCTION__,mnt_tokens[i].c_str(), target_tokens[i].c_str()); if (mnt_tokens[i]!=target_tokens[i]) { match = false; break; } } if (match){ return true; } } return false; }
/**
 * plfs_dump_config: print the parsed PLFS configuration to stdout and,
 * if asked, check the directories it refers to.
 *
 * Global settings are printed first, then one section per mount point
 * (expected workload, backends, glib buffer size, optional syncer IP and
 * statfs, smallfile limits when built with USE_SMALLFILE, checksum).
 *
 * @param check_dirs if nonzero, attach to the mounts and run
 *        plfs_check_dir() on the global summary dir, each mount point,
 *        the backends (via print_backends()) and the statfs path
 * @param make_dir passed through to plfs_check_dir() and print_backends()
 *        (presumably asks them to create missing directories -- TODO
 *        confirm against those helpers)
 * @return PLFS_ENOENT if there is no config, PLFS_EINVAL if the config
 *         carries a parse error message, otherwise the status left by
 *         the last plfs_check_dir()/print_backends() call (PLFS_SUCCESS
 *         if none of them changed it)
 */
plfs_error_t plfs_dump_config(int check_dirs, int make_dir) {
    PlfsConf *pconf = get_plfs_conf();
    /* static: created by the first check_dirs call and then reused */
    static IOStore *fakestore = NULL;
    int simple;
    if ( ! pconf ) {
        cerr << "FATAL no plfsrc file found.\n" << endl;
        return PLFS_ENOENT;
    }
    if ( pconf->err_msg ) {
        cerr << "FATAL conf file error: " << *(pconf->err_msg) << endl;
        return PLFS_EINVAL;
    }
    /*
     * if we make it here, we've parsed correctly.  if we are checking
     * dirs, then we need to attach to backends.  in order to check
     * the global_summary_dir (if enabled), we do a one-off attach
     * here first.  we also need a fake iostore to check local posix
     * mount points (e.g. for FUSE, but it doesn't make sense for MPI
     * or library access XXX).
     */
    if (check_dirs) {
        if (pconf->global_summary_dir) {
            map<string,PlfsMount *>::iterator itr;
            PlfsMount *pmnt;
            itr = pconf->mnt_pts.begin();
            /* note: get_plfs_conf() ensures there is at least 1 mnt */
            pmnt = itr->second;
            (void) plfs_attach(pmnt);  /* ignore ret val */
        }
        /* XXX: generate a fake POSIX iostore, we'll never free it */
        if (fakestore == NULL) {
            char *pp, *bmp, spec[2];
            int pl;
            map<string,PlfsMount *>::iterator itr;
            PlfsMount *pmnt;
            itr = pconf->mnt_pts.begin();
            /* note: get_plfs_conf() ensures there is at least 1 mnt */
            pmnt = itr->second;
            /* spec is the one-character string "/" */
            spec[0] = '/';
            spec[1] = 0;
            /*
             * only fakestore is used afterwards; pp, pl and bmp are
             * required out-params, and the return value is not checked
             * (a failure shows up as fakestore staying NULL, which the
             * mount point check below tests for).
             */
            plfs_iostore_get(spec, &pp, &pl, &bmp, pmnt, &fakestore);
        }
    }
    /*
     * ret is handed to every plfs_check_dir()/print_backends() call and
     * overwritten with what that call returns (presumably so that an
     * earlier failure is carried forward -- TODO confirm in the helpers).
     */
    plfs_error_t ret = PLFS_SUCCESS;
    /* global (non per-mount) settings */
    cout << "Config file " << pconf->file << " correctly parsed:" << endl
         << "Num Hostdirs: " << pconf->num_hostdirs << endl
         << "Threadpool size: " << pconf->threadpool_size << endl
         << "Write index buffer size (mbs): " << pconf->buffer_mbs << endl
         << "Read index buffer size (mbs): " << pconf->read_buffer_mbs << endl
         << "Num Mountpoints: " << pconf->mnt_pts.size() << endl
         << "Lazy Stat: " << pconf->lazy_stat << endl
         << "Lazy Droppings: " << pconf->lazy_droppings << endl
         << "Compress Contiguous: " << pconf->compress_contiguous << endl
         << "Test Metalink: " << pconf->test_metalink << endl;
    if (pconf->global_summary_dir) {
        cout << "Global summary dir: " << pconf->global_summary_dir << endl;
        if(check_dirs) {
            ret = plfs_check_dir("global_summary_dir",
                                 pconf->global_sum_io.prefix,
                                 pconf->global_sum_io.store,
                                 pconf->global_sum_io.bmpoint,ret,make_dir);
        }
    }
    /* one section per configured mount point */
    map<string,PlfsMount *>::iterator itr;
    for(itr=pconf->mnt_pts.begin(); itr!=pconf->mnt_pts.end(); itr++) {
        PlfsMount *pmnt = itr->second;
        /* per-mount copy of check_dirs: cleared if this mount won't attach */
        int check_dirs_now = check_dirs;
        cout << "Mount Point " << itr->first << " :" << endl;
        /*
         * NOTE(review): the contact address in the fallback message
         * below looks like it was redacted -- confirm against upstream.
         */
        cout << "\tExpected Workload "
             << (pmnt->file_type == CONTAINER ? "shared_file (N-1)"
                 : pmnt->file_type == FLAT_FILE ? "file_per_proc (N-N)"
                 : pmnt->file_type == SMALL_FILE ? "small_file (1-N)"
                 : "UNKNOWN.  WTF.  email [email protected]")
             << endl;
        if (check_dirs && plfs_attach(pmnt) != PLFS_SUCCESS) {
            cout << "\tUnable to attach to mount point, disable check_dirs"
                 << endl;
            check_dirs_now = 0;
        }
        /* the mount point itself is checked through the fake POSIX store */
        if(check_dirs_now && fakestore != NULL) {
            ret = plfs_check_dir("mount_point","",
                                 fakestore,itr->first,ret,make_dir);
        }
        /* "simple": every backend is both a canonical and a shadow backend */
        simple = (pmnt->ncanback == pmnt->nback) &&
                 (pmnt->nshadowback == pmnt->nback);
        if (simple) {
            printf("\tBackends: total=%d (no restrictions)\n", pmnt->nback);
        } else {
            printf("\tBackends: canonical=%d, shadow=%d, total=%d\n",
                   pmnt->ncanback, pmnt->nshadowback, pmnt->nback);
        }
        ret = print_backends(pmnt, simple, check_dirs_now, ret, make_dir);
        cout << "\tGlib buffer size (mbs): " << pmnt->glib_buffer_mbs << endl;
        if(pmnt->syncer_ip) {
            cout << "\tSyncer IP: " << pmnt->syncer_ip->c_str() << endl;
        }
        if(pmnt->statfs) {
            cout << "\tStatfs: " << pmnt->statfs->c_str() << endl;
            /* only checkable if the statfs iostore actually got attached */
            if(check_dirs_now && pmnt->statfs_io.store != NULL) {
                ret=plfs_check_dir("statfs",pmnt->statfs_io.prefix,
                                   pmnt->statfs_io.store,
                                   pmnt->statfs->c_str(),ret,make_dir);
            }
        }
#ifdef USE_SMALLFILE
        if (pmnt->file_type == SMALL_FILE) {
            cout << "\tMax writers: " << pmnt->max_writers << endl;
            cout << "\tMax cached smallfile containers: "
                 << pmnt->max_smallfile_containers << endl;
        }
#endif
        cout << "\tChecksum: " << pmnt->checksum << endl;
    }
    return ret;
}
/**
 * ByteRangeIndex::insert_entry: insert a single ContainerEntry in index,
 * resolving an overlap with its neighbors or (when compress_contiguous
 * is set) merging it into the entry it directly follows.
 *
 * @param idxout the index to insert the entry into
 * @param eof_trk running EOF; raised if the entry ends beyond it
 * @param bbytes running byte count; the entry's length is added to it
 * @param add the entry to add
 * @return PLFS_SUCCESS (always; see XXX below)
 */
plfs_error_t
ByteRangeIndex::insert_entry(map<off_t,ContainerEntry> &idxout,
                             off_t *eof_trk, off_t *bbytes,
                             ContainerEntry *add) {
    typedef map<off_t,ContainerEntry>::iterator idx_iter;
    pair<idx_iter,bool> ins;            /* result of the map insert */

    mlog(IDX_DAPI, "insert_entry: offset %ld into %p", add->logical_offset,
         &idxout);

    /* track metadata as we merge it in */
    if (add->logical_offset + (off_t)add->length > *eof_trk) {
        *eof_trk = add->logical_offset + add->length;
    }
    *bbytes += add->length;

    /*
     * ins.first is either the entry we just added or an existing entry
     * with the same key.  it may be idxout.begin(), but never
     * idxout.end(), since end is never a valid entry.
     */
    ins = idxout.insert(pair<off_t,ContainerEntry>(add->logical_offset,
                                                   *add));

    idx_iter self = ins.first;
    const bool has_prev = (self != idxout.begin());

    idx_iter after = self;
    ++after;
    idx_iter before = self;
    if (has_prev) {                     /* don't back up past begin */
        --before;
    }

    bool clash;
    if (!ins.second) {
        /* duplicate key! */
        mlog(IDX_DAPI, "insert_entry: dup key %ld", add->logical_offset);
        clash = true;
    } else {
        /* fresh key: see whether we run into either neighbor */
        clash = false;
        if (after != idxout.end() && add->overlap(after->second)) {
            mlog(IDX_DAPI, "insert_entry: overlap next %ld -> %ld",
                 add->logical_offset, after->second.logical_offset);
            clash = true;
        }
        /* not an 'else if': we want the mlog if both ends overlap */
        if (has_prev && before->second.overlap(*add)) {
            mlog(IDX_DAPI, "insert_entry: overlap prev %ld -> %ld",
                 add->logical_offset, before->second.logical_offset);
            clash = true;
        }
    }

    if (clash) {
        /* an overlap has to be fixed right away */
        if (add->length != 0) {
            /* XXX: has a return value we ignore */
            ByteRangeIndex::insert_overlapped(idxout, *add, ins);
        } else if (ins.second) {
            /* a zero length entry is simply dropped again if it went in */
            idxout.erase(self);
        }
    } else if (get_plfs_conf()->compress_contiguous) {
        /* if it abuts with the one before it, merge it in */
        if (has_prev && add->follows(before->second)) {
            mlog(IDX_DAPI, "insert_entry: merge %ld", add->logical_offset);
            before->second.length += add->length;
            idxout.erase(self);
        }
    }

    return(PLFS_SUCCESS);
}