void unity_global::save_model(std::shared_ptr<model_base> model,
                              const std::string& model_wrapper,
                              const std::string& url) {
  logstream(LOG_INFO) << "Save model to " << sanitize_url(url) << std::endl;
  logstream(LOG_INFO) << "Model name: " << model->name() << std::endl;
  try {
    dir_archive dir;
    dir.open_directory_for_write(url);
    dir.set_metadata("contents", "model");
    oarchive oarc(dir);
    oarc.write(CLASS_MAGIC_HEADER, strlen(CLASS_MAGIC_HEADER));
    oarc << model->name();
    oarc << model_wrapper;
    oarc << *model;
    if (dir.get_output_stream()->fail()) {
      std::string message = "Fail to write.";
      log_and_throw_io_failure(message);
    }
    dir.close();
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to save model to " + sanitize_url(url) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    log_and_throw(std::string("Unable to save model to ") + sanitize_url(url) + ": " + e);
  } catch (...) {
    log_and_throw(std::string("Unknown Error: Unable to save model to ") + sanitize_url(url));
  }
}
s3_device::s3_device(const std::string& filename, const bool write) {
  m_filename = filename;
  // split out the access key and secret key
  webstor::s3url url;
  webstor::parse_s3url(filename, url);
  m_s3fs = std::make_shared<dmlc::io::S3FileSystem>();
  m_s3fs->SetCredentials(url.access_key_id, url.secret_key);
  std::string url_without_credentials;
  if (url.endpoint.empty()) {
    url_without_credentials = "s3://" + url.bucket + "/" + url.object_name;
  } else {
    url_without_credentials = "s3://" + url.endpoint + "/" + url.bucket + "/" + url.object_name;
  }
  auto uri = dmlc::io::URI(url_without_credentials.c_str());
  if (write) {
    m_write_stream.reset(m_s3fs->Open(uri, "w"));
  } else {
    try {
      auto pathinfo = m_s3fs->GetPathInfo(uri);
      m_filesize = pathinfo.size;
      if (pathinfo.type != dmlc::io::kFile) {
        log_and_throw("Cannot open " + sanitize_url(filename));
      }
      m_read_stream.reset(m_s3fs->OpenForRead(uri));
    } catch (...) {
      log_and_throw("Cannot open " + sanitize_url(filename));
    }
  }
}
std::string unity_global::__read__(const std::string& url) {
  general_ifstream fin(url);
  if (!fin.good()) {
    fin.close();
    log_and_throw_io_failure(std::string("Cannot open " + sanitize_url(url)));
  }
  std::stringstream ss;
  char* buf = new char[4096];
  while (fin.good()) {
    fin.read(buf, 4096);
    size_t bytes_read = fin.gcount();
    ss.write(buf, bytes_read);
  }
  delete[] buf;
  if (!fin.eof()) {
    fin.close();
    log_and_throw_io_failure(std::string("Read fail " + sanitize_url(url)));
  }
  fin.close();
  return ss.str();
}
bool unity_sgraph::load_graph(std::string target_dir) {
  log_func_entry();
  try {
    dir_archive dir;
    dir.open_directory_for_read(target_dir);
    std::string contents;
    if (dir.get_metadata("contents", contents) == false || contents != "graph") {
      log_and_throw(std::string("Archive does not contain a graph."));
    }
    iarchive iarc(dir);
    load(iarc);
    dir.close();
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir) + ": " + e;
    log_and_throw(message);
  } catch (...) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir) + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
bool unity_sgraph::save_graph(std::string target, std::string format) {
  log_func_entry();
  try {
    if (format == "binary") {
      dir_archive dir;
      dir.open_directory_for_write(target);
      dir.set_metadata("contents", "graph");
      oarchive oarc(dir);
      if (dir.get_output_stream()->fail()) {
        log_and_throw_io_failure("Fail to write");
      }
      save(oarc);
      dir.close();
    } else if (format == "json") {
      save_sgraph_to_json(get_graph(), target);
    } else if (format == "csv") {
      save_sgraph_to_csv(get_graph(), target);
    } else {
      log_and_throw("Unable to save to format : " + format);
    }
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to save graph to " + sanitize_url(target) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message = "Unable to save graph to " + sanitize_url(target) + ": " + e;
    log_and_throw(message);
  } catch (...) {
    std::string message = "Unable to save graph to " + sanitize_url(target) + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
general_ofstream::general_ofstream(std::string filename)
  try : general_ofstream_base(filename), opened_filename(filename) {
} catch (const std::exception& e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for write. " + e.what());
} catch (std::string e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for write. " + e);
} catch (...) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
}
general_ifstream::general_ifstream(std::string filename, bool gzip_compressed)
  try : general_ifstream_base(filename, gzip_compressed), opened_filename(filename) {
} catch (const std::exception& e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e.what());
} catch (std::string e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e);
} catch (...) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
}
general_ifstream::general_ifstream(std::string filename)
  // this is the function try block syntax which, together with the member
  // function pointer syntax, is probably the ugliest C++ syntactic element
  // ever conceived.
  try : general_ifstream_base(filename), opened_filename(filename) {
} catch (const std::exception& e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e.what());
} catch (std::string e) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e);
} catch (...) {
  log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
}
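The comment above refers to C++ constructor function-try-blocks, which are what let these stream constructors translate failures from their base-class initializers into log_and_throw_io_failure calls. A minimal, self-contained sketch of the syntax (the resource/wrapper types are hypothetical and not part of this codebase):

#include <stdexcept>
#include <string>

struct resource {
  explicit resource(const std::string& name) {
    if (name.empty()) throw std::runtime_error("empty name");
  }
};

struct wrapper {
  resource r;
  // The initializer list sits between `try` and the constructor body, so
  // exceptions thrown by member/base constructors can be intercepted here.
  // The handler cannot swallow the exception: if it does not throw, the
  // original exception is rethrown automatically when the handler ends.
  explicit wrapper(const std::string& name)
  try : r(name) {
  } catch (const std::exception& e) {
    throw std::runtime_error(std::string("wrapper construction failed: ") + e.what());
  }
};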
std::shared_ptr<file_ownership_handle>
file_handle_pool::register_file(const std::string& file_name) {
  std::lock_guard<std::mutex> guard(this->m_mutex);
  std::shared_ptr<file_ownership_handle> ret = get_file_handle(file_name);
  if (!ret) {
    logstream(LOG_DEBUG) << "register_file_handle for file "
                         << sanitize_url(file_name) << std::endl;
    ret = std::make_shared<file_ownership_handle>(
        file_name, boost::algorithm::starts_with(file_name, "cache://"));
    m_file_handles[file_name] = ret;
  }
  /** This seems to be the safest way to do this. Ideally, we would
   * like file_ownership_handle to take care of this. However, it
   * is not certain that the pool will be around when that object is
   * destroyed, so that brings up a number of possible rare corner
   * cases to check for. Doing this is simplest for now.
   */
  if ((++this->num_file_registers) % (16*1024) == 0) {
    for (auto it = m_file_handles.begin(); it != m_file_handles.end();) {
      if (it->second.expired()) {
        // Advances to the next element, or m_file_handles.end();
        it = m_file_handles.erase(it);
      } else {
        ++it;
      }
    }
  }
  return ret;
}
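register_file hands out shared_ptr handles but the pool itself only holds weak references, sweeping out expired entries every so often. A minimal, self-contained sketch of that expiry-sweep pattern, with illustrative names that are not from the codebase:

#include <map>
#include <memory>
#include <string>

// Toy registry: values are weak_ptrs, so the registry never keeps a handle
// alive on its own.
std::map<std::string, std::weak_ptr<int>> registry;

std::shared_ptr<int> get_or_create(const std::string& key) {
  std::shared_ptr<int> ret = registry[key].lock();   // nullptr if expired or absent
  if (!ret) {
    ret = std::make_shared<int>(0);
    registry[key] = ret;
  }
  // Periodic sweep: drop entries whose last shared_ptr has been released.
  for (auto it = registry.begin(); it != registry.end();) {
    if (it->second.expired()) it = registry.erase(it);
    else ++it;
  }
  return ret;
}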
/**
 * Fetches the contents of a block.
 * Returns true on success and false on failure.
 */
bool fetch_block(char* output, size_t block_number,
                 size_t startpos, size_t length) {
  auto& bc = block_cache::get_instance();
  std::string key = get_key_name(block_number);
  int64_t ret = bc.read(key, output, startpos, startpos + length);
  if (ret == length) return true;

  logstream(LOG_INFO) << "Fetching " << sanitize_url(m_filename)
                      << " Block " << block_number << std::endl;
  // ok. failure... no such block or block is bad. We read it ourselves.
  // read the whole block
  auto block_start = block_number * READ_CACHING_BLOCK_SIZE;
  auto block_end = std::min(block_start + READ_CACHING_BLOCK_SIZE, m_file_size);
  // seek to the block and read the whole block at once
  auto& contents = get_contents();
  contents->seek(block_start, std::ios_base::beg, std::ios_base::in);
  std::string block_contents(block_end - block_start, 0);
  auto bytes_read = contents->read(&(block_contents[0]), block_end - block_start);
  // read failed.
  if (bytes_read < block_end - block_start) return false;
  // write the block
  bool write_block_ok = bc.write(key, block_contents);
  if (write_block_ok == false) {
    logstream(LOG_ERROR) << "Unable to write block " << key << std::endl;
    // still ok. we can continue. but too many of these are bad.
  }
  // since we just read the block, lets fill the output
  const char* src = block_contents.c_str();
  memcpy(output, src + startpos, length);
  return true;
}
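fetch_block addresses the file in whole blocks of READ_CACHING_BLOCK_SIZE bytes, so a caller must first turn an absolute file offset into a (block_number, startpos) pair. A small worked sketch of that arithmetic, assuming a hypothetical 4 KB block size purely for illustration (the real constant is defined elsewhere in the codebase):

#include <cstddef>

// Hypothetical block size for this example only.
constexpr size_t BLOCK_SIZE = 4096;

// Map an absolute file offset to the (block_number, offset-within-block)
// pair that fetch_block expects.
inline void locate(size_t file_offset, size_t& block_number, size_t& startpos) {
  block_number = file_offset / BLOCK_SIZE;
  startpos = file_offset % BLOCK_SIZE;
}

// Example: offset 10000 with a 4096-byte block lands in block 2 at startpos 1808,
// so a 100-byte read contained in that block would be issued as
// fetch_block(out, /*block_number=*/2, /*startpos=*/1808, /*length=*/100).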
std::vector<std::string>
unity_global::list_toolkit_classes_in_dynamic_module(std::string soname) {
  auto iter = dynamic_loaded_toolkits.find(soname);
  if (iter == dynamic_loaded_toolkits.end()) {
    throw("Toolkit name " + sanitize_url(soname) + " not found");
  } else {
    return iter->second.classes;
  }
}
variant_map_type unity_global::load_model(const std::string& url) {
  logstream(LOG_INFO) << "Load model from " << sanitize_url(url) << std::endl;
  try {
    dir_archive dir;
    dir.open_directory_for_read(url);
    std::string contents;
    if (dir.get_metadata("contents", contents) == false || contents != "model") {
      log_and_throw(std::string("Archive does not contain a model."));
    }
    iarchive iarc(dir);
    std::string model_name;
    std::string model_wrapper;
    char buf[256] = "";
    size_t magic_header_size = strlen(CLASS_MAGIC_HEADER);
    iarc.read(buf, magic_header_size);
    if (strcmp(buf, CLASS_MAGIC_HEADER)) {
      log_and_throw(std::string("Invalid model file."));
    }
    iarc >> model_name;
    logstream(LOG_INFO) << "Model name: " << model_name << std::endl;
    iarc >> model_wrapper;
    std::shared_ptr<model_base> model_ptr = classes->get_toolkit_class(model_name);
    iarc >> *(model_ptr);
    if (dir.get_input_stream()->fail()) {
      std::string message = "Fail to read.";
      log_and_throw_io_failure(message);
    }
    dir.close();
    variant_map_type ret;
    variant_set_value<std::shared_ptr<model_base>>(ret["model_base"], model_ptr);
    flexible_type flex_model_wrapper = (flexible_type)model_wrapper;
    variant_set_value<flexible_type>(ret["model_wrapper"], flex_model_wrapper);
    return ret;
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to load model from " + sanitize_url(url) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    log_and_throw(std::string("Unable to load model from ") + sanitize_url(url) + ": " + e);
  } catch (const std::exception& e) {
    log_and_throw(std::string("Unable to load model from ") + sanitize_url(url) + ": " + e.what());
  } catch (...) {
    log_and_throw(std::string("Unknown Error: Unable to load model from ") + sanitize_url(url));
  }
}
void unity_global::__write__(const std::string& url, const std::string& content) {
  general_ofstream fout(url);
  if (!fout.good()) {
    fout.close();
    log_and_throw_io_failure(std::string("Cannot open " + sanitize_url(url)));
  }
  fout << content;
  fout.close();
}
void s3_device::close(std::ios_base::openmode mode) {
  if (mode == std::ios_base::out && m_write_stream) {
    logstream(LOG_INFO) << "S3 Finalizing write to " << sanitize_url(m_filename) << std::endl;
    m_write_stream->Close();
    m_write_stream.reset();
  } else if (mode == std::ios_base::in && m_read_stream) {
    m_read_stream->Close();
    m_read_stream.reset();
  }
}
void general_fstream_sink::open_file(std::string file, bool gzip_compressed) {
  sanitized_filename = sanitize_url(file);
  out_file = std::make_shared<union_fstream>(file, std::ios_base::out | std::ios_base::binary);
  is_gzip_compressed = gzip_compressed;
  if (gzip_compressed) {
    compressor = std::make_shared<boost::iostreams::gzip_compressor>();
  }
  // get the underlying stream inside the union stream
  underlying_stream = out_file->get_ostream();
}
gboolean subscribe_method(gchar *url) {
  add_feed *feed = g_new0(add_feed, 1);
  feed->feed_url = url;
  feed->add = 1;
  feed->enabled = feed->validate = 1;
  feed->fetch_html = 0;
  if (feed->feed_url && strlen(feed->feed_url)) {
    g_print("New Feed received: %s\n", url);
    feed->feed_url = sanitize_url(feed->feed_url);
    d("sanitized feed URL: %s\n", feed->feed_url);
    if (g_hash_table_find(rf->hr, check_if_match, feed->feed_url)) {
      rss_error(NULL, NULL, _("Error adding feed."), _("Feed already exists!"));
      //return FALSE;
      /* we return true here since org.gnome.feed.Reader
       * doesn't support status */
      return TRUE;
    }
    if (setup_feed(feed)) {
      gchar *msg = g_strdup_printf(_("Importing URL: %s"), feed->feed_url);
      taskbar_push_message(msg);
      g_free(msg);
    }
    if (rf->treeview)
      store_redraw(GTK_TREE_VIEW(rf->treeview));
    save_gconf_feed();
#if (DATASERVER_VERSION >= 2033001)
    camel_operation_pop_message (NULL);
#else
    camel_operation_end(NULL);
#endif
  }
  g_free(url);
  return TRUE;
}
std::string unity_global::get_graphlab_object_type(const std::string& url) {
  logstream(LOG_INFO) << "Getting graphlab object type stored at: "
                      << sanitize_url(url) << std::endl;
  // valid values are: model, graph, sframe, sarray
  return dir_archive::get_directory_metadata(url, "contents");
}
std::string unity_global::load_toolkit(std::string soname,
                                       std::string module_subpath) {
  // rewrite "local" protocol
  std::string protocol = fileio::get_protocol(soname);
  if (protocol == "local") {
    soname = fileio::remove_protocol(soname);
  }

  so_registration_list regentry;
  regentry.original_soname = soname;
  logstream(LOG_INFO) << "Attempt loading of " << sanitize_url(soname) << std::endl;

  // see if the file exists and whether we need to download it
  if (fileio::try_to_open_file(soname) == false) {
    return "Unable to open file " + sanitize_url(soname);
  }
  if (protocol != "") {
    // there is a protocol associated. We need to copy this file to local.
    // issue a copy to copy it to the local temp directory
    std::string tempname = get_temp_name();
    fileio::copy(soname, tempname);
    soname = tempname;
  }
  if (!file_contains_substring(soname, "get_toolkit_function_registration") &&
      !file_contains_substring(soname, "get_toolkit_class_registration")) {
    return soname + " is not a valid extension";
  }

  // get the base name of the shared library (without the .so)
  std::string modulename = fileio::get_filename(regentry.original_soname);
  std::vector<std::string> split_names;
  boost::algorithm::split(split_names, modulename, boost::is_any_of("."));
  if (split_names.size() == 0) return "Invalid filename";
  if (module_subpath.empty()) {
    regentry.modulename = split_names[0];
  } else if (module_subpath == "..") {
    regentry.modulename = "";
  } else {
    regentry.modulename = module_subpath + "." + split_names[0];
  }

  // goody. now for the dl loading
#ifndef _WIN32
  void* dl = dlopen(soname.c_str(), RTLD_NOW | RTLD_LOCAL);
#else
  void* dl = (void *)LoadLibrary(soname.c_str());
#endif
  logstream(LOG_INFO) << "Library load of " << sanitize_url(soname) << std::endl;
  regentry.effective_soname = soname;
  regentry.dl = dl;
  // check for failure
  if (dl == NULL) {
#ifndef _WIN32
    char* err = dlerror();
    // I think we need to copy this out early
    std::string ret = err;
    logstream(LOG_ERROR) << "Unable to load " << sanitize_url(soname)
                         << ": " << ret << std::endl;
    if (err) return ret;
    else return "dlopen failed due to an unknown error";
#else
    std::string ret = get_last_err_str(GetLastError());
    logstream(LOG_ERROR) << "Unable to load " << sanitize_url(soname)
                         << ": " << ret << std::endl;
    if (!ret.empty()) return ret;
    else return "LoadLibrary failed due to an unknown error";
#endif
  }

  /**************************************************************************/
  /*                                                                        */
  /*                         Function Registration                          */
  /*                                                                        */
  /**************************************************************************/
  // get the registration symbols
  std::vector<std::string> toolkit_function_reg_names
      {"get_toolkit_function_registration",
       "_Z33get_toolkit_function_registrationv",
       "__Z33get_toolkit_function_registrationv"};

  get_toolkit_function_registration_type get_toolkit_function_registration = nullptr;
  for (auto reg_name : toolkit_function_reg_names) {
    get_toolkit_function_registration =
        reinterpret_cast<get_toolkit_function_registration_type>(
#ifndef _WIN32
            dlsym(dl, reg_name.c_str())
#else
            (void *)GetProcAddress((HMODULE)dl, reg_name.c_str())
#endif
        );
    if (get_toolkit_function_registration != nullptr) break;
  }

  // register functions
  if (get_toolkit_function_registration) {
    auto functions = (*get_toolkit_function_registration)();
    for (auto& fn: functions) {
      if (!regentry.modulename.empty()) {
        fn.name = regentry.modulename + "." + fn.name;
      }
      fn.description["file"] = regentry.original_soname;
      logstream(LOG_INFO) << "Adding function: " << fn.name << std::endl;
      regentry.functions.push_back(fn.name);
    }
    toolkit_functions->register_toolkit_function(functions);
  }

  /**************************************************************************/
  /*                                                                        */
  /*                           Class Registration                           */
  /*                                                                        */
  /**************************************************************************/
  std::vector<std::string> toolkit_class_reg_names
      {"get_toolkit_class_registration",
       "_Z30get_toolkit_class_registrationv",
       "__Z30get_toolkit_class_registrationv"};

  get_toolkit_class_registration_type get_toolkit_class_registration = nullptr;
  for (auto reg_name : toolkit_class_reg_names) {
    get_toolkit_class_registration =
        reinterpret_cast<get_toolkit_class_registration_type>(
#ifndef _WIN32
            dlsym(dl, reg_name.c_str())
#else
            (void *)GetProcAddress((HMODULE)dl, reg_name.c_str())
#endif
        );
    if (get_toolkit_class_registration != nullptr) break;
  }

  // register classes
  if (get_toolkit_class_registration) {
    auto class_reg = (*get_toolkit_class_registration)();
    for (auto& cl: class_reg) {
      if (!regentry.modulename.empty()) {
        cl.name = regentry.modulename + "." + cl.name;
      }
      cl.description["file"] = regentry.original_soname;
      logstream(LOG_INFO) << "Adding class : " << cl.name << std::endl;
      regentry.classes.push_back(cl.name);
    }
    classes->register_toolkit_class(class_reg);
  }

  if (regentry.functions.empty() && regentry.classes.empty()) {
    // nothing has been registered! unload the dl
#ifndef _WIN32
    dlclose(dl);
#else
    FreeLibrary((HMODULE)dl);
#endif
    return "No functions or classes registered by " + sanitize_url(soname);
  }
  // note that it is possible to load a toolkit multiple times.
  // It is not safe to unload previously loaded toolkits since I may have
  // a reference to it (for instance a class). We just keep loading over
  // and hope for the best.

  // store and remember the dlhandle and what was registered
  dynamic_loaded_toolkits[regentry.original_soname] = regentry;
  return std::string();
}
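For context, load_toolkit only requires that the shared library export a no-argument registration function under one of the symbol names probed above (the mangled forms correspond to a parameterless C++ function). A minimal sketch of what such a module might define; the function_spec type here is a stand-in inferred from how the loader uses fn.name and fn.description, not the SDK's actual specification type:

// Sketch only: function_spec stands in for the real toolkit specification type,
// whose exact name and header this snippet does not assume. What matters is the
// exported, no-argument symbol that load_toolkit resolves via dlsym/GetProcAddress.
#include <map>
#include <string>
#include <vector>

struct function_spec {
  std::string name;                                 // loader prefixes "<module>."
  std::map<std::string, std::string> description;   // loader sets description["file"]
};

std::vector<function_spec> get_toolkit_function_registration() {
  function_spec spec;
  spec.name = "my_function";
  return {spec};
}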