Exemplo n.º 1
0
 /**
  * Saves a model into the directory archive at `url`.
  *
  * The archive is tagged with metadata "contents" = "model" and then
  * receives, in order: CLASS_MAGIC_HEADER, the model name, `model_wrapper`
  * and the serialized model.
  *
  * \param model          Model to serialize. It is dereferenced
  *                       unconditionally, so it must not be null.
  * \param model_wrapper  String stored in the archive right after the name.
  * \param url            Destination of the archive. Only its sanitize_url()
  *                       form is logged or placed in error messages here.
  *
  * Stream failures are reported through log_and_throw_io_failure; every
  * other failure is reported through log_and_throw.
  */
 void unity_global::save_model(std::shared_ptr<model_base> model,
                               const std::string& model_wrapper,
                               const std::string& url) {
   logstream(LOG_INFO) << "Save model to " << sanitize_url(url) << std::endl;
   logstream(LOG_INFO) << "Model name: " << model->name() << std::endl;
   try {
     dir_archive dir;
     dir.open_directory_for_write(url);
     dir.set_metadata("contents", "model");
     oarchive oarc(dir);
     oarc.write(CLASS_MAGIC_HEADER, strlen(CLASS_MAGIC_HEADER));
     oarc << model->name();
     oarc << model_wrapper;
     oarc << *model;
     // Check the stream only after everything was written so that write
     // errors are not silently dropped.
     if (dir.get_output_stream()->fail()) {
       std::string message = "Fail to write.";
       log_and_throw_io_failure(message);
     }
     dir.close();
   } catch (std::ios_base::failure& e) {
     std::string message = "Unable to save model to " + sanitize_url(url) + ": " + e.what();
     log_and_throw_io_failure(message);
   } catch (std::string& e) {
     log_and_throw(std::string("Unable to save model to ") + sanitize_url(url) + ": " + e);
   } catch (const std::exception& e) {
     // Keep the detail of standard exceptions instead of degrading them to
     // "Unknown Error"; mirrors the handling in load_model.
     log_and_throw(std::string("Unable to save model to ") + sanitize_url(url) + ": " + e.what());
   } catch (...) {
     log_and_throw(std::string("Unknown Error: Unable to save model to ") + sanitize_url(url));
   }
 }
Exemplo n.º 2
0
/**
 * Opens the S3 object named by `filename`.
 *
 * \param filename  S3 url; it is parsed with webstor::parse_s3url to obtain
 *                  the access key, secret key, endpoint, bucket and object
 *                  name.
 * \param write     When true a write stream is opened; otherwise the path is
 *                  checked to be a file, its size is recorded and a read
 *                  stream is opened.
 *
 * Any failure on the read path is reported through log_and_throw with a
 * message that contains only the sanitize_url() form of `filename`.
 */
s3_device::s3_device(const std::string& filename, const bool write) {
  m_filename = filename;

  // Split the url into its components (credentials included).
  webstor::s3url parsed;
  webstor::parse_s3url(filename, parsed);

  m_s3fs = std::make_shared<dmlc::io::S3FileSystem>();
  m_s3fs->SetCredentials(parsed.access_key_id, parsed.secret_key);

  // Rebuild the url from endpoint/bucket/object only, leaving the keys out.
  const std::string url_without_credentials =
      parsed.endpoint.empty()
          ? "s3://" + parsed.bucket + "/" + parsed.object_name
          : "s3://" + parsed.endpoint + "/" + parsed.bucket + "/" + parsed.object_name;
  auto uri = dmlc::io::URI(url_without_credentials.c_str());

  if (write) {
    m_write_stream.reset(m_s3fs->Open(uri, "w"));
    return;
  }

  try {
    auto info = m_s3fs->GetPathInfo(uri);
    m_filesize = info.size;
    // Only plain files can be opened for reading.
    if (info.type != dmlc::io::kFile) {
      log_and_throw("Cannot open " + sanitize_url(filename));
    }
    m_read_stream.reset(m_s3fs->OpenForRead(uri));
  } catch (...) {
    // Whatever went wrong, surface one uniform, credential-free message.
    log_and_throw("Cannot open " + sanitize_url(filename));
  }
}
Exemplo n.º 3
0
  /**
   * Reads the whole file at `url` and returns its contents as a string.
   *
   * \param url  Location of the file; opened through general_ifstream.
   * \return     Every byte read from the stream, in order.
   *
   * Reports through log_and_throw_io_failure when the file cannot be opened
   * ("Cannot open ...") or when reading stops before end-of-file
   * ("Read fail ..."). Messages contain only the sanitize_url() form of `url`.
   */
  std::string unity_global::__read__(const std::string& url) {
    general_ifstream fin(url);
    if (!fin.good()) {
      fin.close();
      log_and_throw_io_failure(std::string("Cannot open " + sanitize_url(url)));
    }

    std::stringstream ss;
    // Stack buffer instead of new[]/delete[]: nothing is leaked if one of the
    // stream operations below throws.
    char buf[4096];
    while (fin.good()) {
      fin.read(buf, sizeof(buf));
      // gcount() is the number of bytes the last read actually produced; the
      // final chunk is usually shorter than the buffer.
      ss.write(buf, fin.gcount());
    }

    // The loop ends on any stream error; only end-of-file counts as success.
    if (!fin.eof()) {
      fin.close();
      log_and_throw_io_failure(std::string("Read fail " + sanitize_url(url)));
    }

    fin.close();
    return ss.str();
  }
Exemplo n.º 4
0
/**
 * Loads this graph from the directory archive at `target_dir`.
 *
 * The archive must carry metadata "contents" = "graph"; otherwise the load
 * is rejected with "Archive does not contain a graph.".
 *
 * \param target_dir  Location of the archive. Only its sanitize_url() form
 *                    appears in error messages.
 * \return            true; every failure leaves through log_and_throw or
 *                    log_and_throw_io_failure instead of a false return.
 */
bool unity_sgraph::load_graph(std::string target_dir) {
  log_func_entry();
  try {
    dir_archive dir;
    dir.open_directory_for_read(target_dir);
    std::string contents;
    if (dir.get_metadata("contents", contents) == false ||
        contents != "graph") {
      log_and_throw(std::string("Archive does not contain a graph."));
    }
    iarchive iarc(dir);
    load(iarc);
    dir.close();
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": " + e;
    log_and_throw(message);
  } catch (const std::exception& e) {
    // Keep the detail of standard exceptions instead of reporting them as
    // "Unknown Error"; mirrors the handling in unity_global::load_model.
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": " + e.what();
    log_and_throw(message);
  } catch (...) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
Exemplo n.º 5
0
/**
 * Saves this graph to `target` in the requested `format`.
 *
 * \param target  Destination. Only its sanitize_url() form appears in error
 *                messages.
 * \param format  "binary" writes a directory archive tagged with metadata
 *                "contents" = "graph"; "json" and "csv" delegate to
 *                save_sgraph_to_json / save_sgraph_to_csv. Any other value
 *                is rejected.
 * \return        true; every failure leaves through log_and_throw or
 *                log_and_throw_io_failure instead of a false return.
 */
bool unity_sgraph::save_graph(std::string target, std::string format) {
  log_func_entry();
  try {
    if (format == "binary") {
      dir_archive dir;
      dir.open_directory_for_write(target);
      dir.set_metadata("contents", "graph");
      oarchive oarc(dir);
      // Fail early if the output stream is already unusable.
      if (dir.get_output_stream()->fail()) {
        log_and_throw_io_failure("Fail to write");
      }
      save(oarc);
      // Check again after serializing: errors raised while writing would
      // otherwise go unnoticed and the call would report success.
      if (dir.get_output_stream()->fail()) {
        log_and_throw_io_failure("Fail to write");
      }
      dir.close();
    } else if (format == "json") {
      save_sgraph_to_json(get_graph(), target);
    } else if (format == "csv") {
      save_sgraph_to_csv(get_graph(), target);
    } else {
      log_and_throw("Unable to save to format : " + format);
    }
  } catch (std::ios_base::failure& e) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": " + e;
    log_and_throw(message);
  } catch (const std::exception& e) {
    // Keep the detail of standard exceptions instead of reporting them as
    // "Unknown Error".
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": " + e.what();
    log_and_throw(message);
  } catch (...) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
Exemplo n.º 6
0
/**
 * Opens `filename` for writing.
 *
 * Written as a function-try-block so that exceptions thrown by the base
 * class and member initializers are handled too. Every handler reports
 * through log_and_throw_io_failure, and the message contains only the
 * sanitize_url() form of `filename`.
 */
general_ofstream::general_ofstream(std::string filename)
    try : general_ofstream_base(filename), opened_filename(filename) {
    } catch (const std::exception& e) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for write. " + e.what());
    } catch (const std::string& e) {
      // Caught by const reference: no copy of the thrown string is needed.
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for write. " + e);
    } catch (...) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
    }
Exemplo n.º 7
0
/**
 * Opens `filename` for reading; `gzip_compressed` is forwarded unchanged to
 * general_ifstream_base.
 *
 * Written as a function-try-block so that exceptions thrown by the base
 * class and member initializers are handled too. Every handler reports
 * through log_and_throw_io_failure, and the message contains only the
 * sanitize_url() form of `filename`.
 */
general_ifstream::general_ifstream(std::string filename, bool gzip_compressed)
    try : general_ifstream_base(filename, gzip_compressed),
          opened_filename(filename) {
    } catch (const std::exception& e) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e.what());
    } catch (const std::string& e) {
      // Caught by const reference: no copy of the thrown string is needed.
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e);
    } catch (...) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
    }
Exemplo n.º 8
0
/**
 * Opens `filename` for reading.
 *
 * Every handler reports through log_and_throw_io_failure, and the message
 * contains only the sanitize_url() form of `filename`.
 */
general_ifstream::general_ifstream(std::string filename)
    // This is the function-try-block syntax which, together with the member
    // function pointer syntax, is probably the ugliest C++ syntactic element
    // ever conceived. It is needed so that exceptions thrown by the base
    // class and member initializers are handled as well.
    try : general_ifstream_base(filename), opened_filename(filename) {
    } catch (const std::exception& e) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e.what());
    } catch (const std::string& e) {
      // Caught by const reference: no copy of the thrown string is needed.
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename) + " for read. " + e);
    } catch (...) {
      log_and_throw_io_failure("Cannot open " + sanitize_url(filename));
    }
Exemplo n.º 9
0
std::shared_ptr<file_ownership_handle> file_handle_pool::register_file(const std::string& file_name) {
  std::lock_guard<std::mutex> guard(this->m_mutex);
  std::shared_ptr<file_ownership_handle> ret = get_file_handle(file_name);

  if (!ret) {
    logstream(LOG_DEBUG) << "register_file_handle for file " << sanitize_url(file_name) << std::endl;

    
    ret = std::make_shared<file_ownership_handle>(file_name, boost::algorithm::starts_with(file_name, "cache://"));
    m_file_handles[file_name] = ret;
  }

  /**  This seems to be the safest way to do this.  Ideally, we would
   *   like file_ownership_handle to take care of this.  However, it
   *   is not certain that the pool will be around when that object is
   *   destroyed, so that brings up a number of possible rare corner
   *   cases to check for.  Doing this is simplest for now. 
   */
  if( (++this->num_file_registers) % (16*1024) == 0) {
    for(auto it = m_file_handles.begin(); it != m_file_handles.end();) {
      if(it->second.expired()) {
        // Advances to the next element, or m_file_handles.end(); 
        it = m_file_handles.erase(it);
      } else {
        ++it;
      }
    }
  }

  return ret;
}
Exemplo n.º 10
0
  /**
   * Fetches part of a block: copies `length` bytes, starting at offset
   * `startpos` inside block `block_number`, into `output`.
   *
   * The block cache is tried first. If it does not deliver exactly `length`
   * bytes, the whole block is read from the underlying contents, offered to
   * the cache (a failed cache write is only logged) and the requested range
   * is copied out of the freshly read block.
   *
   * Returns true on success and false on failure. Failure means the block
   * lies beyond the end of the file, the underlying read came up short, or
   * the requested range does not fit inside the block.
   */
  bool fetch_block(char* output, 
                   size_t block_number,
                   size_t startpos, 
                   size_t length) {
    auto& bc = block_cache::get_instance();
    std::string key = get_key_name(block_number);
    int64_t ret = bc.read(key, output, startpos, startpos + length);
    // Compare in the unsigned domain only once ret is known to be non-negative.
    if (ret >= 0 && static_cast<uint64_t>(ret) == length) return true;

    logstream(LOG_INFO) << "Fetching " << sanitize_url(m_filename) << " Block " << block_number << std::endl;
    // ok. failure... no such block or block is bad. We read it ourselves.
    // read the whole block
    auto block_start = block_number * READ_CACHING_BLOCK_SIZE;
    // A block that starts past the end of the file cannot be read; bail out
    // before block_end - block_start wraps around.
    if (block_start > m_file_size) return false;
    auto block_end = std::min(block_start +  READ_CACHING_BLOCK_SIZE, m_file_size);
    const auto block_length = block_end - block_start;
    // seek to the block and read the whole block at once
    auto& contents = get_contents();
    contents->seek(block_start, std::ios_base::beg, std::ios_base::in);
    std::string block_contents(block_length, 0);
    auto bytes_read = contents->read(&(block_contents[0]), block_length);
    // read failed.
    if (bytes_read < block_length) return false;

    // write the block
    bool write_block_ok = bc.write(key, block_contents);
    if (write_block_ok == false) {
      logstream(LOG_ERROR) << "Unable to write block " << key << std::endl;
      // still ok. we can continue. but too many of these are bad.
    }

    // The requested range must lie inside the block we just read (the last
    // block of a file may be short); otherwise memcpy would read out of
    // bounds. Written so that startpos + length cannot overflow.
    if (startpos > block_contents.size() ||
        length > block_contents.size() - startpos) {
      return false;
    }

    // since we just read the block, lets fill the output
    const char* src = block_contents.c_str();
    memcpy(output, src + startpos, length);
    return true;
  }
Exemplo n.º 11
0
 /**
  * Returns the class names recorded for the dynamically loaded toolkit that
  * was registered under `soname`.
  *
  * Throws a string of the form "Toolkit name <sanitized soname> not found"
  * when no toolkit is registered under that name.
  */
 std::vector<std::string> unity_global::list_toolkit_classes_in_dynamic_module(std::string soname) {
   auto entry = dynamic_loaded_toolkits.find(soname);
   if (entry != dynamic_loaded_toolkits.end()) {
     return entry->second.classes;
   }
   throw("Toolkit name " + sanitize_url(soname) + " not found");
 }
Exemplo n.º 12
0
  /**
   * Loads a model from the directory archive at `url`.
   *
   * The archive must carry metadata "contents" = "model" and its stream must
   * begin with CLASS_MAGIC_HEADER, followed by the model name, the model
   * wrapper string and the serialized model.
   *
   * \param url  Location of the archive. Only its sanitize_url() form is
   *             logged or placed in error messages here.
   * \return     A map with two entries: "model_base" (the loaded model) and
   *             "model_wrapper" (the wrapper string as a flexible_type).
   *
   * Stream failures are reported through log_and_throw_io_failure; every
   * other failure is reported through log_and_throw.
   */
  variant_map_type unity_global::load_model(const std::string& url) {
    logstream(LOG_INFO) << "Load model from " << sanitize_url(url) << std::endl;
    try {
      dir_archive archive;
      archive.open_directory_for_read(url);

      // Refuse archives that are not tagged as holding a model.
      std::string contents_tag;
      const bool has_tag = archive.get_metadata("contents", contents_tag);
      if (!has_tag || contents_tag != "model") {
        log_and_throw(std::string("Archive does not contain a model."));
      }
      iarchive iarc(archive);

      // The stream has to start with the magic header. The buffer is zero
      // filled, so the bytes read stay NUL terminated for strcmp.
      char header[256] = "";
      const size_t header_length = strlen(CLASS_MAGIC_HEADER);
      iarc.read(header, header_length);
      if (strcmp(header, CLASS_MAGIC_HEADER) != 0) {
        log_and_throw(std::string("Invalid model file."));
      }

      std::string model_name;
      iarc >> model_name;
      logstream(LOG_INFO) << "Model name: " << model_name << std::endl;

      std::string model_wrapper;
      iarc >> model_wrapper;

      // Obtain an instance of the named class and deserialize into it.
      std::shared_ptr<model_base> model_ptr = classes->get_toolkit_class(model_name);
      iarc >> *model_ptr;
      if (archive.get_input_stream()->fail()) {
        log_and_throw_io_failure(std::string("Fail to read."));
      }
      archive.close();

      variant_map_type result;
      variant_set_value<std::shared_ptr<model_base>>(result["model_base"], model_ptr);
      flexible_type wrapper_value = (flexible_type)model_wrapper;
      variant_set_value<flexible_type>(result["model_wrapper"], wrapper_value);
      return result;
    } catch (std::ios_base::failure& e) {
      const std::string message =
          "Unable to load model from " + sanitize_url(url) + ": " + e.what();
      log_and_throw_io_failure(message);
    } catch (std::string& e) {
      const std::string message =
          std::string("Unable to load model from ") + sanitize_url(url) + ": " + e;
      log_and_throw(message);
    } catch (const std::exception& e) {
      const std::string message =
          std::string("Unable to load model from ") + sanitize_url(url) + ": " + e.what();
      log_and_throw(message);
    } catch (...) {
      const std::string message =
          std::string("Unknown Error: Unable to load model from ") + sanitize_url(url);
      log_and_throw(message);
    }
  }
Exemplo n.º 13
0
 /**
  * Writes `content` to the file at `url`.
  *
  * \param url      Destination; opened through general_ofstream.
  * \param content  Bytes to write.
  *
  * Reports through log_and_throw_io_failure when the file cannot be opened
  * or when the stream is no longer good after writing. Messages contain
  * only the sanitize_url() form of `url`.
  */
 void unity_global::__write__(const std::string& url, const std::string& content) {
   general_ofstream fout(url);
   if (!fout.good()) {
     fout.close();
     log_and_throw_io_failure(std::string("Cannot open " + sanitize_url(url)));
   }
   fout << content;
   // Do not report success when the write itself failed; __read__ reports
   // read failures the same way.
   if (!fout.good()) {
     fout.close();
     log_and_throw_io_failure(std::string("Fail to write " + sanitize_url(url)));
   }
   fout.close();
 }
Exemplo n.º 14
0
/**
 * Closes and releases the stream that matches `mode`.
 *
 * Only the exact values std::ios_base::out and std::ios_base::in are acted
 * upon; any other mode, or a stream that is not open, makes this a no-op.
 * Closing the write stream is additionally logged at LOG_INFO.
 */
void s3_device::close(std::ios_base::openmode mode) {
  if (mode == std::ios_base::out) {
    if (!m_write_stream) return;
    logstream(LOG_INFO) << "S3 Finalizing write to " << sanitize_url(m_filename) << std::endl;
    m_write_stream->Close();
    m_write_stream.reset();
    return;
  }

  if (mode == std::ios_base::in) {
    if (!m_read_stream) return;
    m_read_stream->Close();
    m_read_stream.reset();
  }
}
Exemplo n.º 15
0
void general_fstream_sink::open_file(std::string file, bool gzip_compressed) {
  sanitized_filename = sanitize_url(file);
  out_file = std::make_shared<union_fstream>(file, std::ios_base::out | std::ios_base::binary);
  is_gzip_compressed = gzip_compressed;
  if (gzip_compressed) {
    compressor = std::make_shared<boost::iostreams::gzip_compressor>();
  }
  // get the underlying stream inside the union stream
  underlying_stream = out_file->get_ostream();
}
Exemplo n.º 16
0
/*
 * Handles a request to subscribe to the feed at `url`.
 *
 * A non-empty url is sanitized, checked against the already known feeds in
 * rf->hr and, if new, handed to setup_feed(); the tree view is redrawn and
 * the feed configuration is saved afterwards.
 *
 * `url` is released with g_free() on every return path.
 *
 * Always returns TRUE, including for a duplicate feed, since
 * org.gnome.feed.Reader doesn't support status.
 */
gboolean
subscribe_method(gchar *url)
{
	add_feed *feed = g_new0(add_feed, 1);
	feed->feed_url = url;
	feed->add=1;
	feed->enabled=feed->validate=1;
	feed->fetch_html = 0;

	if (!feed->feed_url || !strlen(feed->feed_url)) {
		/* Nothing to subscribe to. feed was never handed to anyone,
		 * so release it here instead of leaking it. */
		g_free(feed);
		g_free(url);
		return TRUE;
	}

	g_print("New Feed received: %s\n", url);
	feed->feed_url = sanitize_url(feed->feed_url);
	d("sanitized feed URL: %s\n", feed->feed_url);
	if (g_hash_table_find(rf->hr, check_if_match, feed->feed_url)) {
		rss_error(NULL, NULL, _("Error adding feed."),
			_("Feed already exists!"));
		/* feed is still purely local on this path: release it together
		 * with url, exactly like the regular exit does for url.
		 * NOTE(review): the string returned by sanitize_url() is not
		 * freed because its ownership is not visible from here --
		 * confirm and free it if it is a fresh allocation. */
		g_free(feed);
		g_free(url);
		/* we return true here since org.gnome.feed.Reader
		 * doesn't support status */
		return TRUE;
	}
	/* NOTE(review): feed is not freed after setup_feed(); whether
	 * setup_feed() keeps the pointer is not visible from here -- confirm. */
	if (setup_feed(feed)) {
		gchar *msg = g_strdup_printf(_("Importing URL: %s"),
				feed->feed_url);
		taskbar_push_message(msg);
		g_free(msg);
	}
	if (rf->treeview)
		store_redraw(GTK_TREE_VIEW(rf->treeview));
	save_gconf_feed();
#if (DATASERVER_VERSION >= 2033001)
	camel_operation_pop_message (NULL);
#else
	camel_operation_end(NULL);
#endif
	g_free(url);
	return TRUE;
}
Exemplo n.º 17
0
  /**
   * Returns the "contents" metadata entry of the directory archive at `url`.
   * The lookup is logged at LOG_INFO using the sanitize_url() form of `url`.
   */
  std::string unity_global::get_graphlab_object_type(const std::string& url) {
    logstream(LOG_INFO) << "Getting graphlab object type stored at: " << sanitize_url(url) << std::endl;

    // valid values are: model, graph, sframe, sarray
    std::string object_type = dir_archive::get_directory_metadata(url, "contents");
    return object_type;
  }
Exemplo n.º 18
0
  /**
   * Loads a toolkit shared library and registers the functions and classes
   * it exports.
   *
   * \param soname          Path or url of the shared library. A "local"
   *                        protocol prefix is stripped; with any other
   *                        protocol the file is first copied to a temporary
   *                        name and loaded from there.
   * \param module_subpath  Controls the prefix put in front of every
   *                        registered name: empty -> the part of the file
   *                        name before the first '.'; ".." -> no prefix;
   *                        anything else -> "<module_subpath>.<that part>".
   *
   * \return An empty string on success, otherwise a description of what
   *         went wrong.
   *
   * On success the registration is stored in dynamic_loaded_toolkits under
   * the soname as it was after the "local" protocol rewrite.
   */
  std::string unity_global::load_toolkit(std::string soname,
                                         std::string module_subpath) {
    // rewrite "local" protocol
    std::string protocol = fileio::get_protocol(soname);
    if (protocol == "local") {
      soname = fileio::remove_protocol(soname);
    }

    so_registration_list regentry;
    regentry.original_soname = soname;
    logstream(LOG_INFO) << "Attempt loading of " << sanitize_url(soname) << std::endl;

    // see if the file exists and whether we need to download it
    if (fileio::try_to_open_file(soname) == false) {
      return "Unable to open file " + sanitize_url(soname);
    }

    if (protocol != "") {
      // there is a protocol associated. We need to copy this file to local
      // issue a copy to copy it to the local temp directory
      std::string tempname = get_temp_name();
      fileio::copy(soname, tempname);
      soname = tempname;
    }
    // Cheap sanity check before dlopen: the file has to mention at least one
    // of the registration entry points.
    if (!file_contains_substring(soname, "get_toolkit_function_registration") &&
        !file_contains_substring(soname, "get_toolkit_class_registration")) {
      return soname + " is not a valid extension";
    }



    // get the base name of the shared library (without the .so)
    std::string modulename = fileio::get_filename(regentry.original_soname);
    std::vector<std::string> split_names;
    boost::algorithm::split(split_names, modulename, boost::is_any_of("."));
    if (split_names.size() == 0) return "Invalid filename";
    if (module_subpath.empty()) {
      regentry.modulename = split_names[0];
    } else if (module_subpath == "..") {
      regentry.modulename = "";
    } else {
      regentry.modulename = module_subpath + "." + split_names[0];
    }

    // goody. now for the dl loading
#ifndef _WIN32
    void* dl = dlopen(soname.c_str(), RTLD_NOW | RTLD_LOCAL);
#else
    void *dl = (void *)LoadLibrary(soname.c_str());
#endif
    logstream(LOG_INFO) << "Library load of " << sanitize_url(soname) << std::endl;
    regentry.effective_soname = soname;
    regentry.dl = dl;
    // check for failure
    if (dl == NULL) {
#ifndef _WIN32
      // Copy the message out early. dlerror() may return NULL, and a
      // std::string must never be constructed from a null pointer.
      const char* err = dlerror();
      std::string ret = err ? err : "dlopen failed due to an unknown error";
      logstream(LOG_ERROR) << "Unable to load " << sanitize_url(soname) << ": " << ret << std::endl;
      return ret;
#else
      std::string ret = get_last_err_str(GetLastError());
      logstream(LOG_ERROR) << "Unable to load " << sanitize_url(soname) << ": " << ret << std::endl;
      if (!ret.empty()) return ret;
      else return "LoadLibrary failed due to an unknown error";
#endif
    }

  /**************************************************************************/
  /*                                                                        */
  /*                         Function Registration                          */
  /*                                                                        */
  /**************************************************************************/
    // get the registration symbols; the plain name is tried first, then the
    // two mangled spellings
    std::vector<std::string> toolkit_function_reg_names
                {"get_toolkit_function_registration",
                  "_Z33get_toolkit_function_registrationv",
                  "__Z33get_toolkit_function_registrationv"};

    get_toolkit_function_registration_type get_toolkit_function_registration = nullptr;
    for (const auto& reg_name : toolkit_function_reg_names) {
      get_toolkit_function_registration =
          reinterpret_cast<get_toolkit_function_registration_type>
          (
#ifndef _WIN32
           dlsym(dl, reg_name.c_str())
#else
           (void *)GetProcAddress((HMODULE)dl, reg_name.c_str())
#endif
           );
      if (get_toolkit_function_registration != nullptr) break;
    }

    // register functions
    if (get_toolkit_function_registration) {
      auto functions = (*get_toolkit_function_registration)();
      for (auto& fn: functions) {
        if (!regentry.modulename.empty()) {
          fn.name = regentry.modulename + "." + fn.name;
        }
        fn.description["file"] = regentry.original_soname;
        logstream(LOG_INFO) << "Adding function: " << fn.name << std::endl;
        regentry.functions.push_back(fn.name);
      }
      toolkit_functions->register_toolkit_function(functions);
    }

/**************************************************************************/
/*                                                                        */
/*                           Class Registration                           */
/*                                                                        */
/**************************************************************************/

    std::vector<std::string> toolkit_class_reg_names
                {"get_toolkit_class_registration",
                 "_Z30get_toolkit_class_registrationv",
                 "__Z30get_toolkit_class_registrationv"};
    get_toolkit_class_registration_type get_toolkit_class_registration = nullptr;
    for (const auto& reg_name : toolkit_class_reg_names) {
      get_toolkit_class_registration =
          reinterpret_cast<get_toolkit_class_registration_type>
          (
#ifndef _WIN32
           dlsym(dl, reg_name.c_str())
#else
           (void *)GetProcAddress((HMODULE)dl, reg_name.c_str())
#endif
           );
      if (get_toolkit_class_registration != nullptr) break;
    }

    // register classes
    if (get_toolkit_class_registration) {
      auto class_reg = (*get_toolkit_class_registration)();
      for (auto& cl: class_reg) {
        if (!regentry.modulename.empty()) {
          cl.name = regentry.modulename + "." + cl.name;
        }
        cl.description["file"] = regentry.original_soname;
        logstream(LOG_INFO) << "Adding class : " << cl.name << std::endl;
        // Classes go into the class list (not the function list), so that
        // the list of classes recorded for this toolkit is actually filled.
        regentry.classes.push_back(cl.name);
      }
      classes->register_toolkit_class(class_reg);
    }


    if (regentry.functions.empty() && regentry.classes.empty()) {
      // nothing has been registered! unload the dl
#ifndef _WIN32
      dlclose(dl);
#else
      FreeLibrary((HMODULE)dl);
#endif
      return "No functions or classes registered by " + sanitize_url(soname);
    }
    // note that it is possible to load a toolkit multiple times.
    // It is not safe to unload previously loaded toolkits since I may have
    // a reference to it (for instance a class). We just keep loading over
    // and hope for the best.

    // store and remember the dlhandle and what was registered;
    dynamic_loaded_toolkits[regentry.original_soname] = regentry;
    return std::string();
  }