bool RuleTableLoaderStandard::Load(FormatType format , const std::vector<FactorType> &input , const std::vector<FactorType> &output , const std::string &inFile , const std::vector<float> &weight , size_t /* tableLimit */ , const LMList &languageModels , const WordPenaltyProducer* wpProducer , RuleTableTrie &ruleTable) { PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format"); const StaticData &staticData = StaticData::Instance(); const std::string& factorDelimiter = staticData.GetFactorDelimiter(); string lineOrig; size_t count = 0; std::ostream *progress = NULL; IFVERBOSE(1) progress = &std::cerr; util::FilePiece in(inFile.c_str(), progress); // reused variables vector<float> scoreVector; StringPiece line; std::string hiero_before, hiero_after; while(true) { try { line = in.ReadLine(); } catch (const util::EndOfFileException &e) { break; } if (format == HieroFormat) { // inefficiently reformat line hiero_before.assign(line.data(), line.size()); ReformatHieroRule(hiero_before, hiero_after); line = hiero_after; } util::TokenIter<util::MultiCharacter> pipes(line, "|||"); StringPiece sourcePhraseString(*pipes); StringPiece targetPhraseString(*++pipes); StringPiece scoreString(*++pipes); StringPiece alignString(*++pipes); // TODO(bhaddow) efficiently handle default instead of parsing this string every time. StringPiece ruleCountString = ++pipes ? *pipes : StringPiece("1 1"); if (++pipes) { stringstream strme; strme << "Syntax error at " << ruleTable.GetFilePath() << ":" << count; UserMessage::Add(strme.str()); abort(); } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } scoreVector.clear(); for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) { char *err_ind; scoreVector.push_back(strtod(s->data(), &err_ind)); UTIL_THROW_IF(err_ind == s->data(), util::Exception, "Bad score " << *s << " on line " << count); } const size_t numScoreComponents = ruleTable.GetFeature()->GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { stringstream strme; strme << "Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << count; UserMessage::Add(strme.str()); abort(); } // parse source & find pt node // constituent labels Word sourceLHS, targetLHS; // source Phrase sourcePhrase( 0); sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS); // create target phrase obj TargetPhrase *targetPhrase = new TargetPhrase(Output); targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS); targetPhrase->SetSourcePhrase(sourcePhrase); // rest of target phrase targetPhrase->SetAlignmentInfo(alignString, sourcePhrase); targetPhrase->SetTargetLHS(targetLHS); targetPhrase->SetRuleCount(ruleCountString, scoreVector[0]); //targetPhrase->SetDebugOutput(string("New Format pt ") + line); // component score, for n-best output std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore); std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore); targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weight, languageModels,wpProducer); TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS); phraseColl.Add(targetPhrase); count++; } // sort and prune each target phrase collection SortAndPrune(ruleTable); return true; }
bool HyperTreeLoader::Load(AllOptions const& opts, const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &ff, HyperTree &trie, boost::unordered_set<std::size_t> &sourceTermSet) { PrintUserTime(std::string("Start loading HyperTree")); sourceTermSet.clear(); std::size_t count = 0; std::ostream *progress = NULL; IFVERBOSE(1) progress = &std::cerr; util::FilePiece in(inFile.c_str(), progress); // reused variables std::vector<float> scoreVector; StringPiece line; double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); HyperPathLoader hyperPathLoader; Phrase dummySourcePhrase; { Word *lhs = NULL; dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs); delete lhs; } while(true) { try { line = in.ReadLine(); } catch (const util::EndOfFileException &e) { break; } util::TokenIter<util::MultiCharacter> pipes(line, "|||"); StringPiece sourceString(*pipes); StringPiece targetString(*++pipes); StringPiece scoreString(*++pipes); StringPiece alignString; if (++pipes) { StringPiece temp(*pipes); alignString = temp; } ++pipes; // counts scoreVector.clear(); for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) { int processed; float score = converter.StringToFloat(s->data(), s->length(), &processed); UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count); scoreVector.push_back(FloorScore(TransformScore(score))); } const std::size_t numScoreComponents = ff.GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << count); } // Source-side HyperPath sourceFragment; hyperPathLoader.Load(sourceString, sourceFragment); ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet); // Target-side TargetPhrase *targetPhrase = new TargetPhrase(&ff); Word *targetLHS = NULL; targetPhrase->CreateFromString(Output, output, targetString, &targetLHS); targetPhrase->SetTargetLHS(targetLHS); targetPhrase->SetAlignmentInfo(alignString); if (++pipes) { StringPiece sparseString(*pipes); targetPhrase->SetSparseScore(&ff, sparseString); } if (++pipes) { StringPiece propertiesString(*pipes); targetPhrase->SetProperties(propertiesString); } targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->EvaluateInIsolation(dummySourcePhrase, ff.GetFeaturesToApply()); // Add rule to trie. TargetPhraseCollection::shared_ptr phraseColl = GetOrCreateTargetPhraseCollection(trie, sourceFragment); phraseColl->Add(targetPhrase); count++; } // sort and prune each target phrase collection if (ff.GetTableLimit()) { SortAndPrune(trie, ff.GetTableLimit()); } return true; }
bool RuleTableLoaderStandard::Load(FormatType format , const std::vector<FactorType> &input , const std::vector<FactorType> &output , const std::string &inFile , size_t /* tableLimit */ , RuleTableTrie &ruleTable) { PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format"); const StaticData &staticData = StaticData::Instance(); const std::string& factorDelimiter = staticData.GetFactorDelimiter(); string lineOrig; size_t count = 0; std::ostream *progress = NULL; IFVERBOSE(1) progress = &std::cerr; util::FilePiece in(inFile.c_str(), progress); // reused variables vector<float> scoreVector; StringPiece line; std::string hiero_before, hiero_after; double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); while(true) { try { line = in.ReadLine(); } catch (const util::EndOfFileException &e) { break; } if (format == HieroFormat) { // inefficiently reformat line hiero_before.assign(line.data(), line.size()); ReformatHieroRule(hiero_before, hiero_after); line = hiero_after; } util::TokenIter<util::MultiCharacter> pipes(line, "|||"); StringPiece sourcePhraseString(*pipes); StringPiece targetPhraseString(*++pipes); StringPiece scoreString(*++pipes); StringPiece alignString; if (++pipes) { StringPiece temp(*pipes); alignString = temp; } if (++pipes) { StringPiece str(*pipes); //counts } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } scoreVector.clear(); for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) { int processed; float score = converter.StringToFloat(s->data(), s->length(), &processed); UTIL_THROW_IF(isnan(score), util::Exception, "Bad score " << *s << " on line " << count); scoreVector.push_back(FloorScore(TransformScore(score))); } const size_t numScoreComponents = ruleTable.GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { stringstream strme; strme << "Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << count; UserMessage::Add(strme.str()); abort(); } // parse source & find pt node // constituent labels Word *sourceLHS; Word *targetLHS; // create target phrase obj TargetPhrase *targetPhrase = new TargetPhrase(); targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS); // source Phrase sourcePhrase; sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS); // rest of target phrase targetPhrase->SetAlignmentInfo(alignString); targetPhrase->SetTargetLHS(targetLHS); //targetPhrase->SetDebugOutput(string("New Format pt ") + line); if (++pipes) { StringPiece sparseString(*pipes); targetPhrase->SetSparseScore(&ruleTable, sparseString); } if (++pipes) { StringPiece propertiesString(*pipes); targetPhrase->SetProperties(propertiesString); } targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector); targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply()); TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS); phraseColl.Add(targetPhrase); count++; } // sort and prune each target phrase collection SortAndPrune(ruleTable); return true; }
inline children launch_pipeline(const Entries &entries) { BOOST_ASSERT(entries.size() >= 2); children cs; detail::file_handle fhinvalid; boost::scoped_array<detail::pipe> pipes(new detail::pipe[entries.size() - 1]); #if defined(BOOST_POSIX_API) { typename Entries::size_type i = 0; const typename Entries::value_type::context_type &ctx = entries[i].context; detail::info_map infoin, infoout; if (ctx.stdin_behavior.get_type() != stream_behavior::close) { detail::stream_info si = detail::stream_info(ctx.stdin_behavior, false); infoin.insert(detail::info_map::value_type(STDIN_FILENO, si)); } BOOST_ASSERT(ctx.stdout_behavior.get_type() == stream_behavior::close); detail::stream_info si2(close_stream(), true); si2.type_ = detail::stream_info::use_handle; si2.handle_ = pipes[i].wend().release(); infoout.insert(detail::info_map::value_type(STDOUT_FILENO, si2)); if (ctx.stderr_behavior.get_type() != stream_behavior::close) { detail::stream_info si = detail::stream_info(ctx.stderr_behavior, true); infoout.insert(detail::info_map::value_type(STDERR_FILENO, si)); } detail::posix_setup s; s.work_directory = ctx.work_directory; pid_t pid = detail::posix_start(entries[i].executable, entries[i].arguments, ctx.environment, infoin, infoout, s); detail::file_handle fhstdin; if (ctx.stdin_behavior.get_type() == stream_behavior::capture) { fhstdin = detail::posix_info_locate_pipe(infoin, STDIN_FILENO, false); BOOST_ASSERT(fhstdin.valid()); } cs.push_back(child(pid, fhstdin, fhinvalid, fhinvalid)); } for (typename Entries::size_type i = 1; i < entries.size() - 1; ++i) { const typename Entries::value_type::context_type &ctx = entries[i].context; detail::info_map infoin, infoout; BOOST_ASSERT(ctx.stdin_behavior.get_type() == stream_behavior::close); detail::stream_info si1(close_stream(), false); si1.type_ = detail::stream_info::use_handle; si1.handle_ = pipes[i - 1].rend().release(); infoin.insert(detail::info_map::value_type(STDIN_FILENO, si1)); BOOST_ASSERT(ctx.stdout_behavior.get_type() == stream_behavior::close); detail::stream_info si2(close_stream(), true); si2.type_ = detail::stream_info::use_handle; si2.handle_ = pipes[i].wend().release(); infoout.insert(detail::info_map::value_type(STDOUT_FILENO, si2)); if (ctx.stderr_behavior.get_type() != stream_behavior::close) { detail::stream_info si = detail::stream_info(ctx.stderr_behavior, true); infoout.insert(detail::info_map::value_type(STDERR_FILENO, si)); } detail::posix_setup s; s.work_directory = ctx.work_directory; pid_t pid = detail::posix_start(entries[i].executable, entries[i].arguments, ctx.environment, infoin, infoout, s); cs.push_back(child(pid, fhinvalid, fhinvalid, fhinvalid)); } { typename Entries::size_type i = entries.size() - 1; const typename Entries::value_type::context_type &ctx = entries[i].context; detail::info_map infoin, infoout; BOOST_ASSERT(ctx.stdin_behavior.get_type() == stream_behavior::close); detail::stream_info si1(close_stream(), false); si1.type_ = detail::stream_info::use_handle; si1.handle_ = pipes[i - 1].rend().release(); infoin.insert(detail::info_map::value_type(STDIN_FILENO, si1)); if (ctx.stdout_behavior.get_type() != stream_behavior::close) { detail::stream_info si = detail::stream_info(ctx.stdout_behavior, true); infoout.insert(detail::info_map::value_type(STDOUT_FILENO, si)); } if (ctx.stderr_behavior.get_type() != stream_behavior::close) { detail::stream_info si = detail::stream_info(ctx.stderr_behavior, true); infoout.insert(detail::info_map::value_type(STDERR_FILENO, si)); } detail::posix_setup s; s.work_directory = ctx.work_directory; pid_t pid = detail::posix_start(entries[i].executable, entries[i].arguments, ctx.environment, infoin, infoout, s); detail::file_handle fhstdout, fhstderr; if (ctx.stdout_behavior.get_type() == stream_behavior::capture) { fhstdout = detail::posix_info_locate_pipe(infoout, STDOUT_FILENO, true); BOOST_ASSERT(fhstdout.valid()); } if (ctx.stderr_behavior.get_type() == stream_behavior::capture) { fhstderr = detail::posix_info_locate_pipe(infoout, STDERR_FILENO, true); BOOST_ASSERT(fhstderr.valid()); } cs.push_back(child(pid, fhinvalid, fhstdout, fhstderr)); } #elif defined(BOOST_WINDOWS_API) STARTUPINFOA si; detail::win32_setup s; s.startupinfo = &si; { typename Entries::size_type i = 0; const typename Entries::value_type::context_type &ctx = entries[i].context; detail::stream_info sii = detail::stream_info(ctx.stdin_behavior, false); detail::file_handle fhstdin; if (sii.type_ == detail::stream_info::use_pipe) fhstdin = sii.pipe_->wend(); BOOST_ASSERT(ctx.stdout_behavior.get_type() == stream_behavior::close); detail::stream_info sio(close_stream(), true); sio.type_ = detail::stream_info::use_handle; sio.handle_ = pipes[i].wend().release(); detail::stream_info sie(ctx.stderr_behavior, true); s.work_directory = ctx.work_directory; ::ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); PROCESS_INFORMATION pi = detail::win32_start(entries[i].executable, entries[i].arguments, ctx.environment, sii, sio, sie, s); if (!::CloseHandle(pi.hThread)) boost::throw_exception(boost::system::system_error(boost::system::error_code(::GetLastError(), boost::system::get_system_category()), "boost::process::launch_pipeline: CloseHandle failed")); cs.push_back(child(pi.dwProcessId, fhstdin, fhinvalid, fhinvalid, detail::file_handle(pi.hProcess))); } for (typename Entries::size_type i = 1; i < entries.size() - 1; ++i) { const typename Entries::value_type::context_type &ctx = entries[i].context; BOOST_ASSERT(ctx.stdin_behavior.get_type() == stream_behavior::close); detail::stream_info sii(close_stream(), false); sii.type_ = detail::stream_info::use_handle; sii.handle_ = pipes[i - 1].rend().release(); detail::stream_info sio(close_stream(), true); sio.type_ = detail::stream_info::use_handle; sio.handle_ = pipes[i].wend().release(); detail::stream_info sie(ctx.stderr_behavior, true); s.work_directory = ctx.work_directory; ::ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); PROCESS_INFORMATION pi = detail::win32_start(entries[i].executable, entries[i].arguments, ctx.environment, sii, sio, sie, s); if (!::CloseHandle(pi.hThread)) boost::throw_exception(boost::system::system_error(boost::system::error_code(::GetLastError(), boost::system::get_system_category()), "boost::process::launch_pipeline: CloseHandle failed")); cs.push_back(child(pi.dwProcessId, fhinvalid, fhinvalid, fhinvalid, detail::file_handle(pi.hProcess))); } { typename Entries::size_type i = entries.size() - 1; const typename Entries::value_type::context_type &ctx = entries[i].context; BOOST_ASSERT(ctx.stdin_behavior.get_type() == stream_behavior::close); detail::stream_info sii(close_stream(), false); sii.type_ = detail::stream_info::use_handle; sii.handle_ = pipes[i - 1].rend().release(); detail::file_handle fhstdout, fhstderr; detail::stream_info sio(ctx.stdout_behavior, true); if (sio.type_ == detail::stream_info::use_pipe) fhstdout = sio.pipe_->rend(); detail::stream_info sie(ctx.stderr_behavior, true); if (sie.type_ == detail::stream_info::use_pipe) fhstderr = sie.pipe_->rend(); s.work_directory = ctx.work_directory; ::ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); PROCESS_INFORMATION pi = detail::win32_start(entries[i].executable, entries[i].arguments, ctx.environment, sii, sio, sie, s); if (!::CloseHandle(pi.hThread)) boost::throw_exception(boost::system::system_error(boost::system::error_code(::GetLastError(), boost::system::get_system_category()), "boost::process::launch_pipeline: CloseHandle failed")); cs.push_back(child(pi.dwProcessId, fhinvalid, fhstdout, fhstderr, detail::file_handle(pi.hProcess))); } #endif return cs; }
bool RuleTrieLoader::Load(const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &ff, RuleTrie &trie) { PrintUserTime(std::string("Start loading text phrase table. Moses format")); const StaticData &staticData = StaticData::Instance(); // const std::string &factorDelimiter = staticData.GetFactorDelimiter(); std::size_t count = 0; std::ostream *progress = NULL; IFVERBOSE(1) progress = &std::cerr; util::FilePiece in(inFile.c_str(), progress); // reused variables std::vector<float> scoreVector; StringPiece line; double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); while(true) { try { line = in.ReadLine(); } catch (const util::EndOfFileException &e) { break; } util::TokenIter<util::MultiCharacter> pipes(line, "|||"); StringPiece sourcePhraseString(*pipes); StringPiece targetPhraseString(*++pipes); StringPiece scoreString(*++pipes); StringPiece alignString; if (++pipes) { StringPiece temp(*pipes); alignString = temp; } if (++pipes) { StringPiece str(*pipes); //counts } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } scoreVector.clear(); for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) { int processed; float score = converter.StringToFloat(s->data(), s->length(), &processed); UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count); scoreVector.push_back(FloorScore(TransformScore(score))); } const std::size_t numScoreComponents = ff.GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << count); } // parse source & find pt node // constituent labels Word *sourceLHS = NULL; Word *targetLHS; // create target phrase obj TargetPhrase *targetPhrase = new TargetPhrase(&ff); // targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS); targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS); // source Phrase sourcePhrase; // sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS); sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS); // rest of target phrase targetPhrase->SetAlignmentInfo(alignString); targetPhrase->SetTargetLHS(targetLHS); //targetPhrase->SetDebugOutput(string("New Format pt ") + line); if (++pipes) { StringPiece sparseString(*pipes); targetPhrase->SetSparseScore(&ff, sparseString); } if (++pipes) { StringPiece propertiesString(*pipes); targetPhrase->SetProperties(propertiesString); } targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply()); TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( trie, *sourceLHS, sourcePhrase); phraseColl.Add(targetPhrase); // not implemented correctly in memory pt. just delete it for now delete sourceLHS; count++; } // sort and prune each target phrase collection if (ff.GetTableLimit()) { SortAndPrune(trie, ff.GetTableLimit()); } return true; }
Variant HHVM_FUNCTION(proc_open, const String& cmd, const Array& descriptorspec, VRefParam pipesParam, const String& cwd /* = null_string */, const Variant& env /* = null_variant */, const Variant& other_options /* = null_variant */) { if (RuntimeOption::WhitelistExec && !check_cmd(cmd.data())) { return false; } if (cmd.size() != strlen(cmd.c_str())) { raise_warning("NULL byte detected. Possible attack"); return false; } Variant pipes(pipesParam, Variant::WithRefBind{}); std::vector<DescriptorItem> items; std::string scwd = ""; if (!cwd.empty()) { scwd = cwd.c_str(); } else if (!g_context->getCwd().empty()) { scwd = g_context->getCwd().c_str(); } Array enva; if (env.isNull()) { // Build out an environment that conceptually matches what we'd // see if we were to iterate the environment and call getenv() // for each name. // Env vars defined in the hdf file go in first for (const auto& envvar : RuntimeOption::EnvVariables) { enva.set(String(envvar.first), String(envvar.second)); } // global environment overrides the hdf for (char **env = environ; env && *env; env++) { char *p = strchr(*env, '='); if (p) { String name(*env, p - *env, CopyString); String val(p + 1, CopyString); enva.set(name, val); } } // and then any putenv() changes take precedence for (ArrayIter iter(g_context->getEnvs()); iter; ++iter) { enva.set(iter.first(), iter.second()); } } else { enva = env.toArray(); } pid_t child; if (LightProcess::Available()) { // light process available // there is no need to do any locking, because the forking is delegated // to the light process if (!pre_proc_open(descriptorspec, items)) return false; const int item_size = items.size(); std::vector<int> created; created.reserve(item_size); std::vector<int> intended; intended.reserve(item_size); for (int i = 0; i < item_size; i++) { const auto& item = items[i]; created.push_back(item.childend); intended.push_back(item.index); } std::vector<std::string> envs; for (ArrayIter iter(enva); iter; ++iter) { StringBuffer nvpair; nvpair.append(iter.first().toString()); nvpair.append('='); nvpair.append(iter.second().toString()); std::string tmp = nvpair.detach().c_str(); if (tmp.find('\n') == std::string::npos) { envs.push_back(tmp); } } child = LightProcess::proc_open(cmd.c_str(), created, intended, scwd.c_str(), envs); assert(child); return post_proc_open(cmd, pipes, enva, items, child); } else { /* the unix way */ Lock lock(DescriptorItem::s_mutex); if (!pre_proc_open(descriptorspec, items)) return false; child = fork(); if (child) { // the parent process return post_proc_open(cmd, pipes, enva, items, child); } } assert(child == 0); /* this is the child process */ /* close those descriptors that we just opened for the parent stuff, * dup new descriptors into required descriptors and close the original * cruft */ for (auto& item : items) { item.dupChild(); } if (scwd.length() > 0 && chdir(scwd.c_str())) { // chdir failed, the working directory remains unchanged } std::vector<String> senvs; // holding those char * char **envp = build_envp(enva, senvs); execle("/bin/sh", "sh", "-c", cmd.data(), NULL, envp); free(envp); _exit(127); }
Variant HHVM_FUNCTION(proc_open, const String& cmd, const Array& descriptorspec, VRefParam pipesParam, const Variant& cwd /* = uninit_variant */, const Variant& env /* = uninit_variant */, const Variant& other_options /* = uninit_variant */) { if (RuntimeOption::WhitelistExec && !check_cmd(cmd.data())) { return false; } if (cmd.size() != strlen(cmd.c_str())) { raise_warning("NULL byte detected. Possible attack"); return false; } Variant pipes(pipesParam, Variant::WithRefBind{}); std::vector<DescriptorItem> items; std::string scwd = ""; if (!cwd.isNull() && cwd.isString() && !cwd.asCStrRef().empty()) { scwd = cwd.asCStrRef().c_str(); } else if (!g_context->getCwd().empty()) { scwd = g_context->getCwd().c_str(); } Array enva; if (env.isNull()) { if (is_cli_mode()) { enva = cli_env(); } else { // Build out an environment that conceptually matches what we'd // see if we were to iterate the environment and call getenv() // for each name. // Env vars defined in the hdf file go in first for (const auto& envvar : RuntimeOption::EnvVariables) { enva.set(String(envvar.first), String(envvar.second)); } // global environment overrides the hdf for (char **env = environ; env && *env; env++) { char *p = strchr(*env, '='); if (p) { String name(*env, p - *env, CopyString); String val(p + 1, CopyString); enva.set(name, val); } } } // and then any putenv() changes take precedence for (ArrayIter iter(g_context->getEnvs()); iter; ++iter) { enva.set(iter.first(), iter.second()); } } else { enva = env.toArray(); } #ifdef _WIN32 PROCESS_INFORMATION pi; HANDLE childHandle; STARTUPINFO si; BOOL newprocok; SECURITY_ATTRIBUTES security; DWORD dwCreateFlags = 0; char *command_with_cmd; UINT old_error_mode; char cur_cwd[MAXPATHLEN]; bool suppress_errors = false; bool bypass_shell = false; if (!other_options.isNull() && other_options.isArray()) { auto arr = other_options.asCArrRef(); if (arr.exists(String("suppress_errors", CopyString), true)) { auto v = arr[String("suppress_errors", CopyString)]; if ((v.isBoolean() && v.asBooleanVal()) || (v.isInteger() && v.asInt64Val())) { suppress_errors = true; } } if (arr.exists(String("bypass_shell", CopyString), true)) { auto v = arr[String("bypass_shell", CopyString)]; if ((v.isBoolean() && v.asBooleanVal()) || (v.isInteger() && v.asInt64Val())) { bypass_shell = true; } } } /* we use this to allow the child to inherit handles */ memset(&security, 0, sizeof(security)); security.nLength = sizeof(security); security.bInheritHandle = true; security.lpSecurityDescriptor = nullptr; memset(&si, 0, sizeof(si)); si.cb = sizeof(si); si.dwFlags = STARTF_USESTDHANDLES; si.hStdInput = GetStdHandle(STD_INPUT_HANDLE); si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE); si.hStdError = GetStdHandle(STD_ERROR_HANDLE); if (!pre_proc_open(descriptorspec, items)) return false; /* redirect stdin/stdout/stderr if requested */ for (size_t i = 0; i < items.size(); i++) { switch (items[i].index) { case 0: si.hStdInput = items[i].childend; break; case 1: si.hStdOutput = items[i].childend; break; case 2: si.hStdError = items[i].childend; break; } } memset(&pi, 0, sizeof(pi)); if (suppress_errors) { old_error_mode = SetErrorMode( SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX); } dwCreateFlags = NORMAL_PRIORITY_CLASS; if (!RuntimeOption::ServerExecutionMode()) { dwCreateFlags |= CREATE_NO_WINDOW; } char *envp = build_envp(enva); if (bypass_shell) { newprocok = CreateProcess( nullptr, strdup(cmd.c_str()), &security, &security, TRUE, dwCreateFlags, envp, scwd.c_str(), &si, &pi); } else { std::string command_with = "cmd.exe /c "; command_with += cmd.toCppString(); newprocok = CreateProcess( nullptr, strdup(command_with.c_str()), &security, &security, TRUE, dwCreateFlags, envp, scwd.c_str(), &si, &pi); } free(envp); if (suppress_errors) { SetErrorMode(old_error_mode); } if (newprocok == FALSE) { DWORD dw = GetLastError(); char* msg; FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&msg, 0, nullptr); /* clean up all the descriptors */ for (size_t i = 0; i < items.size(); i++) { CloseHandle(items[i].childend); if (items[i].parentend) { CloseHandle(items[i].parentend); } } raise_warning("CreateProcess failed, error code - %u: %s", dw, msg); LocalFree(msg); return false; } childHandle = pi.hProcess; DWORD child = pi.dwProcessId; CloseHandle(pi.hThread); return post_proc_open(cmd, pipes, enva, items, (pid_t)child, childHandle); #else pid_t child; if (LightProcess::Available()) { // light process available // there is no need to do any locking, because the forking is delegated // to the light process if (!pre_proc_open(descriptorspec, items)) return false; const int item_size = items.size(); std::vector<int> created; created.reserve(item_size); std::vector<int> intended; intended.reserve(item_size); for (int i = 0; i < item_size; i++) { const auto& item = items[i]; created.push_back(item.childend); intended.push_back(item.index); } std::vector<std::string> envs; for (ArrayIter iter(enva); iter; ++iter) { StringBuffer nvpair; nvpair.append(iter.first().toString()); nvpair.append('='); nvpair.append(iter.second().toString()); std::string tmp = nvpair.detach().c_str(); if (tmp.find('\n') == std::string::npos) { envs.push_back(tmp); } } child = LightProcess::proc_open(cmd.c_str(), created, intended, scwd.c_str(), envs); assert(child); return post_proc_open(cmd, pipes, enva, items, child); } else { /* the unix way */ Lock lock(DescriptorItem::s_mutex); if (!pre_proc_open(descriptorspec, items)) return false; child = fork(); if (child) { // the parent process return post_proc_open(cmd, pipes, enva, items, child); } } assert(child == 0); /* this is the child process */ /* close those descriptors that we just opened for the parent stuff, * dup new descriptors into required descriptors and close the original * cruft */ for (auto& item : items) { item.dupChild(); } if (scwd.length() > 0 && chdir(scwd.c_str())) { // chdir failed, the working directory remains unchanged } std::vector<String> senvs; // holding those char * char **envp = build_envp(enva, senvs); execle("/bin/sh", "sh", "-c", cmd.data(), nullptr, envp); free(envp); _exit(127); #endif }
bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input , const std::vector<FactorType> &output , const string &filePath , const vector<float> &weight , size_t tableLimit , const LMList &languageModels , float weightWP) { const_cast<LMList&>(languageModels).InitializeBeforeSentenceProcessing(); const StaticData &staticData = StaticData::Instance(); m_tableLimit = tableLimit; util::FilePiece inFile(filePath.c_str(), staticData.GetVerboseLevel() >= 1 ? &std::cerr : NULL); size_t line_num = 0; size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info const std::string& factorDelimiter = staticData.GetFactorDelimiter(); Phrase sourcePhrase(0); std::vector<float> scv; scv.reserve(m_numScoreComponent); TargetPhraseCollection *preSourceNode = NULL; std::string preSourceString; while(true) { ++line_num; StringPiece line; try { line = inFile.ReadLine(); } catch (util::EndOfFileException &e) { break; } util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter("|||")); StringPiece sourcePhraseString(GrabOrDie(pipes, filePath, line_num)); StringPiece targetPhraseString(GrabOrDie(pipes, filePath, line_num)); StringPiece scoreString(GrabOrDie(pipes, filePath, line_num)); bool isLHSEmpty = !util::TokenIter<util::AnyCharacter, true>(sourcePhraseString, util::AnyCharacter(" \t")); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( filePath << ":" << line_num << ": pt entry contains empty source, skipping\n"); continue; } //target std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase()); targetPhrase->CreateFromString(output, targetPhraseString, factorDelimiter); scv.clear(); for (util::TokenIter<util::AnyCharacter, true> token(scoreString, util::AnyCharacter(" \t")); token; ++token) { char *err_ind; // Token is always delimited by some form of space. Also, apparently strtod is portable but strtof isn't. scv.push_back(FloorScore(TransformScore(static_cast<float>(strtod(token->data(), &err_ind))))); if (err_ind == token->data()) { stringstream strme; strme << "Bad number " << token << " on line " << line_num; UserMessage::Add(strme.str()); abort(); } } if (scv.size() != m_numScoreComponent) { stringstream strme; strme << "Size of scoreVector != number (" <<scv.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num; UserMessage::Add(strme.str()); abort(); } size_t consumed = 3; if (pipes) { targetPhrase->SetAlignmentInfo(*pipes++); ++consumed; } ScoreComponentCollection sparse; if (pipes) pipes++; //counts if (pipes) { //sparse features SparsePhraseDictionaryFeature* spdf = GetFeature()->GetSparsePhraseDictionaryFeature(); if (spdf) { sparse.Assign(spdf,(pipes++)->as_string()); } } // scv good to go sir! targetPhrase->SetScore(m_feature, scv, sparse, weight, weightWP, languageModels); // Check number of entries delimited by ||| agrees across all lines. for (; pipes; ++pipes, ++consumed) {} if (numElement != consumed) { if (numElement == NOT_FOUND) { numElement = consumed; } else { stringstream strme; strme << "Syntax error at " << filePath << ":" << line_num; UserMessage::Add(strme.str()); abort(); } } //TODO: Would be better to reuse source phrases, but ownership has to be //consistent across phrase table implementations sourcePhrase.Clear(); sourcePhrase.CreateFromString(input, sourcePhraseString, factorDelimiter); //Now that the source phrase is ready, we give the target phrase a copy targetPhrase->SetSourcePhrase(sourcePhrase); if (preSourceString == sourcePhraseString && preSourceNode) { preSourceNode->Add(targetPhrase.release()); } else { preSourceNode = CreateTargetPhraseCollection(sourcePhrase); preSourceNode->Add(targetPhrase.release()); preSourceString.assign(sourcePhraseString.data(), sourcePhraseString.size()); } } // sort each target phrase collection m_collection.Sort(m_tableLimit); /* // TODO ASK OLIVER WHY THIS IS NEEDED const_cast<LMList&>(languageModels).CleanUpAfterSentenceProcessing(); */ return true; }