// Opens the docid-to-agency-name mapping file when a path is supplied and the
// file exists on disk. A missing or empty path is not an error: the method
// always reports success and simply leaves m_DocId2AgNameFile untouched.
bool CAgencyInfoRetriver::Init(const Stroka& strDoc2AgFile) {
    if (strDoc2AgFile.empty() || !isexist(strDoc2AgFile.c_str()))
        return true;
    m_DocId2AgNameFile = TFile(strDoc2AgFile.c_str(), OpenExisting | RdOnly);
    return true;
}
// Writes @vec — stored column-major, vec[j][i] being row i of column j — to
// @file as a whitespace-separated table, one table row per line, CRLF-terminated.
// Cells equal to NullNumber are emitted as "" placeholders. An optional @head
// line is written first. @mes is an error-context string forwarded to open().
void WriteFile(const char *file, const vector<vector<double>> &vec,
               const char *head, const char *mes) {
    FILE *out = open(file, "w", mes);
    if (head) {
        Stroka tmp = Stroka(head) + "\n";
        fputs(tmp.c_str(), out);
    }
    if (vec.size() == 0) {
        // BUG FIX: the original returned here without closing @out,
        // leaking the file handle on every empty-table call.
        close(out);
        return;
    }
    char Buf[256];
    for (size_t i = 0; i < vec[0].size(); i++) {
        for (size_t j = 0; j < vec.size(); j++) {
            if (vec[j][i] == NullNumber)
                fputs(" \"\" ", out);  // empty-cell placeholder
            else {
                sprintf(Buf, "%10.9g ", vec[j][i]);
                fputs(Buf, out);
            }
        }
        fputs("\x0d\x0a", out);  // explicit CRLF line ending
    }
    close(out);
}
// Dumps the grammar's FIRST and FOLLOW sets to @GrammarFileName in a readable
// "Symbol = { a b c }" format, one line per grammar item.
// Returns false when the output file cannot be opened, true otherwise.
bool CWorkGrammar::SaveFirstAndFollowSets(Stroka GrammarFileName) const {
    FILE* fp = fopen(GrammarFileName.c_str(), "wb");
    // BUG FIX: the original dereferenced fp without checking fopen() success
    // (bad path or permissions would crash the dump).
    if (!fp)
        return false;
    fprintf(fp, "FIRST sets:\n\n");
    for (int i = 0; i < (int)m_FirstSets.size(); i++) {
        fprintf(fp, "%s = { ", m_UniqueGrammarItems[i].m_ItemStrId.c_str());
        for (yset<size_t>::const_iterator it_term = m_FirstSets[i].begin(); it_term != m_FirstSets[i].end(); it_term++)
            fprintf(fp, "%s ", m_UniqueGrammarItems[*it_term].m_ItemStrId.c_str());
        fprintf(fp, "}\n");
    }
    fprintf(fp, "\n\nFOLLOW sets:\n\n");
    for (int i = 0; i < (int)m_FollowSets.size(); i++) {
        fprintf(fp, "%s = { ", m_UniqueGrammarItems[i].m_ItemStrId.c_str());
        for (yset<size_t>::const_iterator it_term = m_FollowSets[i].begin(); it_term != m_FollowSets[i].end(); it_term++)
            fprintf(fp, "%s ", m_UniqueGrammarItems[*it_term].m_ItemStrId.c_str());
        fprintf(fp, "}\n");
    }
    fclose(fp);
    return true;
}
// Round-trip consistency test for a single encoding: the same text is supplied
// both as UTF-8 (@utf8) and in the encoding under test (@other, encoded as
// @enc), and every conversion path between the two must agree.
static void TestIconv(const Stroka& utf8, const Stroka& other, ECharset enc) {
    // Decoding either byte representation must produce the same wide string.
    Wtroka wide0 = CharToWide(utf8, CODES_UTF8);
    Wtroka wide1 = CharToWide(other, enc);
    UNIT_ASSERT(wide0 == wide1);
    // Re-encoding the wide string must reproduce both original byte strings.
    Stroka temp = WideToChar(wide0, CODES_UTF8);
    UNIT_ASSERT(temp == utf8);
    temp = WideToChar(wide0, enc);
    UNIT_ASSERT(temp == other);
    // Direct byte-to-byte recoding, both directions.
    temp = Recode(enc, CODES_UTF8, other);
    UNIT_ASSERT(temp == utf8);
    temp = Recode(CODES_UTF8, enc, utf8);
    UNIT_ASSERT(temp == other);
    // Low-level buffer API: recode into preexisting buffers and verify the
    // reported read/written counts cover the whole input and output.
    size_t read = 0;
    size_t written = 0;
    RECODE_RESULT res = RecodeToUnicode(enc, other.c_str(), wide1.begin(), other.size(), wide1.size(), read, written);
    UNIT_ASSERT(res == RECODE_OK);
    UNIT_ASSERT(read == other.size());
    UNIT_ASSERT(written == wide1.size());
    UNIT_ASSERT(wide0 == wide1);
    // Reverse direction: wide -> @enc bytes, overwriting @temp in place
    // (temp currently holds @other, so the sizes already match).
    res = RecodeFromUnicode(enc, wide0.c_str(), temp.begin(), wide0.size(), temp.size(), read, written);
    UNIT_ASSERT(res == RECODE_OK);
    UNIT_ASSERT(read == wide0.size());
    UNIT_ASSERT(written == other.size());
    UNIT_ASSERT(temp == other);
}
// Dumps the LR item-set collection and the GOTO function to @GrammarFileName
// in a readable form: each state I<n> lists its items as "Lhs -> a ; b" where
// "; " marks the dot position, followed by "GOTO( I<s>, Sym ) = I<t>" lines.
// Silently does nothing when the file name is empty or cannot be opened.
void CLRCollectionSet::SaveStateCollection(Stroka GrammarFileName, const CWorkGrammar* p_WorkGrammar) const {
    if (GrammarFileName.empty())
        return;
    FILE* fp = fopen(GrammarFileName.c_str(), "wb");
    // BUG FIX: fopen() can fail; the original dereferenced a NULL stream.
    if (!fp)
        return;
    for (int i = 0; i < (int)m_ItemSets.size(); i++) {
        fprintf(fp, "I%i =\n", i);
        for (yset<CLRItem>::const_iterator it = m_ItemSets[i].begin(); it != m_ItemSets[i].end(); it++) {
            fprintf(fp, "\t%s -> ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_LeftPart].m_ItemStrId.c_str());
            for (int j = 0; j < (int)it->m_pRule->m_RightPart.m_Items.size(); j++) {
                // Print the dot marker before the symbol it precedes.
                if (j == (int)it->m_DotSymbolNo)
                    fprintf(fp, "; ");
                fprintf(fp, "%s ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_RightPart.m_Items[j]].m_ItemStrId.c_str());
            }
            // Dot at the very end of the right-hand side (completed item).
            if (it->m_DotSymbolNo == it->m_pRule->m_RightPart.m_Items.size())
                fprintf(fp, "; ");
            fprintf(fp, "\n");
        }
        fprintf(fp, "\n");
    }
    fprintf(fp, "\n");
    for (ymap< CStateAndSymbol, size_t>::const_iterator it_goto = m_GotoFunction.begin(); it_goto != m_GotoFunction.end(); it_goto++)
        fprintf(fp, "GOTO( I%" PRISZT ", %s ) = I%" PRISZT "\n", it_goto->first.m_StateNo, p_WorkGrammar->m_UniqueGrammarItems[it_goto->first.m_SymbolNo].m_ItemStrId.c_str(), it_goto->second);
    fclose(fp);
}
// Resolves @encodingName to its ECharset code.
// Throws yexception when the name is not a known charset.
ECharset CCommonParm::ParseEncoding(const Stroka& encodingName) const {
    const ECharset result = CharsetByName(encodingName.c_str());
    if (result != CODES_UNKNOWN)
        return result;
    ythrow yexception() << "Unknown encoding: \"" << encodingName << "\"";
}
void PrintItemSet(const CWorkGrammar* pWorkGrammar, const yset<CLRItem>& ItemSet) { for (yset<CLRItem>::const_iterator it = ItemSet.begin(); it != ItemSet.end(); it++) { Stroka s = pWorkGrammar->GetRuleStr(*it->m_pRule, it->m_DotSymbolNo); printf ("%s\n", s.c_str()); }; };
// Non-throwing variant of ParseEncoding: resolves @encodingName and stores the
// code into @res on success; on failure appends a diagnostic to m_strError and
// leaves @res untouched. Always returns the raw lookup result, so callers can
// test it against CODES_UNKNOWN.
ECharset CCommonParm::ParseEncoding(const Stroka& encodingName, ECharset& res) {
    ECharset enc = CharsetByName(encodingName.c_str());
    if (enc == CODES_UNKNOWN)
        // BUG FIX: message said "Unkown"; corrected spelling to match the
        // throwing overload's "Unknown encoding" wording.
        m_strError += "\nUnknown encoding: \"" + encodingName + "\"";
    else
        res = enc;
    return enc;
}
// Parses either a single field value or a chained list of values separated by
// "+" or "|" (but never a mix of the two at the same level) into @value.
// On a chained list, @value is converted in place into a TYPE_LIST whose first
// element is the value parsed before the first delimiter was seen.
// Returns false (with an error recorded) on a mixed-delimiter list.
bool TGztParser::ParseChainedFieldValues(TFieldValueDescriptorProto* value) {
    DO(ParseSingleFieldValue(value));
    // try read several more values, interleaved with "+" or "|"
    if (!LookingAtListDelimiter())
        return true;

    // What was previously read into @value was actually a first item of chained list,
    // not a single value. So transform @value to a list and place its current content
    // as first sub-value of this list.
    // Re-use previuosly allocated items
    THolder<TFieldValueDescriptorProto> sub_value;
    if (value->mutable_list()->mutable_value()->ClearedCount() > 0) {
        // A previously cleared sub-message is available in the repeated-field
        // pool: take ownership of it and copy the current content over.
        sub_value.Reset(value->mutable_list()->mutable_value()->ReleaseCleared());
        sub_value->CopyFrom(*value);
        //sub_value->Swap(value); -- Swap is unsafe here because it creates cycles for some reason!
    } else
        sub_value.Reset(new TFieldValueDescriptorProto(*value));

    value->Clear();
    value->set_type(TFieldValueDescriptorProto::TYPE_LIST);
    value->mutable_list()->mutable_value()->AddAllocated(sub_value.Release());

    // only single kind of separating token is allowed at single chained list.
    // so next we only accept a delimiters of same level which are equal to the first one.
    Stroka delimiter = CurrentToken().text;
    if (delimiter == "|")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PIPE_DELIMITED);
    else if (delimiter == "+")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PLUS_DELIMITED);
    else
        YASSERT(false);
    const char* delim_text = delimiter.c_str();

    // Consume "<delim> value" pairs while the same delimiter keeps appearing.
    while (TryConsume(delim_text))
        DO(ParseSingleFieldValue(value->mutable_list()->add_value()));

    // it is an error to meet any list delimiter here (as it will be mixed with previuos delimiters).
    if (!LookingAtListDelimiter())
        return true;
    else {
        AddError(Substitute("Distinct kinds of delimiters (\"$0\" and \"$1\") "
                            "should not be mixed in a single chained list at same level.",
                            delimiter, CurrentToken().text));
        return false;
    }
}
// Logs processing throughput to Clog every 50th document: elapsed time,
// document count, total volume (Mb), speed (Mb/h), the current URL and the
// percentage of sentences actually used.
void CProcessor::PrintSpeed(Stroka strUrl) {
    if (DocCount % 50 == 0) {
        time_t curTime = time(&curTime);
        CTimeSpan cts(curTime - StartTime);
        double vm, tm; // volume (Mb) and elapsed time (seconds)
        vm = (double) TotalVolume / 1048576.0;
        tm = (double) cts.GetTotalSeconds();
        // BUG FIX: during the first interval the elapsed time can be zero,
        // which made the original divide by zero; report 0 speed instead.
        double speed = (tm > 0) ? (vm * 3600) / tm : 0.0;
        Stroka intervalTime;
        intervalTime = Substitute("$0:$1:$2", cts.GetHours(), cts.GetMinutes(), cts.GetSeconds());
        double iTouchedSents = 0;
        if (SentenceCount != 0)
            // BUG FIX: force floating-point division so the percentage is not
            // truncated when the counters are integral types.
            iTouchedSents = 100.0 * TouchedSentenceCount / SentenceCount;
        Stroka suTime;
        suTime = Sprintf("Time:%s Doc:%lu Vol:%.2fMb Speed:%.0fMb/h (%s), Used sentences:%.2f%%",
                         intervalTime.c_str(), DocCount, vm, speed, strUrl.c_str(), iTouchedSents);
        Clog << suTime << '\r';
    }
}
// make vector of columns (for tableData) // else - make vector of rows int ReadFile( const char *file, vector<vector<double>> &vec, const char *mes, int makeVectorOfColumns) { if(!file) throw info_except("File name is null?\n%s\n", mes); FILE *in = open(file, "r", mes); vec.clear(); if(!in) { if(mes != NULL) throw info_except("Cannot open file %s.\n%s\n", file, mes); else return 0; } std::vector<Stroka> line; vector<double> cur; Stroka err = ""; if(mes) err = mes; while(GetLine(in, line)) { cur.clear(); for(size_t i = 0; i < line.size(); i++) { double f; if(!IsDouble(line[i].c_str(), f, 1)) { cur.clear(); break; } cur.push_back(f); } if(cur.size() == 0) continue; if(makeVectorOfColumns) { if(vec.size() == 0) vec.resize(cur.size()); if(cur.size() != vec.size()) throw info_except( "File <%s>\nstr <%s>\nsize of str %i vector size %i\n%s", file, Str::JoinLine(line).c_str(), cur.size(), vec.size(), err.c_str()); for(size_t i = 0; i < line.size(); i++) vec[i].push_back(cur[i]); } else vec.push_back(cur); } close(in); return 1; }
TImpl(const Stroka& style, const Stroka& base = "") { InitError(); TxmlDocHolder sheetDoc(xmlParseMemory(~style, +style)); if (!!base) { xmlNodeSetBase(sheetDoc->children, (xmlChar*)base.c_str()); } if (!sheetDoc) ythrow yexception() << "cannot parse xml of xslt: " << ErrorMessage; Stylesheet.Reset(xsltParseStylesheetDoc(sheetDoc.Get())); if (!Stylesheet) ythrow yexception() << "cannot parse xslt: " << ErrorMessage; // sheetDoc - ownership transferred to Stylesheet sheetDoc.Release(); }
// Converts a comma-separated, human-readable grammeme list (wide text) into a
// TGramBitSet. Empty tokens and names with no grammar code are skipped.
TGramBitSet HumanGrammemsToGramBitSet(const TWtringBuf& strGrammems) {
    const Stroka narrow = WideToChar(~strGrammems, +strGrammems, CODES_WIN);
    const VectorStrok tokens = splitStrokuBySet(narrow.c_str(), ",");
    TGramBitSet result;
    for (size_t i = 0; i < tokens.size(); ++i) {
        if (tokens[i].empty())
            continue;
        const TGrammar code = TGrammarIndex::GetCode(tokens[i]);
        if (code != gInvalid)
            result.Set(code);
    }
    return result;
}
size_t CWorkGrammar::GetCountOfRoots() const { size_t Result = 0; Stroka Dump; for (size_t i=0; i<m_UniqueGrammarItems.size(); i++) if (m_UniqueGrammarItems[i].m_bGrammarRoot) { Result++; Dump += m_UniqueGrammarItems[i].GetDumpString(); Dump += ","; } if (Result > 1) fprintf (stderr, "Roots : %s\n",Dump.c_str()); return Result; };
void WriteToLogFile(const Stroka& sGrammarFileLog, Stroka& str, bool bRW) { if (sGrammarFileLog.empty()) return; str += '\n'; THolder<TFile> f; if (bRW) f.Reset(new TFile(sGrammarFileLog, CreateAlways | WrOnly | Seq)); else f.Reset(new TFile(sGrammarFileLog, OpenAlways | WrOnly | Seq | ForAppend)); TFileOutput out(*f); out.Write(str.c_str()); };
// adding a new root non-terminal "$NewRoot" and a special // symbol for end of input ("$") bool CWorkGrammar::AugmentGrammar(yvector<CRuleAgreement>& Agreements) { if (GetCountOfRoots() != 1) ythrow yexception() << "A simple grammar should have only one root."; for (size_t i = 0; i < m_UniqueGrammarItems.size(); ++i) if (m_UniqueGrammarItems[i].m_ItemStrId == NEW_ROOT || m_UniqueGrammarItems[i].m_ItemStrId == END_OF_INPUT) { YASSERT(false); return false; } size_t rootIndex = 0; for (size_t i = 0; i < m_UniqueGrammarItems.size(); ++i) if (m_UniqueGrammarItems[i].m_bGrammarRoot) { rootIndex = i; break; } CGrammarItem I; // adding a special symbol (end of input) I.m_ItemStrId = END_OF_INPUT; NStr::Assign(I.m_Lemma, END_OF_INPUT.c_str()); I.m_Type = siString; m_UniqueGrammarItems.push_back(I); // adding a new root I.m_ItemStrId = NEW_ROOT; I.m_Type = siMeta; m_UniqueGrammarItems.push_back(I); CWorkRule R; R.m_OriginalRuleNo = Agreements.size(); Agreements.push_back(CRuleAgreement()); R.m_LeftPart = m_UniqueGrammarItems.size() - 1; R.m_RightPart.m_SynMainItemNo = 0; R.m_RightPart.m_Items.push_back(rootIndex); R.m_RightPart.m_Items.push_back(m_UniqueGrammarItems.size() - 2); m_EncodedRules.insert(R); return true; };
bool CTarArchiveReader::Init(const Stroka& archiveName, const Stroka& _strDoc2AgFile, const Stroka& str_treat_as) { stroka str_treat_as_ci(str_treat_as); // case-insensitive try { Stroka strDoc2AgFile = _strDoc2AgFile; if (strDoc2AgFile.empty()) strDoc2AgFile = archiveName + ".d2ag"; CAgencyInfoRetriver::Init(strDoc2AgFile); if (str_treat_as_ci == "html") i_format = 4; else if (str_treat_as_ci == "text") i_format = 2; else ythrow yexception() << "unknown \"treat-as\" value: " << str_treat_as; tar_open(&p_TAR, archiveName.c_str()); return true; } catch (yexception& e) { ythrow yexception() << "Error in \"CYndexArchiveReader::Init\" (" << e.what() << ")"; } }
// True when @path names an existing directory
// (the project's isdir() reports success with a zero return).
bool IsDir(const Stroka& path) {
    const int rc = isdir(path.c_str());
    return rc == 0;
}
// Fits splines (in T) for each two-phase boundary curve found in @data
// (P_T, E_T, dPdT_T, Dr_T, Dl_T, DSr_T, DSl_T), records the density and
// temperature bounds, writes the resulting boundary state to @res_file_name,
// and returns the concatenated MakeSpline diagnostic strings.
// @SplMisfit scales the per-curve fitting tolerance; @NumSplPnt is the
// requested number of spline points.
Stroka MatterStable::TwoPhaseBoundary::SetBnd( map<Stroka, vector<double>> &data, const Stroka &res_file_name, double SplMisfit, double NumSplPnt) {
    double Dmin = 1e10, Dmax = -1, Tmin = 1e10, Tmax = -1, Num = data["T"].size();
    // BUG FIX: the loop index was a double used directly for vector indexing;
    // use an integral index (same iteration count, no implicit conversions).
    for (size_t i = 0; i < (size_t)Num; i++) {
        double t = data["T"][i], dmin = data["Dl_T"][i], dmax = data["Dr_T"][i];
        if (t > Tmax) Tmax = t;
        if (t < Tmin) Tmin = t;
        if (dmin < Dmin) Dmin = dmin;
        if (dmax > Dmax) Dmax = dmax;
    }
    TwoPhaseBoundary Bnd;
    Bnd.Dmax = Dmax;
    Bnd.Dmin = Dmin;
    Bnd.Tmax = Tmax;
    Bnd.Tmin = Tmin;
    Stroka ret;
    // NOTE(review): the pressure and dP/dT curves use scaled tolerances
    // (1e-4 and 1e-6 of SplMisfit) — presumably unit-related; confirm.
    ret += MakeSpline( Bnd.P_T, NumSplPnt, SplMisfit * 0.0001, &(data["P_T"][0]), &(data["T"][0]), Num, "P_T");
    ret += MakeSpline( Bnd.E_T, NumSplPnt, SplMisfit, &(data["E_T"][0]), &(data["T"][0]), Num, "E_T");
    ret += MakeSpline( Bnd.dPdT_T, NumSplPnt, SplMisfit * 1e-6, &(data["dPdT_T"][0]), &(data["T"][0]), Num, "dPdT_T");
    ret += MakeSpline( Bnd.Dr_T, NumSplPnt, SplMisfit, &(data["Dr_T"][0]), &(data["T"][0]), Num, "Dr_T");
    ret += MakeSpline( Bnd.Dl_T, NumSplPnt, SplMisfit, &(data["Dl_T"][0]), &(data["T"][0]), Num, "Dl_T");
    ret += MakeSpline( Bnd.DSr_T, NumSplPnt, SplMisfit, &(data["DSr_T"][0]), &(data["T"][0]), Num, "DSr_T");
    ret += MakeSpline( Bnd.DSl_T, NumSplPnt, SplMisfit, &(data["DSl_T"][0]), &(data["T"][0]), Num, "DSl_T");
    FilterTextOut out(res_file_name.c_str());
    Bnd.save_data_state(out);
    return ret;
}