bool CAgencyInfoRetriver::Init(const Stroka& strDoc2AgFile)
{
    if (!strDoc2AgFile.empty() && isexist(strDoc2AgFile.c_str()))
        m_DocId2AgNameFile = TFile(strDoc2AgFile.c_str(), OpenExisting | RdOnly);

    return true;
}
Example #2
 void WriteFile(
     const char *file,
     const vector<vector<double>> &vec,
     const char *head,
     const char *mes) {
     FILE *out = open(file, "w", mes);
     if(!out)
         throw info_except("Cannot open file %s.\n%s\n", file, mes ? mes : "");
     if(head) {
         Stroka tmp = Stroka(head) + "\n";
         fputs(tmp.c_str(), out);
     }
     if(vec.size() == 0) {
         close(out);  // close before the early return to avoid leaking the handle
         return;
     }
     char Buf[256];
     for(size_t i = 0; i < vec[0].size(); i++) {
         for(size_t j = 0; j < vec.size(); j++) {
             if(vec[j][i] == NullNumber)
                 fputs("    \"\"      ", out);
             else {
                 sprintf(Buf, "%10.9g  ", vec[j][i]);
                 fputs(Buf, out);
             }
         }
         fputs("\x0d\x0a", out);
     }
     close(out);
 }
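A hypothetical call of the writer above (the file name, column data, and message string are made up for illustration):

vector<vector<double>> cols(2);
cols[0].push_back(1.0); cols[0].push_back(2.0);   // column "x"
cols[1].push_back(10.0); cols[1].push_back(20.0); // column "y"
WriteFile("table.dat", cols, "x  y", "demo table");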
Example #3
bool    CWorkGrammar::SaveFirstAndFollowSets(Stroka GrammarFileName) const
{
    FILE * fp = fopen(GrammarFileName.c_str(), "wb");
    if (!fp)
        return false;

    fprintf(fp, "FIRST sets:\n\n");

    for (int i = 0; i < (int)m_FirstSets.size(); i++) {
        fprintf(fp, "%s = { ", m_UniqueGrammarItems[i].m_ItemStrId.c_str());
        for (yset<size_t>::const_iterator it_term = m_FirstSets[i].begin(); it_term != m_FirstSets[i].end(); it_term++)
            fprintf(fp, "%s ", m_UniqueGrammarItems[*it_term].m_ItemStrId.c_str());
        fprintf(fp, "}\n");
    }

    fprintf(fp, "\n\nFOLLOW sets:\n\n");
    for (int i = 0; i < (int)m_FollowSets.size(); i++) {
        fprintf(fp, "%s = { ", m_UniqueGrammarItems[i].m_ItemStrId.c_str());
        for (yset<size_t>::const_iterator it_term = m_FollowSets[i].begin(); it_term != m_FollowSets[i].end(); it_term++)
            fprintf(fp, "%s ", m_UniqueGrammarItems[*it_term].m_ItemStrId.c_str());
        fprintf(fp, "}\n");
    }

    fclose(fp);

    return true;
}
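For reading the output: FIRST(X) is the set of terminals that can start a string derived from X, and FOLLOW(X) is the set of terminals that can appear immediately after X in some derivation. Each printed line is one such set in the form "Id = { t1 t2 ... }".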
Example #4
static void TestIconv(const Stroka& utf8, const Stroka& other, ECharset enc) {
    Wtroka wide0 = CharToWide(utf8, CODES_UTF8);
    Wtroka wide1 = CharToWide(other, enc);

    UNIT_ASSERT(wide0 == wide1);

    Stroka temp = WideToChar(wide0, CODES_UTF8);
    UNIT_ASSERT(temp == utf8);

    temp = WideToChar(wide0, enc);
    UNIT_ASSERT(temp == other);

    temp = Recode(enc, CODES_UTF8, other);
    UNIT_ASSERT(temp == utf8);

    temp = Recode(CODES_UTF8, enc, utf8);
    UNIT_ASSERT(temp == other);

    size_t read = 0;
    size_t written = 0;

    RECODE_RESULT res = RecodeToUnicode(enc, other.c_str(), wide1.begin(), other.size(), wide1.size(), read, written);
    UNIT_ASSERT(res == RECODE_OK);
    UNIT_ASSERT(read == other.size());
    UNIT_ASSERT(written == wide1.size());
    UNIT_ASSERT(wide0 == wide1);

    res = RecodeFromUnicode(enc, wide0.c_str(), temp.begin(), wide0.size(), temp.size(), read, written);
    UNIT_ASSERT(res == RECODE_OK);
    UNIT_ASSERT(read == wide0.size());
    UNIT_ASSERT(written == other.size());
    UNIT_ASSERT(temp == other);
}
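A sketch of how this helper might be driven; the byte literals are an assumption (the Cyrillic letter "п" encoded as UTF-8 and as CP1251, with CODES_WIN naming the windows-1251 charset as elsewhere in this code):

TestIconv("\xD0\xBF", "\xEF", CODES_WIN);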
void CLRCollectionSet::SaveStateCollection(Stroka GrammarFileName, const CWorkGrammar* p_WorkGrammar) const
{
    if (GrammarFileName.empty()) return;
    FILE * fp = fopen(GrammarFileName.c_str(), "wb");
    if (!fp) return;

    for (int i = 0; i < (int)m_ItemSets.size(); i++) {
        fprintf(fp, "I%i =\n", i);

        for (yset<CLRItem>::const_iterator it = m_ItemSets[i].begin(); it != m_ItemSets[i].end(); it++) {
            fprintf(fp, "\t%s -> ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_LeftPart].m_ItemStrId.c_str());

            for (int j = 0; j < (int)it->m_pRule->m_RightPart.m_Items.size(); j++) {
                if (j == (int)it->m_DotSymbolNo)
                    fprintf(fp, "; ");
                fprintf(fp, "%s ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_RightPart.m_Items[j]].m_ItemStrId.c_str());
            }

            if (it->m_DotSymbolNo == it->m_pRule->m_RightPart.m_Items.size())
                fprintf(fp, "; ");
            fprintf(fp, "\n");
        }
        fprintf(fp, "\n");
    }

    fprintf(fp, "\n");
    for (ymap< CStateAndSymbol, size_t>::const_iterator it_goto = m_GotoFunction.begin(); it_goto != m_GotoFunction.end(); it_goto++)
        fprintf(fp, "GOTO( I%" PRISZT ", %s ) = I%" PRISZT "\n", it_goto->first.m_StateNo, p_WorkGrammar->m_UniqueGrammarItems[it_goto->first.m_SymbolNo].m_ItemStrId.c_str(), it_goto->second);

    fclose(fp);
}
ECharset CCommonParm::ParseEncoding(const Stroka& encodingName) const {
    ECharset enc = CharsetByName(encodingName.c_str());
    if (enc == CODES_UNKNOWN)
        ythrow yexception() << "Unknown encoding: \"" << encodingName << "\"";

    return enc;
}
void PrintItemSet(const CWorkGrammar* pWorkGrammar, const yset<CLRItem>& ItemSet)
{
    for (yset<CLRItem>::const_iterator it = ItemSet.begin(); it != ItemSet.end(); it++) {
        Stroka s = pWorkGrammar->GetRuleStr(*it->m_pRule, it->m_DotSymbolNo);
        printf("%s\n", s.c_str());
    }
}
ECharset CCommonParm::ParseEncoding(const Stroka& encodingName, ECharset& res) {
    ECharset enc = CharsetByName(encodingName.c_str());
    if (enc == CODES_UNKNOWN)
        m_strError += "\nUnknown encoding: \"" + encodingName + "\"";
    else
        res = enc;

    return enc;
}
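The two ParseEncoding overloads differ only in error handling: the const one throws a yexception on an unknown name, while this one appends the message to m_strError and leaves res untouched. A hypothetical call (parm is an assumed CCommonParm instance):

ECharset enc = CODES_UNKNOWN;
parm.ParseEncoding("utf-8", enc);   // on success enc becomes CODES_UTF8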
bool TGztParser::ParseChainedFieldValues(TFieldValueDescriptorProto* value)
{
    DO(ParseSingleFieldValue(value));

    // try to read several more values, separated by "+" or "|"
    if (!LookingAtListDelimiter())
        return true;

    // What was previously read into @value was actually the first item of a chained
    // list, not a single value. So transform @value into a list and place its current
    // content as the first sub-value of that list.

    // Re-use previously allocated items
    THolder<TFieldValueDescriptorProto> sub_value;
    if (value->mutable_list()->mutable_value()->ClearedCount() > 0) {
        sub_value.Reset(value->mutable_list()->mutable_value()->ReleaseCleared());
        sub_value->CopyFrom(*value);
        //sub_value->Swap(value);    -- Swap is unsafe here because it creates cycles for some reason!
    }
    else
        sub_value.Reset(new TFieldValueDescriptorProto(*value));

    value->Clear();

    value->set_type(TFieldValueDescriptorProto::TYPE_LIST);
    value->mutable_list()->mutable_value()->AddAllocated(sub_value.Release());

    // Only a single kind of separating token is allowed within one chained list,
    // so from here on we accept only delimiters of the same level that are equal to the first one.
    Stroka delimiter = CurrentToken().text;
    if (delimiter == "|")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PIPE_DELIMITED);
    else if (delimiter == "+")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PLUS_DELIMITED);
    else
        YASSERT(false);

    const char* delim_text = delimiter.c_str();
    while (TryConsume(delim_text))
        DO(ParseSingleFieldValue(value->mutable_list()->add_value()));

    // it is an error to meet any list delimiter here (it would be mixed with the previous delimiters).
    if (!LookingAtListDelimiter())
        return true;
    else {
        AddError(Substitute("Distinct kinds of delimiters (\"$0\" and \"$1\") "
                            "should not be mixed in a single chained list at same level.",
                            delimiter, CurrentToken().text));
        return false;
    }
}
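The cleared-element trick above is a general protobuf pattern: a RepeatedPtrField keeps removed elements in a "cleared" pool, and ReleaseCleared() hands one back instead of allocating. A minimal sketch of the same idea (TMsg is a placeholder message type, not from this codebase):

template <typename TMsg>
TMsg* TakeOrNew(google::protobuf::RepeatedPtrField<TMsg>* field) {
    if (field->ClearedCount() > 0)
        return field->ReleaseCleared(); // reuse an element from the cleared pool
    return new TMsg;                    // nothing to reuse, allocate a fresh one
}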
Example #10
void CProcessor::PrintSpeed(Stroka strUrl)
{
    if (DocCount % 50 == 0) {
        time_t curTime = time(&curTime);
        CTimeSpan cts(curTime - StartTime);
        double vm, tm;  //  volume and elapsed time
        vm = (double) TotalVolume / 1048576.0;
        tm = (double) cts.GetTotalSeconds();
        double speed = (tm > 0) ? (vm * 3600) / tm : 0;  // Mb per hour, guarding against a zero interval
        Stroka intervalTime;
        intervalTime = Substitute("$0:$1:$2", cts.GetHours(), cts.GetMinutes(), cts.GetSeconds());

        double iTouchedSents = 0;
        if (SentenceCount != 0)
            iTouchedSents = 100.0 * TouchedSentenceCount / SentenceCount;

        Stroka suTime;
        suTime = Sprintf("Time:%s Doc:%lu Vol:%.2fMb Speed:%.0fMb/h (%s), Used sentences:%.2f%%",
            intervalTime.c_str(), DocCount, vm, speed, strUrl.c_str(), iTouchedSents);
        Clog << suTime << '\r';
    }
}
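For context, Substitute fills $0, $1, ... placeholders positionally while Sprintf is printf-style; a tiny sketch with made-up values:

Stroka hms = Substitute("$0:$1:$2", 1, 2, 3);   // "1:2:3"
Stroka vol = Sprintf("Vol:%.2fMb", 1.5);        // "Vol:1.50Mb"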
Example #11
 //  if makeVectorOfColumns is nonzero, make a vector of columns (for tableData);
 //  otherwise make a vector of rows
 int ReadFile(
     const char *file,
     vector<vector<double>> &vec,
     const char *mes,
     int makeVectorOfColumns) {
     if(!file)
         throw info_except("File name is null?\n%s\n", mes);
     FILE *in = open(file, "r", mes);
     vec.clear();
     if(!in) {
         if(mes != NULL)
             throw info_except("Cannot open file %s.\n%s\n", file, mes);
         else
             return 0;
     }
     std::vector<Stroka> line;
     vector<double> cur;
     Stroka err = "";
     if(mes)
         err = mes;
     while(GetLine(in, line)) {
         cur.clear();
         for(size_t i = 0; i < line.size(); i++) {
             double f;
             if(!IsDouble(line[i].c_str(), f, 1)) {
                 cur.clear();
                 break;
             }
             cur.push_back(f);
         }
         if(cur.size() == 0)
             continue;
         if(makeVectorOfColumns) {
             if(vec.size() == 0)
                 vec.resize(cur.size());
             if(cur.size() != vec.size())
                 throw info_except(
                     "File <%s>\nstr <%s>\nsize of str %i vector size %i\n%s",
                     file,
                     Str::JoinLine(line).c_str(),
                     (int)cur.size(),
                     (int)vec.size(),
                     err.c_str());
             for(size_t i = 0; i < line.size(); i++)
                 vec[i].push_back(cur[i]);
         } else
             vec.push_back(cur);
     }
     close(in);
     return 1;
 }
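A hypothetical round trip with the WriteFile example above (same made-up file name):

vector<vector<double>> cols;
ReadFile("table.dat", cols, "demo table", 1);   // 1 = build a vector of columns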
 TImpl(const Stroka& style, const Stroka& base = "") {
     InitError();
     TxmlDocHolder sheetDoc(xmlParseMemory(~style, +style));
     if (!sheetDoc)
         ythrow yexception() << "cannot parse xml of xslt: " << ErrorMessage;
     if (!!base) {
         xmlNodeSetBase(sheetDoc->children, (xmlChar*)base.c_str());
     }
     Stylesheet.Reset(xsltParseStylesheetDoc(sheetDoc.Get()));
     if (!Stylesheet)
         ythrow yexception() << "cannot parse xslt: " << ErrorMessage;
     // sheetDoc - ownership transferred to Stylesheet
     sheetDoc.Release();
 }
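A sketch of how such a stylesheet is typically applied; xmlParseMemory, xsltApplyStylesheet and xmlFreeDoc are the stock libxml2/libxslt calls, while the wrapper itself is illustrative:

xmlDocPtr Transform(xsltStylesheetPtr sheet, const Stroka& xml) {
    xmlDocPtr doc = xmlParseMemory(xml.c_str(), (int)xml.size());
    if (!doc)
        return NULL;
    xmlDocPtr res = xsltApplyStylesheet(sheet, doc, NULL);
    xmlFreeDoc(doc);
    return res; // the caller releases the result with xmlFreeDoc
}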
TGramBitSet HumanGrammemsToGramBitSet(const TWtringBuf& strGrammems)
{
    Stroka str = WideToChar(~strGrammems, +strGrammems, CODES_WIN);
    VectorStrok gramTokens = splitStrokuBySet(str.c_str(), ",");

    TGramBitSet grammars;
    for (size_t j = 0; j < gramTokens.size(); j++) {
        if (gramTokens[j].empty())
            continue;
        TGrammar gr = TGrammarIndex::GetCode(gramTokens[j]);
        if (gr != gInvalid)
            grammars.Set(gr);
    }
    return grammars;
}
Example #14
size_t CWorkGrammar::GetCountOfRoots()  const
{
    size_t Result  = 0;
    Stroka Dump;
    for (size_t i=0; i<m_UniqueGrammarItems.size(); i++)
        if (m_UniqueGrammarItems[i].m_bGrammarRoot) {
            Result++;
            Dump += m_UniqueGrammarItems[i].GetDumpString();
            Dump += ",";
        }
    if (Result > 1)
        fprintf (stderr, "Roots : %s\n",Dump.c_str());

    return Result;
}
Example #15
void WriteToLogFile(const Stroka& sGrammarFileLog, Stroka& str, bool bRW)
{
    if (sGrammarFileLog.empty())
        return;

    str += '\n';

    THolder<TFile> f;
    if (bRW)
        f.Reset(new TFile(sGrammarFileLog, CreateAlways | WrOnly | Seq));
    else
        f.Reset(new TFile(sGrammarFileLog, OpenAlways | WrOnly | Seq | ForAppend));

    TFileOutput out(*f);
    out.Write(str.c_str());
}
Example #16
// adding a new root non-terminal "$NewRoot" and a special
// symbol for end of input ("$")
bool CWorkGrammar::AugmentGrammar(yvector<CRuleAgreement>& Agreements)
{
    if (GetCountOfRoots() != 1)
        ythrow yexception() << "A simple grammar should have only one root.";

    for (size_t i = 0; i < m_UniqueGrammarItems.size(); ++i)
        if (m_UniqueGrammarItems[i].m_ItemStrId == NEW_ROOT || m_UniqueGrammarItems[i].m_ItemStrId == END_OF_INPUT) {
            YASSERT(false);
            return false;
        }

    size_t rootIndex = 0;
    for (size_t i = 0; i < m_UniqueGrammarItems.size(); ++i)
        if (m_UniqueGrammarItems[i].m_bGrammarRoot) {
            rootIndex = i;
            break;
        }

    CGrammarItem I;

    //  adding a special symbol (end of input)
    I.m_ItemStrId = END_OF_INPUT;
    NStr::Assign(I.m_Lemma, END_OF_INPUT.c_str());
    I.m_Type = siString;
    m_UniqueGrammarItems.push_back(I);

    //  adding a new root
    I.m_ItemStrId = NEW_ROOT;
    I.m_Type = siMeta;
    m_UniqueGrammarItems.push_back(I);

    CWorkRule R;
    R.m_OriginalRuleNo = Agreements.size();
    Agreements.push_back(CRuleAgreement());

    R.m_LeftPart = m_UniqueGrammarItems.size() - 1;
    R.m_RightPart.m_SynMainItemNo = 0;
    R.m_RightPart.m_Items.push_back(rootIndex);
    R.m_RightPart.m_Items.push_back(m_UniqueGrammarItems.size() - 2);
    m_EncodedRules.insert(R);
    return true;
}
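For reference, this is the textbook LR augmentation: the grammar gains a fresh start symbol with the single rule $NewRoot -> <old root> $, where $ (END_OF_INPUT) marks the end of input; that extra rule is what gives the LR automaton a well-defined accept state.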
bool  CTarArchiveReader::Init(const Stroka& archiveName, const Stroka& _strDoc2AgFile, const Stroka& str_treat_as)
{
    stroka str_treat_as_ci(str_treat_as);   // case-insensitive
    try {
        Stroka strDoc2AgFile = _strDoc2AgFile;
        if (strDoc2AgFile.empty())
            strDoc2AgFile = archiveName + ".d2ag";

        CAgencyInfoRetriver::Init(strDoc2AgFile);

        if (str_treat_as_ci == "html")
            i_format = 4;
        else if (str_treat_as_ci == "text")
            i_format = 2;
        else
            ythrow yexception() << "unknown \"treat-as\" value: " << str_treat_as;

        tar_open(&p_TAR, archiveName.c_str());
        return true;
    } catch (yexception& e) {
        ythrow yexception() << "Error in \"CTarArchiveReader::Init\" (" << e.what() << ")";
    }
}
Example #18
bool IsDir(const Stroka& path) {
    return isdir(path.c_str()) == 0;  // isdir() returns 0 when the path exists and is a directory
}
Example #19
Stroka MatterStable::TwoPhaseBoundary::SetBnd(
    map<Stroka, vector<double>> &data,
    const Stroka &res_file_name,
    double SplMisfit,
    double NumSplPnt) {
    double Dmin = 1e10, Dmax = -1, Tmin = 1e10, Tmax = -1;
    size_t Num = data["T"].size();
    for(size_t i = 0; i < Num; i++) {
        double t = data["T"][i], dmin = data["Dl_T"][i], dmax = data["Dr_T"][i];
        if(t > Tmax)
            Tmax = t;
        if(t < Tmin)
            Tmin = t;
        if(dmin < Dmin)
            Dmin = dmin;
        if(dmax > Dmax)
            Dmax = dmax;
    }
    TwoPhaseBoundary Bnd;
    Bnd.Dmax = Dmax;
    Bnd.Dmin = Dmin;
    Bnd.Tmax = Tmax;
    Bnd.Tmin = Tmin;
    Stroka ret;
    ret += MakeSpline(
        Bnd.P_T,
        NumSplPnt,
        SplMisfit * 0.0001,
        &(data["P_T"][0]),
        &(data["T"][0]),
        Num,
        "P_T");
    ret += MakeSpline(
        Bnd.E_T, NumSplPnt, SplMisfit, &(data["E_T"][0]), &(data["T"][0]), Num, "E_T");
    ret += MakeSpline(
        Bnd.dPdT_T,
        NumSplPnt,
        SplMisfit * 1e-6,
        &(data["dPdT_T"][0]),
        &(data["T"][0]),
        Num,
        "dPdT_T");

    ret += MakeSpline(
        Bnd.Dr_T, NumSplPnt, SplMisfit, &(data["Dr_T"][0]), &(data["T"][0]), Num, "Dr_T");
    ret += MakeSpline(
        Bnd.Dl_T, NumSplPnt, SplMisfit, &(data["Dl_T"][0]), &(data["T"][0]), Num, "Dl_T");
    ret += MakeSpline(
        Bnd.DSr_T,
        NumSplPnt,
        SplMisfit,
        &(data["DSr_T"][0]),
        &(data["T"][0]),
        Num,
        "DSr_T");
    ret += MakeSpline(
        Bnd.DSl_T,
        NumSplPnt,
        SplMisfit,
        &(data["DSl_T"][0]),
        &(data["T"][0]),
        Num,
        "DSl_T");

    FilterTextOut out(res_file_name.c_str());
    Bnd.save_data_state(out);
    return ret;
}