コード例 #1
0
ファイル: dctcnv4.cpp プロジェクト: Filosoft/vabamorf
//
// Pass 1: build the table of word-class sequences.
//
// Reads inFileName line by line, splits every line with TeeSLL() and makes
// sure the global sonaLiigiString is present in the global table
// sonaliikideMassiiv. The process is terminated when the file cannot be
// opened or when a new entry cannot be added.
//
void TeeS6naLiikideTabel(
    CFSFileName &inFileName)
    {
    CPFSFile sisend;
    CFSbaseSTRING jooksevRida;

    // Progress output: name of the file being scanned.
    printf(FSTSTR("%s "), (const FSTCHAR *)inFileName);
    if(!sisend.Open(inFileName, FSTSTR("rb")))
        {
        printf(FSTSTR("%s: ei saa faili avatud\n"), (const FSTCHAR *)inFileName);
        exit( EXIT_FAILURE );
        }
    while(sisend.ReadLine(&jooksevRida)==true)
        {
        int kohtTabelis;
        TeeSLL(&jooksevRida); // split the input line into its pieces
        if(sonaliikideMassiiv.Get(&sonaLiigiString, &kohtTabelis)!=NULL)
            continue; // this sequence is already in the table
        // Not in the table yet: add it, passing on the index reported by Get().
        if(sonaliikideMassiiv.AddClone(sonaLiigiString, kohtTabelis)!=NULL)
            continue;
        printf(FSTSTR("Ei saa lisada sõnaliigijärjendit\n"));
        assert( false );
        exit( EXIT_FAILURE );
        }
    sisend.Close();
    }
コード例 #2
0
/// Loads the lexicon from the text file "lex.txt" into lexArr and sorts it.
///
/// Every input line is expected to have the form
///     word [  N] tag1=prob1 ... tagN=probN
/// where the count N sits in a fixed-width bracketed field that follows the
/// first space of the line.
///
/// @param tags  tag table; GetIdx() is used to map each tag string to an index.
/// @throws VEAD when lex.txt cannot be opened, when a line does not match the
///         layout above (including an unparsable count or probability), or
///         when a tag is not found in @p tags.
void LEX2DCT::LoeTekstifailist(
    const TAGS2DCT& tags
    )
    {
    lexArr.Start(500,500);
    CPFSFile in;
    if(in.Open(FSTSTR("lex.txt"), FSTSTR("rb"))==false)
        throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                " ", "Jama andmefaili lex.txt avamisega");
    CFSAString rida;
    while(in.ReadLine(&rida)==true)
        {
        LEXINF* lexInf=lexArr.AddPlaceHolder();
        // word [  N] tag1=prob1 ... tagN=probN
        rida.Trim();// strip leading and trailing white space
        rida+=' ';  // make sure every field is followed by a space
        int pos1=(int)rida.Find(' '), pos2, pos3;
        if(pos1<=0)
            throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                " ", "Jamane rida andmefailis lex.txt", (const char*)rida);
        // The length test comes first so that the fixed offsets below are
        // never read on a line that is too short to contain them.
        if((int)rida.GetLength()<=pos1+7 ||
           rida[pos1+5]!=']' || rida[pos1+6]!=' ' || rida[pos1+7]=='\0')
            throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                " ", "Jamane rida andmefailis lex.txt", (const char*)rida);
        CFSAString tagStr;
        lexInf->str=rida.Mid(0,pos1);
        // The count sizes the allocation below, so a failed or negative parse
        // must be rejected instead of being used.
        if(sscanf(((const char*)rida)+pos1+2, "%d", &(lexInf->n))!=1 || lexInf->n<0)
            throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                " ", "Jamane rida andmefailis lex.txt", (const char*)rida);
        lexInf->tagIdxProb=new LEXINF::LEXINFEL[lexInf->n];

        pos1+=7; // start of the first tag=prob pair
        for(int i=0; i<lexInf->n; i++)
            {
            if((pos2=(int)rida.Find('=', pos1))<=0)
                 throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Jamane rida andmefailis lex.txt", (const char*)rida);
            if((pos3=(int)rida.Find(' ',pos2))<=0)
                 throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Jamane rida andmefailis lex.txt", (const char*)rida);

            tagStr=rida.Mid(pos1, pos2-pos1);
            if((lexInf->tagIdxProb[i].tagIdx=tags.GetIdx(&tagStr))<0)
                throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Tundmatu ühestamismärgendandmefailis lex.txt", (const char*)rida);

            // A probability that cannot be parsed would otherwise stay unset.
            if(sscanf((const char*)rida+pos2+1, "%e", &(lexInf->tagIdxProb[i].tagProb))!=1)
                throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Jamane rida andmefailis lex.txt", (const char*)rida);
            pos1=pos3+1;
            }
        // Debug-only check: tag indices of one entry must be strictly increasing.
        for(int i=1; i<lexInf->n; i++)
            {
            assert(lexInf->tagIdxProb[i-1].tagIdx<lexInf->tagIdxProb[i].tagIdx);
            }
        printf("%d\r", lexArr.idxLast);
        }
    printf("%d\n", lexArr.idxLast);
    printf("Leksikoni järjestamine...");
    lexArr.Sort();
    printf("OK\n");
    }
コード例 #3
0
void TAGS2DCT::LoeTekstifailist(void)
    {
    CFSFileName fileName(FSTSTR("taglist.txt"));
    CPFSFile in;
    if(in.Open(fileName, FSTSTR("rb"))==false)
        throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__, __LINE__," ", "Ei suuda avada faili taglist.txt");
    TMPLPTRARRAYBIN<PCFSAString,CFSAString>::Start(100,10);
    CFSAString rida;
    PCFSAString tagStr;

    // Loeme märgendite loendi mällu
    while(in.ReadLine(&rida)==true)
        {
        tagStr=rida.Mid(4);
        tagStr.Trim();
        if(TMPLPTRARRAYBIN<PCFSAString,CFSAString>::AddClone(tagStr)==NULL)
            throw VEAD(ERR_HMM_MOOTOR, ERR_NOMEM, __FILE__, __LINE__," ");
        }
    in.Close();
    printf("Märgendite järjestamine...");
    // Garanteerime järjestatuse
    TMPLPTRARRAYBIN<PCFSAString,CFSAString>::Sort();
    // Kontrollime veel üle, et ikka tõesti järjestatud
    for(int i=1; i<idxLast; i++)
        {
        if(*(operator[](i-1)) >= *(operator[](i)))
            throw VEAD(ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__," ",
                "Jama märgendite järjekorraga andmefailis taglist.txt");
        }
    printf("OK\n");

    gramm1.Start(idxLast);
    CFSFileName fileName2(FSTSTR("margcnt.txt"));
    if(in.Open(fileName2, FSTSTR("rb"))==false)
        throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__, __LINE__," ", "Ei suuda avada faili margcnt.txt");

    for(int i=0; i<idxLast; i++)
        {
        if(in.ReadLine(&rida)==false)
             throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," ");
        int tyhikuPos=rida.Find(' ');
        if(tyhikuPos<=0)
            throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," ");
        CFSAString tag(rida.Left(tyhikuPos));
        if(tag!=*(operator[](i)))
            throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," ");
        int nKorda;
        if(sscanf(((const char*)rida)+tyhikuPos, "%d", &nKorda)!=1)
            throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," ");
        gramm1.Obj(i)=nKorda;
        }        
    }
コード例 #4
0
ファイル: dctcnv4.cpp プロジェクト: Filosoft/vabamorf
//
// Split one input line.
//
// The line has the form "stem C=info C=info ...". The stem goes to the global
// tyvi, the one-character keys C are concatenated into the global
// sonaLiigiString and the info parts are joined with spaces into the global
// tyveInf. Format violations are reported with printf + assert.
//
void TeeSLL(CFSbaseSTRING *rida) // the line always ends with one space
    {
    const FSxCHAR tyhik = (FSxSTR(" "))[0];
    const FSxCHAR vordus = (FSxSTR("="))[0];

    rida->TrimRight();
    (*rida) += FSxSTR(" ");

    // The stem runs up to the first space.
    const int tyveLoppPos = rida->Find(tyhik);
    if(tyveLoppPos == -1)
        {
        printf(FSTSTR("Ei leia tüve lõppu\n"));
        assert( false );
        }
    tyvi = rida->Left(tyveLoppPos);

    // First "C=info" piece: starts the two result strings.
    int juppAlgus = tyveLoppPos+1;
    sonaLiigiString=(const FSxCHAR*)(rida->Mid(juppAlgus, 1));
    if((*rida)[juppAlgus+1] != vordus)
        {
        printf(FSTSTR("Võrdusmärk puudu\n"));
        assert( false );        
        }
    int juppLopp = rida->Find(tyhik, juppAlgus);
    if(juppLopp == -1)
        {
        printf(FSTSTR("Jama\n"));
        assert( false );        
        }
    tyveInf=rida->Mid(juppAlgus+2, juppLopp-juppAlgus-2);
    juppAlgus=juppLopp+1;
    juppLopp=rida->Find(tyhik, juppAlgus); // recomputed in the loop before it is read

    // Remaining pieces: appended to the two result strings.
    while((*rida)[juppAlgus] != 0)
        {
        sonaLiigiString +=(const FSxCHAR*)(rida->Mid(juppAlgus, 1));
        if((*rida)[juppAlgus+1] != vordus)
            {
            printf(FSTSTR("Võrdusmärk puudu\n"));
            assert( false );        
            }
        juppLopp = rida->Find(tyhik, juppAlgus);
        if(juppLopp == -1)
            {
            printf(FSTSTR("Jama\n"));
            assert( false );        
            }
        tyveInf += FSxSTR(" ");
        tyveInf += rida->Mid(juppAlgus+2, juppLopp-juppAlgus-2);
        juppAlgus=juppLopp+1;
        juppLopp=rida->Find(tyhik, juppAlgus);
        }
    }
コード例 #5
0
ファイル: stlspeller.cpp プロジェクト: estnltk/estnltk
int main(int argc, char* argv[])
#endif
{
	FSUNUSED(argc);
	FSUNUSED(argv);

	try {
		vabamorf::speller spl(FSTSTR("et.dct"));

		for (size_t i = 0; words[i]; i++) {
			std::wcout << words[i] << L" -- ";
			if (spl.spell(words[i])) {
				std::wcout << L"OK\n";
			} else {
				std::wcout << L"Vigane, soovitan:";
				std::vector<std::wstring> suggs = spl.suggest(words[i]);
				for (size_t j = 0; j < suggs.size(); j++) {
					std::wcout << L" " << suggs[j];
				}
				std::wcout << L"\n";
			}
		}

	} catch (const vabamorf::exception &) {
		std::wcerr << L"Viga!\n";
	}
	return 0;
}
コード例 #6
0
ファイル: fsreg.cpp プロジェクト: Filosoft/vabamorf
// Reads szVarName from section szSectionName of the INI file szFileName into
// *pszData through GetPrivateProfileString, using a 1024-character buffer.
// Returns 1 when GetPrivateProfileString reports 0 copied characters,
// otherwise 0.
int CFSReg::IniRead(const CFSString &szFileName, const CFSString &szSectionName, const CFSString &szVarName, CFSString *pszData)
{
	const long lBufLen=1024;
	const bool bNothingRead=(GetPrivateProfileString(szSectionName, szVarName, FSTSTR(""), pszData->GetBuffer(lBufLen), lBufLen, szFileName)==0);
	// The buffer obtained with GetBuffer() is handed back to the string object.
	pszData->ReleaseBuffer();
	return (bNothingRead ? 1 : 0);
}
コード例 #7
0
ファイル: json.cpp プロジェクト: urdvr/vabamorf
// Reads a bare JSON constant via ReadText() and converts it to a CFSVar:
// "true" -> CFSVar(true), "false" -> CFSVar(false), "null" -> CFSVar().
// Throws CJSONException for any other text.
CFSVar CJSONReader::ReadConst()
{
	CFSAString szStr=ReadText();
	if (szStr=="true") return CFSVar(true);
	if (szStr=="false") return CFSVar(false); // was CFSVar(true): "false" parsed as true
	if (szStr=="null") return CFSVar();
	throw CJSONException(CFSString(FSTSTR("Unknown constant '")) + FSStrAtoT(szStr, FSCP_UTF8) + FSTSTR("'"));
}
コード例 #8
0
ファイル: json.cpp プロジェクト: urdvr/vabamorf
// Parses one value with ReadVal("") and returns it.
// Throws CJSONException when m_cCh is still non-zero afterwards, i.e. when
// there is input left over after the value.
CFSVar CJSONReader::Read()
{
	m_iCollectData=1;
	GetChar(true);
	CFSVar Result=ReadVal("");
	if (m_cCh==0) {
		return Result;
	}
	throw CJSONException(FSTSTR("Partially parsed file"));
}
コード例 #9
0
ファイル: t3tagspre.cpp プロジェクト: theranger/vabamorf
void T3TAGSPRE::TagsFromCooked(
    const CFSFileName& fileName,
    const PFSCODEPAGE codePage)
    {
    VOTAFAILIST in(fileName, FSTSTR("rb"), codePage);
    FSXSTRING rida, sona;
    FSXSTRING margend, *rec;
    int algus, lopp, idx, i, nRida=0;
    TMPLPTRARRAYBIN<FSXSTRING,CFSWString> mrgndid(130,50);

    margend=FSWSTR("***VAHE***");   //lausevahe iga märgendite loend peab sisaldama seda
    rec=mrgndid.AddClone(margend);
    assert(rec!=NULL); // märgendi lisamine äpardus

    margend=FSWSTR("X");             //iga märgendite loend peab sisaldama seda
    rec=mrgndid.AddClone(margend);
    assert(rec!=NULL); // märgendi lisamine äpardus

    //printf("%10d/%3d  -- reast/märgendit\r", nRida, mrgndid.idxLast);
    for(nRida=0; in.Rida(rida)==true; nRida++)
        {
        //printf("%10d/%3d\r", nRida, mrgndid.idxLast);
        rida.Trim();
        rida += FSWSTR(" ");
        for(algus=0; (lopp=(int)(rida.Find((FSWCHAR)' ', algus)))>0; algus=lopp+1)
            {
            sona=rida.Mid(algus, lopp-algus);
            algus=lopp+1;
            lopp=(int)(rida.Find((FSWCHAR)' ', algus));
            //assert(lopp > 0);
            if(lopp <= 0)
                throw VEAD(__FILE__, __LINE__, "Vigane COOKED-fail");
            margend=(rida.Mid(algus, lopp-algus));

            rec=mrgndid.Get(&margend, &idx);
            if(rec==NULL) // sellist veel polnud, tuleb lisada idx-indaks
                {
                rec=mrgndid.AddClone(margend, idx);
                assert(rec!=NULL); // märgendi lisamine apardus
                }
            }
        }
    //printf("%10d/%3d  -- reast/märgendit\n", nRida, mrgndid.idxLast);
    // Tõstame märgendid ümber

    for(i=1; i<mrgndid.idxLast; i++)
        {
        if(*(mrgndid[i-1]) >= *(mrgndid[i]))
            throw VEAD( ERR_HMM_MOOTOR, ERR_OPN, __FILE__, __LINE__," ",
                                                      "jama märgendite järjestusega");
        }
    for(i=0; i<mrgndid.idxLast; i++)
        {
        margendid.AddClone(*(mrgndid[i]));
        }
    margendid.Sort();
    }
コード例 #10
0
ファイル: strcnv.cpp プロジェクト: theranger/vabamorf
/// Initialises the SGML-entity <-> Unicode converter.
///
/// @param path        search path for "sgml-uc-cnv.txt"; when NULL no table
///                    is loaded and only the two flags are stored
/// @param _ignoramp_  stored in ignoramp
/// @param _autosgml_  stored in autosgml
/// @throws VEAD when the table file cannot be located or opened.
void CONV_HTML_UC2::Start(
    const FSTCHAR* path,
    const bool _ignoramp_,
    const bool _autosgml_
)
{
    ignoramp=_ignoramp_;
    autosgml=_autosgml_;
    if(path!=NULL) // take the list from a file
    {
        CFSString tabeliFailiNimi(FSTSTR("sgml-uc-cnv.txt"));
        CFSString tabeliFailiPikkNimi;
        CFSString p(path);
        // Find out which directory holds the conversion table
        if(Which(&tabeliFailiPikkNimi, &p, &tabeliFailiNimi)==false)
            throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__,__LINE__, "$Revision: 557 $",
                       "Ei leia SGML olemite faili sgml-uc-cnv.txt");
        // Open the file that contains the conversion table
        CPFSFile tabeliFail;
        if(tabeliFail.Open(tabeliFailiPikkNimi, FSTSTR("rb"))==false)
            // The comma after the revision string was missing, which fused the
            // revision and the message into one literal and shifted the arguments.
            throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__,__LINE__, "$Revision: 557 $",
                       "Ei suuda avada SGML olemite faili"
#if !defined( _UNICODE )
                       ,(const char*)tabeliFailiPikkNimi
#endif
                      );
        // Read the conversion table from the file into memory
        SGML_UC* rec;
        sgml2uc.Start(100,10);
        uc2sgml.Start(100,10);
        sgml_stringi_max_pikkus=0;
        int n;
        while((rec=sgml2uc.AddPlaceHolder())->Start(tabeliFail)==true)
        {
            uc2sgml.AddPtr(rec); // this array receives the pointer only
            if((n=(int)strlen(rec->sgml))>sgml_stringi_max_pikkus)
                sgml_stringi_max_pikkus=n;
        }
        sgml2uc.Del(); // drop the last placeholder: reading into it failed
        sgml2uc.Sort(SGML_UC::sortBySGMLStr);   // this array is ordered by SGML entity
        uc2sgml.Sort(SGML_UC::sortByUCchar);    // this array is ordered by Unicode character
    }
}
コード例 #11
0
ファイル: dctcnv4.cpp プロジェクト: Filosoft/vabamorf
//
// Pass 2: replace the word-class sequences by their table indices.
//
// Re-reads inFileName, splits every line with TeeSLL(), looks the global
// sonaLiigiString up in sonaliikideMassiiv and writes "stem index=info" lines
// to outFileName. The process is terminated when a file cannot be opened or
// when a sequence is missing from the table.
//
void PaneIndeksidAsemele(
    CFSFileName &inFileName,
    CFSFileName &outFileName)
    {
    CPFSFile in, out;
    CFSbaseSTRING rida, uusRida;

    if(in.Open(inFileName, FSTSTR("rb"))==false)
        {
        printf(FSTSTR("%s: ei saa faili avatud\n"), (const FSTCHAR *)inFileName);
        exit( EXIT_FAILURE );
        }
    printf(FSTSTR("%s "), (const FSTCHAR *)outFileName);
    if(out.Open(outFileName, FSTSTR("wb+"))==false)
        {
        printf(FSTSTR("\n%s: ei saa faili luua\n"), (const FSTCHAR *)outFileName);
        exit( EXIT_FAILURE );
        }
    while(in.ReadLine(&rida)==true)
        {
        int idx;
        TeeSLL(&rida); // split the input line again
        if((idx=sonaliikideMassiiv.GetIdx(&sonaLiigiString)) < 0)
            {
            printf(FSTSTR("Ei leia juba tehtud sõnaliigijärjendit\n"));
            assert( false );
            // With NDEBUG the assert vanishes; without this exit a negative
            // index would be written to the output file.
            exit( EXIT_FAILURE );
            }
        // assemble the new line
        uusRida.Format(FSxSTR("%s %d=%s\n"), (const FSxCHAR*)tyvi, idx, (const FSxCHAR*)tyveInf);
        out.WriteString((const FSxCHAR *)(uusRida), uusRida.GetLength());
        }
    in.Close();
    out.Close();
    }
コード例 #12
0
ファイル: fsreg.cpp プロジェクト: Filosoft/vabamorf
// Splits a registry path of the form "HKEY_xxx\folder\...\name".
//
// On success *hRoot receives the root key, *pszFolder everything between the
// root prefix and the last backslash, *pszFile the part after the last
// backslash (empty when there is no backslash), and 0 is returned.
// Returns -1 when szPath is empty, does not start with one of the known root
// prefixes, or has nothing after the prefix.
int CFSReg::Split(const CFSString &szPath, HKEY *hRoot, CFSString *pszFolder, CFSString *pszFile)
{
	static struct _Roots{
		const TCHAR *Name; // points at string literals, so the pointee must be const
		HKEY hKey;
	}const Roots[]={
		{FSTSTR("HKEY_CLASSES_ROOT\\"), HKEY_CLASSES_ROOT},
		{FSTSTR("HKEY_CURRENT_USER\\"), HKEY_CURRENT_USER},
		{FSTSTR("HKEY_LOCAL_MACHINE\\"), HKEY_LOCAL_MACHINE},
		{FSTSTR("HKEY_USERS\\"), HKEY_USERS},
		{0, 0} // end-of-table marker
	};
	if (szPath.IsEmpty()) {
		return -1;
	}

	// Strip the root prefix; szFolder stays empty when no prefix matches.
	CFSString szFolder;
	for (INTPTR ip=0; Roots[ip].Name; ip++){
		if (szPath.StartsWith(Roots[ip].Name)) {
			*hRoot=Roots[ip].hKey;
			szFolder=szPath.Mid(FSStrLen(Roots[ip].Name));
			break;
		}
	}

	if (szFolder.IsEmpty()) {
		return -1;
	}
	// The last backslash separates the folder from the value name.
	INTPTR ipPos=szFolder.ReverseFind('\\');
	if (ipPos==-1) {
		*pszFolder=szFolder;
		pszFile->Empty();
	}
	else {
		*pszFolder=szFolder.Left(ipPos);
		*pszFile=szFolder.Mid(ipPos+1);
	}
	return 0;
}
コード例 #13
0
ファイル: linguistic.cpp プロジェクト: glensc/synthts_et
void CLinguistic::Open(const CFSFileName &FileName)
{
	if (m_pMorph) {
		throw CLinguisticException(CLinguisticException::MAINDICT, CLinguisticException::OPEN);
	}
	try {
		m_pMorph=new ETMRFAS(0, FileName, FSTSTR(""));
	} catch(const VEAD&) {
		Close();
		throw CLinguisticException(CLinguisticException::MAINDICT, CLinguisticException::UNDEFINED);
	} catch(...) {
		Close();
		throw;
	}
}
コード例 #14
0
ファイル: json.cpp プロジェクト: tpetmanson/jsvabamorf
// Reads a quoted string whose opening quote is the current character m_cCh.
// Backslash escapes are copied through verbatim (the backslash and the
// character after it). Returns the text between the quotes; throws
// CJSONException when the input ends before the closing quote.
CFSAString CJSONReader::ReadString()
{
	const char cQuote=m_cCh;
	CFSAString szResult;
	while (GetChar()) {
		if (m_cCh=='\\') {
			// Keep the backslash and whatever follows it as they are.
			szResult+=m_cCh;
			if (!GetChar()) break;
			szResult+=m_cCh;
			continue;
		}
		if (m_cCh==cQuote) {
			GetChar(true);
			return szResult;
		}
		szResult+=m_cCh;
	}
	throw CJSONException(FSTSTR("Missing end of string"));
}
コード例 #15
0
ファイル: json.cpp プロジェクト: urdvr/vabamorf
// Reads a quoted string whose opening quote is the current character m_cCh
// and decodes its backslash escapes: \\, the active quote character, \n, \r,
// \t, \b, \f and \uXXXX (four hex digits, appended as UTF-8).
// Returns the decoded text. Throws CJSONException when the input ends before
// the closing quote or when an escape is not recognised.
CFSAString CJSONReader::ReadString()
{
	char cQuote=m_cCh;
	CFSAString szStr;
	while (GetChar()) {
		if (m_cCh=='\\') {
			if (!GetChar()) break;
			if (m_cCh=='\\') szStr+='\\';
			else if (m_cCh=='\'' && cQuote=='\'') szStr+='\'';
			else if (m_cCh=='"' && cQuote=='"') szStr+='"';
			else if (m_cCh=='n') szStr+='\n';
			else if (m_cCh=='r') szStr+='\r';
			else if (m_cCh=='t') szStr+='\t';
			else if (m_cCh=='b') szStr+='\b';
			else if (m_cCh=='f') szStr+='\f';
			else if (m_cCh=='u') {
				bool Error=false;
				wchar_t Char=0;
				// Accumulate four hex digits, most significant first.
				for (INTPTR ip=0; !Error && ip<4; ip++) {
					Error=!GetChar();
					char Code=m_cCh;
					Char*=0x10;
					if (Code>='0' && Code<='9') Char+=Code-'0';
					// 'a'/'A' stand for 10; the offset was previously taken
					// from 'f'/'F', which decoded every letter digit wrongly.
					else if (Code>='a' && Code<='f') Char+=Code-'a'+10;
					else if (Code>='A' && Code<='F') Char+=Code-'A'+10;
					else Error=true;
				}
				if (Error) break;
				szStr+=FSStrWtoA(Char, FSCP_UTF8);
			}
			else break; // unknown escape: reported through the exception below
		} else if (m_cCh==cQuote) {
			GetChar(true);
			return szStr;
		} else {
			szStr+=m_cCh;
		}
	}
	throw CJSONException(FSTSTR("Missing end of string"));
}
コード例 #16
0
// Runs the converters one after another on a single DCTMETASTRCT object that
// is set up for "et3.dct", then calls meta.Write().
// NOTE(review): presumably this packs the text data files listed below into
// the dictionary file — confirm against the individual Run() implementations.
void T3PAKISON::Run(void)
    {
    // "taglist.txt"
    // "lex.txt" - lexicon
    // "3grammid.txt"

    CFSFileName dctFileName(FSTSTR("et3.dct"));
    DCTMETASTRCT meta;
    meta.Creat(dctFileName);

    // The tag table comes first: every later step receives it as an argument.
    TAGS2DCT tags;
    tags.Run(meta);

    MKLASSID2DCT mklassid;
    mklassid.Run(meta, tags);

    LEX2DCT lex;
    lex.Run(meta, tags);

    NGRAMS2DCT ngrams;
    ngrams.Run(meta, tags);

    // Called only after all converters have run.
    meta.Write();
    }
コード例 #17
0
ファイル: dctcnv4.cpp プロジェクト: Filosoft/vabamorf
int main(int argc, FSTCHAR **argv)
    {
    CFSFileName inDCT, outDCT, inPRF, outPRF, outSL;
    const FSTCHAR *outS6Nlaiend = FSTSTR(".s6n");
    //
    for(argc--, argv++; argc > 0; argc--, argv++)
        {
        if(argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'d')
            {
            inDCT=argv[0]+2;
            int punkt=inDCT.Find((FSTCHAR)'.');
            outDCT = (punkt== -1)
                            ? inDCT + outS6Nlaiend
                            : inDCT.Left(punkt) + outS6Nlaiend;
            }
        else if(argc > 0 && argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'p')
            {
            inPRF=argv[0]+2;
            int punkt=inPRF.Find((FSTCHAR)'.');
            outPRF = (punkt== -1)
                            ? inPRF + outS6Nlaiend
                            : inPRF.Left(punkt) + outS6Nlaiend;
            }
        else if(argc > 0 && argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'j')
            {
            outSL=argv[0]+2;
            }
        else
            {
            printf(FSTSTR("%s: jama lipp\n"), argv[0]);
            return EXIT_FAILURE;
            }
        }
    //
    // 1. ring: teeme sõnaliigujärjendite (2nd)tabeli
    //
    TeeS6naLiikideTabel(inDCT);
    TeeS6naLiikideTabel(inPRF);
    //
    // 2.ring: indeksid asemele
    //
    printf(FSTSTR("--> "));
    PaneIndeksidAsemele(inDCT, outDCT);
    PaneIndeksidAsemele(inPRF, outPRF);
    //
    // sõnaliikide 2ndtabel ka teksti faili
    // iga tabeli element ise real
    //
    int i;
    CPFSFile sl;
    printf(FSTSTR("%s\n"), (const FSTCHAR *)outSL);
    if(sl.Open(outSL, FSTSTR("wb+"))==false)
        {
        printf(FSTSTR("\n%s: ei saa faili luua\n"), (const FSTCHAR *)outSL);
        return EXIT_FAILURE;
        }
    RIDA *slptr;
    printf(FSTSTR("  %d erinevat sõnaliigijärjendit\n"), sonaliikideMassiiv.idxLast);
    for(i=0; (slptr=sonaliikideMassiiv[i])!=NULL; i++)
        {
        (*slptr) += FSxSTR("\n");
        const FSxCHAR *slstr = (const FSxCHAR *)(*slptr);
        const int len = slptr->GetLength();
        sl.WriteString(slstr, len);
        } 
    sl.Close();
    printf(FSTSTR("\n"));

    return EXIT_SUCCESS;
    }
コード例 #18
0
ファイル: noomen.cpp プロジェクト: theranger/vabamorf
int Tmain(int argc, FSTCHAR **argv)
    {

    CPFSFile infile;                   /*  viit sisendfailile  */
    CPFSFile outf, errf;              /*  viidad v@ljundfailidele */
    CPFSFile lgrf;                     /*  viit lõpugruppide failile LGR*/
    FSXSTRING inbuf;		/*  toodeldav rida  */
    FSXSTRING outbuf;		/*  toodeldav rida  */

    CFSFileName fn0=argv[1];
    CFSFileName fn1=argv[2];
    CFSFileName fn2=argv[3];
    CFSFileName fn3=FSTSTR("lgr");

    int  total, converted, unknown;
    int  k;
    int kk, era, k2, k3;
    FSXSTRING inbuf_era, inbuf_para_ind;

    converted = unknown = 0;
    if (argc != 4)
	    {
        printf("kirjuta paras arv parameetreid või keri poti laadale\n");
        return EXIT_FAILURE;
	    }
    if (infile.Open( fn0,       FSTSTR("rb" )) == false ||
          outf.Open( fn1,       FSTSTR("wb" )) == false ||
          errf.Open( fn2,       FSTSTR("wb" )) == false ||
          lgrf.Open( fn3, FSTSTR("ab+")) == false)
        {
	    printf( "Can't open files\n" ); // fopen
	    exit( 1 );
	    }
    loe_lgr( &lgrf );            /* loe lõpugrupid mällu */
    tyvi[0].tyv = FSxSTR("");

    for (total=0; infile.ReadLine(&inbuf)==true; total++)
        {
        kk = inbuf.Find(FSxSTR(".0!"));
	    if (kk!=-1)                      /* on 0-muuttyybi sõna */
	        {
	        tyybinr = 1;            /* lihtsalt et poleks 0 */
            k2 = inbuf.Find(FSxSTR("!\\"));
		    if (k2!=-1)
			    {
                FSXSTRING tmp;
                tmp = (const FSxCHAR *)inbuf.Mid(k2+2);
                k3 = tmp.Find(FSxSTR("\\"));
			    if (k3!=-1)
				    {
                    sliik = (const FSxCHAR *)tmp.Left(k3);
				    }
			    }
            if (k2==-1 || k3==-1)
			    {
			    unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
				    {
				    printf("\ntulemuse kirjut. error-faili ebaõnnestus");
				    return EXIT_FAILURE;
				    }
			    continue;          /* vt järgmisi ridu */
			    }
		    k = nomnul( &inbuf );
	        if (!k)
		        {
		        unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
	        converted++;
	        lisa_lgr( &lgrf );    /* leiab igale selle parad tyvele lgr nr */
	        tee_rida(&inbuf, &outbuf);            /* teeb rea nr|tyvi,lgr/tyvi,lgr... */
            if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false)
		        {
		        printf("\ntulemuse kirjut. väljundfaili ebaõnnestus");
		        return EXIT_FAILURE;
		        }
	        nullityv();            /* et saaks järgmist parad. teha */
	        continue;              /* et ei vaataks nii, nagu 'norm' ridu */
	        }
        era = inbuf.Find(FSxSTR("**"));
        inbuf_era = FSxSTR("");
        if (era != -1)
            inbuf_era = (const FSxCHAR *)inbuf.Mid(era-1);
        inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_era.GetLength());

        kk = inbuf.Find(FSxSTR("&&"));
        inbuf_para_ind = FSxSTR("");
        if (kk != -1)
            inbuf_para_ind = (const FSxCHAR *)inbuf.Mid(kk);
        inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_para_ind.GetLength());
	    k = era_ind( &inbuf );
	    if (!k)                           /* oli mingi viga */
	        {
	        unknown++;
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		        {
		        printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		        return EXIT_FAILURE;
		        }
	        continue;          /* vt järgmisi ridu */
	        }
	    k = era_reegel( &inbuf );
	    if (!k)                           /* oli mingi viga */
	        {
	        unknown++;
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		        {
		        printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		        return EXIT_FAILURE;
		        }
	        continue;          /* vt järgmisi ridu */
	        }
	    k = era_tyvi( &inbuf );
	    if (!k)                           /* oli mingi viga */
	        {
	        unknown++;
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		        {
		        printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		        return EXIT_FAILURE;
		        }
	        continue;          /* vt järgmisi ridu */
	        }
	    k = tee_para( );
	    if (!k)                           /* oli mingi viga */
	        {
	        unknown++;
	        nullityv();            /* et saaks järgmist parad. teha */
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		        {
		        printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		        return EXIT_FAILURE;
		        }
	        continue;          /* vt järgmisi ridu */
	        }
	    if ( kk != -1 )                   /* leidub parall. indeks */
	        {
            inbuf_para_ind = (const FSxCHAR *)inbuf_para_ind.Mid(2);
            if (inbuf_para_ind.Find(FSxSTR("."))==-1)
		        {                /* => on uus sonaliik; => salvest. senine*/
		        lisa_lgr( &lgrf );  /*leiab igale selle parad tyvele lgr nr */
		        tee_rida(&inbuf, &outbuf);           /* teeb rea nr|tyvi,lgr/tyvi,lgr... */
                if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. väljundfaili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        nullityv();            /* et saaks järgmist parad. teha */
		        era_tyvi( &inbuf );     /* nullityv() oli ka tyved kaotand */
		        }
            k = era_ind(&inbuf_para_ind);
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
            k = era_reegel( &inbuf_para_ind );
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
            if (inbuf_para_ind.Find(FSxSTR("(")) == 0)
		        {
                for (k=0; tyvi[k].tyv.GetLength() > 0; k++)
		            tyvi[k].sulg = 1;
		        }
	        k = par_para( );
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
		        nullityv();            /* et saaks järgmist parad. teha */
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
	        }
	    if ( era != -1 )               /* leidub ka erandeid */
	        {
            inbuf_era = (const FSxCHAR *)inbuf_era.Mid(3);
	        k = nomerand( &inbuf_era );
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
	        }
	    converted++;
	    lisa_lgr( &lgrf );      /* leiab igale selle parad tyvele lgr nr */
	    tee_rida(&inbuf, &outbuf);            /* teeb rea nr|tyvi,lgr/tyvi,lgr... */
        if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false)
	        {
	        printf("\ntulemuse kirjut. väljundfaili ebaõnnestus");
	        return EXIT_FAILURE;
	        }
	    nullityv();            /* et saaks järgmist parad. teha */

	    /*STAT:*/    /* tooseisu v@ljastamine ekraanile */
	    if ( total % 100 == 0 )
	        printf( "\r  KOKKU: %6d KORRAS: %6d SEGASEID: %6d ",
			      total, converted, unknown );
	    }

    /*FILE_END:*/
    printf( "\r  KOKKU: %6d KORRAS: %6d SEGASEID: %6d ",
			  total, converted, unknown );
    Tprintf( FSTSTR("** NOM: %s\n"), (const FSTCHAR*)fn0 );

    infile.Close();
    outf.Close();
    errf.Close();
    lgrf.Close();
	return EXIT_SUCCESS;
    }
コード例 #19
0
ファイル: verb.cpp プロジェクト: Filosoft/vabamorf
int main(int argc, FSTCHAR **argv)
{
    CPFSFile infile;                   /*  viit sisendfailile  */
    CPFSFile outf, errf;              /*  viidad v@ljundfailidele */
    CPFSFile lgrf;                     /*  viit lõpugruppide failile LGR*/
    FSXSTRING inbuf;		/*  toodeldav rida  */
    FSXSTRING outbuf;		/*  toodeldav rida  */

    CFSFileName fn_0, fn_1, fn_2;
    int  total, converted, unknown;
    int  k;
    int kk, era;
    FSXSTRING inbuf_era, inbuf_para_ind;

    total = converted = unknown = 0;
    if (argc != 4)
        {
        printf("pane argumendid taha !\n");
        return EXIT_FAILURE;
        }
    else
        {
        fn_0=argv[1];
        fn_1=argv[2];
        fn_2=argv[3];
        }

    if (infile.Open(fn_0,      FSTSTR("rb" )) == false ||
        outf.Open( fn_1,       FSTSTR("wb" )) == false ||
        errf.Open( fn_2,       FSTSTR("wb" )) == false ||
        lgrf.Open( FSTSTR("lgr"), FSTSTR("ab+")) == false)
        {
        printf( "Can't open files\n" );
        return EXIT_FAILURE;
        }
    loe_lgr( &lgrf );            /* loe lõpugrupid mällu */
    tyvi[0].tyv = FSxSTR("");


    for (total=0; infile.ReadLine(&inbuf)==true; total++)
        {
        era = inbuf.Find(FSxSTR("**"));
        if (era != -1)
            {
            tyybinr = 1;    /* lihtsalt et poleks 0; vaja tee_rida() jaoks */
            k = inbuf.Find(FSWSTR("!\\"));
            if ( k == -1)       /* polegi indeksit; ei saa olla... */
                sliik = FSWSTR("V");
            else
                {
                sliik = (const FSWCHAR *)inbuf.Mid(k+2);
                k = sliik.Find(FSWSTR("\\"));
                if ( k == -1)
                    {
                    printf("\nsonaliik sassis ");
                    return EXIT_FAILURE;
                    }
                sliik = (const FSWCHAR *)sliik.Left(k);
                }
            k = verbera( &inbuf );
            if (!k)                           /* oli mingi viga */
                {
                unknown++;
                nullityv();            /* et saaks järgmist parad. teha */
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
                    {
                    printf("\ntulemuse kirjut. error-faili ebaõnnestus");
                    return EXIT_FAILURE;
                    }
                continue;          /* vt järgmisi ridu */
                }
            converted++;
            lisa_lgr( &lgrf );      /* leiab igale selle parad tyvele lgr nr */
            tee_rida(&inbuf, &outbuf);            /* teeb rea nr|tyvi,lgr/tyvi,lgr... */
            if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false)
                {
                printf("\ntulemuse kirjut. väljundfaili ebaõnnestus");
                return EXIT_FAILURE;
                }
            nullityv();            /* et saaks järgmist parad. teha */
            continue;              /* vt järgmist kirjet */
            }
        kk = inbuf.Find(FSxSTR("&&"));
        inbuf_para_ind = FSxSTR("");
        if (kk != -1)
            inbuf_para_ind = (const FSxCHAR *)inbuf.Mid(kk);
        inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_para_ind.GetLength());
        k = era_ind( &inbuf );
        if (!k)                           /* oli mingi viga */
            {
            unknown++;
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
                {
                printf("\ntulemuse kirjut. error-faili ebaõnnestus");
                return EXIT_FAILURE;
                }
            continue;          /* vt järgmisi ridu */
            }
        k = era_tyvi( &inbuf );
        if (!k)                           /* oli mingi viga */
            {
            unknown++;
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
                {
                printf("\ntulemuse kirjut. error-faili ebaõnnestus");
                return EXIT_FAILURE;
                }
            continue;          /* vt järgmisi ridu */
            }
        k = vteepar( );
        if (!k)                           /* oli mingi viga */
            {
            unknown++;
            nullityv();            /* et saaks järgmist parad. teha */
            if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
                {
                printf("\ntulemuse kirjut. error-faili ebaõnnestus");
                return EXIT_FAILURE;
                }
            continue;          /* vt järgmisi ridu */
	        }
	    if ( kk != -1 )                   /* leidub parall. indeks */
	        {
            inbuf_para_ind = (const FSxCHAR *)inbuf_para_ind.Mid(2);
            k = era_ind( &inbuf_para_ind );
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
	        k = vparpar( );
	        if (!k)                           /* oli mingi viga */
		        {
		        unknown++;
		        nullityv();            /* et saaks järgmist parad. teha */
                if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false)
		            {
		            printf("\ntulemuse kirjut. error-faili ebaõnnestus");
		            return EXIT_FAILURE;
		            }
		        continue;          /* vt järgmisi ridu */
		        }
	        }
	    converted++;
	    lisa_lgr( &lgrf );      /* leiab igale selle parad tyvele lgr nr */
	    tee_rida(&inbuf, &outbuf);            /* teeb rea nr|tyvi,lgr/tyvi,lgr... */
        if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false)
	        {
	        printf("\ntulemuse kirjut. väljundfaili ebaõnnestus");
	        return EXIT_FAILURE;
	        }
	    nullityv();            /* et saaks järgmist parad. teha */

	    /*STAT:*/    /* tooseisu v@ljastamine ekraanile */
	    if ( total % 100 == 0 )
	        printf( "\r  KOKKU: %6d KORRAS: %6d SEGASEID: %6d ",
			      total, converted, unknown );
	    }

   /* FILE_END:*/
    printf( "\r  KOKKU: %6d KORRAS: %6d SEGASEID: %6d ",
			  total, converted, unknown );
    Wprintf( FSTSTR("** VRB: %s\n"), (const FSTCHAR*)fn_0);
    infile.Close();
    outf.Close();
    errf.Close();
    lgrf.Close();
    return EXIT_SUCCESS;
}
コード例 #20
0
/// @brief Packs the contents of the text file "klassid.txt" into the dictionary.
///
/// Input layout, as parsed below:
///  - first line: an integer (the number of ambiguity classes), written out as UB4;
///  - every following line: an integer n, a space, then n space-separated
///    items of the form `tag=probability`.
/// For each line the value n is written as UB1, followed for every item by the
/// tag's index in @a tags (UB1) and the probability as a raw UKAPROB value.
/// Before any data is written, the current position of @a meta is registered
/// under DCTELEMID_T3M_KLASSID.
///
/// @throws VEAD when the file cannot be opened or read, a line is malformed,
///         a tag is not found in @a tags, or a checked write to @a meta fails.
void MKLASSID2DCT::Run(
    DCTMETASTRCT& meta,     ///< Dictionary structure store that receives the packed data
    const TAGS2DCT& tags    ///< Array of disambiguation tags
    )
    {
    CPFSFile in;
    if(in.Open(FSTSTR("klassid.txt"), FSTSTR("rb"))==false)
        throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                " ", "Jama andmefaili klassid.txt avamisega");
    CFSAString rida;

    // Move the data straight into the dictionary...
    int n, reaNr, kokkuRidu;
    long pos=meta.Tell();
    meta.Add(DCTELEMID_T3M_KLASSID, pos);
    if(in.ReadLine(&rida)==false)
        throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                " ", "Jama andmefaili klassid.txt lugemisega");
    // The header line must contain the class count; without this check a
    // malformed header would leave kokkuRidu uninitialised.
    if(sscanf((const char*)rida, "%d", &kokkuRidu)!=1)
        throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                " ", "Jama andmefaili klassid.txt lugemisega");
    if(meta.WriteUnsigned<UB4, int>(kokkuRidu)==false) // number of ambiguity classes
        throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__,
                " ", "Jama pakitud sõnastikku kirjutamisega");
    for(reaNr=1; in.ReadLine(&rida)==true; reaNr++)
        {
        printf("%06d:%06d\r", kokkuRidu, reaNr); // progress: expected total : current line
        int tyhik, vordus;
        rida += " "; // sentinel space so that the last item is terminated like the others
        if((tyhik=(int)rida.Find(' '))<0)
            throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                    " ", "Jama andmefaili  klassid.txt  lugemisega");
        if(sscanf((const char*)rida, "%d", &n)!=1)
            throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                    " ", "Jama andmefaili  klassid.txt  lugemisega");
        // Size of the current ambiguity class; the result is checked like
        // every other write to the dictionary.
        if(meta.WriteUnsigned<UB1, int>(n)==false)
            throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__,
                    " ", "Jama pakitud sõnastikku kirjutamisega");
        for(int i=0; i<n; i++)
            {
            // tyhik is the space in front of the current item, vordus its '='.
            // The tag between them must not be empty.
            if((vordus=(int)rida.Find('=', tyhik+1))<=tyhik+1)
                throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                        " ", "Jama andmefaili  klassid.txt  lugemisega", (const char*)rida);

            CFSAString tagStr=rida.Mid(tyhik+1, vordus-tyhik-1);
            int tagIdx=tags.GetIdx(&tagStr);
            if(tagIdx<0)
                throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Tundmatu ühestamismärgend andmefailis  klassid.txt  ",
                                                                            (const char*)rida);
            if(meta.WriteUnsigned<UB1,int>(tagIdx)==false)
                throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__,
                        " ", "Jama pakitud sõnastikku kirjutamisega");

            // NOTE(review): "%e" stores through a float*; this assumes UKAPROB
            // is float - confirm against its declaration.
            UKAPROB tagProb;
            if(sscanf(((const char*)rida)+vordus+1, "%e", &tagProb)!=1)
                throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__,
                    " ", "Puuduv tõenäosus failis  klassid.txt  ",
                                                                            (const char*)rida);

            if(meta.WriteBuffer(&tagProb,sizeof(UKAPROB))==false)
                throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__,
                        " ", "Jama pakitud sõnastikku kirjutamisega");

            // Advance to the space that terminates this item.
            if((tyhik=(int)rida.Find(' ', tyhik+1))<=0)
                throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                        " ", "Jama andmefaili  klassid.txt  lugemisega");

            // The '=' found above must belong to this item, not to a later one.
            if(vordus >= tyhik)
                throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__,
                        " ", "Jama andmefaili  klassid.txt  lugemisega");
            }
        }
    printf("\n");
    }
コード例 #21
0
ファイル: json.cpp プロジェクト: urdvr/vabamorf
/// @brief Reads one value, starting at the current character m_cCh.
///
/// Dispatches on m_cCh: '[' reads an array, '{' reads a map, a single or
/// double quote reads a string, a digit or one of "-+." reads a number, and a
/// letter reads a constant. At end of input (m_cCh==0) the returned value is
/// left default-constructed. Nested values are read recursively with the key
/// path extended by "/" and the element index or key.
/// OnValReadStart() is called before and OnValReadEnd() after the value is
/// read. Child values are stored into the result only while m_iCollectData>0.
///
/// @param szKeyPath Key path of the value being read.
/// @return The value that was read.
/// @throws CJSONException on premature end of input inside an array or map,
///         a missing ',' or ':', a missing key, or an unrecognised value.
CFSVar CJSONReader::ReadVal(const CFSAString &szKeyPath)
{
	OnValReadStart(szKeyPath);
	CFSVar Data;

	if (m_cCh=='[') {
		Data.Cast(CFSVar::VAR_ARRAY);
		GetChar(true);
		INTPTR ipPos=0; // index of the next element
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpected EOF"));
			} else if (m_cCh==']') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				// Every element after the first must be preceded by ','.
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in array"));
				}
			}

			// The element index, as text, is the last key path component.
			CFSAString szKey;
			szKey.Format("%zd", ipPos);
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[ipPos]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='{') {
		Data.Cast(CFSVar::VAR_MAP);
		GetChar(true);
		INTPTR ipPos=0; // number of entries read so far
		for (;;) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpected EOF"));
			} else if (m_cCh=='}') {
				GetChar(true);
				break;
			} else if (ipPos>0) {
				// Every entry after the first must be preceded by ','.
				if (m_cCh==',') {
					GetChar(true);
				} else {
					throw CJSONException(FSTSTR("Missing ',' in map"));
				}
			}

			// A key is either a quoted string or bare text starting with a letter.
			CFSAString szKey;
			if (m_cCh=='\"' || m_cCh=='\'') {
				szKey=ReadString();
			} else if (FSIsLetter(m_cCh)) {
				szKey=ReadText();
			} else {
				throw CJSONException(FSTSTR("Expected key"));
			}
			if (m_cCh==':') {
				GetChar(true);
			} else {
				throw CJSONException(FSTSTR("Expected ':'"));
			}
			CFSVar Data1=ReadVal(szKeyPath+"/"+szKey);
			if (m_iCollectData>0) {
				Data[szKey]=Data1;
			}
			ipPos++;
		}
	} else if (m_cCh=='\"' || m_cCh=='\'') {
		Data=ReadString();
	} else if (!m_cCh) {
		// End of input: leave Data default-constructed.
		// Tested before the number branch because, if FSStrChr follows strchr
		// semantics, a 0 character would match the terminator of "-+." and
		// end of input would be handed to ReadNumber() instead.
	} else if ((m_cCh>='0' && m_cCh<='9') || FSStrChr("-+.", m_cCh)) {
		Data=ReadNumber();
	} else if (FSIsLetter(m_cCh)) {
		Data=ReadConst();
	} else {
		throw CJSONException(FSTSTR("Unknown value type"));
	}

	OnValReadEnd(szKeyPath, Data);
	return Data;
}
コード例 #22
0
/// Program entry point: hands the command line and environment over to
/// MainTemplate<T3PAKISON>, together with the string ".t3"
/// (presumably a file extension - confirm against MainTemplate),
/// and returns its result as the exit code.
int Tmain(int argc, FSTCHAR** argv, FSTCHAR**envp)
    {
    const int exitCode = MainTemplate<T3PAKISON>(argc, argv, envp, FSTSTR(".t3"));
    return exitCode;
    }
コード例 #23
0
ファイル: t3tabelpre.cpp プロジェクト: theranger/vabamorf
void T3NGRAM::NGramsFromCooked(
    const CFSFileName& fileName,
    const PFSCODEPAGE codePage,
    const TMPLPTRARRAYBIN2<FSXSTRING,CFSWString>& margendid)
    {
    VOTAFAILIST in(fileName, FSTSTR("rb"), codePage);
    FSXSTRING rida, sona;
    FSXSTRING margend, *rec;
    int algus, lopp, idx, i, nRida=0, *iPtr;
    gramm1.Start(0, margendid.idxLast);
    gramm2.Start(0, margendid.idxLast, margendid.idxLast);
    gramm3.Start(0, margendid.idxLast, margendid.idxLast, margendid.idxLast);
    //printf("%10d reast ngrammid\r",nRida);
    for(nRida=0; in.Rida(rida)==true; nRida++)
        {
        //printf("%10d\r", nRida);
        TMPLPTRARRAY<int> lauseMargendid(30,10);
        rida.Trim();
        rida += FSWSTR(" ");
        for(algus=0, i=0; (lopp=(int)(rida.Find((FSWCHAR)' ', algus)))>0; algus=lopp+1, i++)
            {
            sona=rida.Mid(algus, lopp-algus);
            algus=lopp+1;
            lopp=(int)(rida.Find((FSWCHAR)' ', algus));
            assert(lopp > 0);
            margend=rida.Mid(algus, lopp-algus);

            rec=margendid.Get(&margend, &idx);
            assert(rec!=NULL); // tundmatu märgend

            iPtr=lauseMargendid.AddPlaceHolder();
            assert(iPtr!=NULL);

            *iPtr=idx; // lause i-ndale sõnale vastava märgendi indeks on idx
            }
        // terve lause märgendite indeksid olemas
        for(i=0; i<lauseMargendid.idxLast; i++)
            {
            nGrammeKokku[0]++;
            if(gramm1.Obj(*(lauseMargendid[i]))==0)
                {
                nGrammeErinevaid[0]++;
                }
            gramm1.Obj(*(lauseMargendid[i]))++;
            }
        for(i=1; i<lauseMargendid.idxLast; i++)
            {
            nGrammeKokku[1]++;
            if(gramm2.Obj(*(lauseMargendid[i-1]),*(lauseMargendid[i]))==0)
                {
                nGrammeErinevaid[1]++;
                }
            gramm2.Obj(*(lauseMargendid[i-1]),*(lauseMargendid[i]))++;
            }
        for(i=2; i<lauseMargendid.idxLast; i++)
            {
            nGrammeKokku[2]++;
            if(gramm3.Obj(*(lauseMargendid[i-2]),
                       *(lauseMargendid[i-1]),*(lauseMargendid[i]))==0)
                {
                nGrammeErinevaid[2]++;
                }
            gramm3.Obj(*(lauseMargendid[i-2]),
                       *(lauseMargendid[i-1]),*(lauseMargendid[i]))++;
            }
        }
    /*printf("%10d reast ngrammid |%6d/%6d|%6d/%6d|%6d/%6d|\n",
        nRida,
        nGrammeErinevaid[0],nGrammeKokku[0],
        nGrammeErinevaid[1],nGrammeKokku[1],
        nGrammeErinevaid[2],nGrammeKokku[2]);*/
    }