// // 1. ring: teeme sõnaliigijärjendite 2ndtabeli // void TeeS6naLiikideTabel( CFSFileName &inFileName) { CPFSFile in; CFSbaseSTRING rida; // printf(FSTSTR("%s "), (const FSTCHAR *)inFileName); if(in.Open(inFileName, FSTSTR("rb"))==false) { printf(FSTSTR("%s: ei saa faili avatud\n"), (const FSTCHAR *)inFileName); exit( EXIT_FAILURE ); } while(in.ReadLine(&rida)==true) { int idx; TeeSLL(&rida); // jupitame sisendrea if(sonaliikideMassiiv.Get(&sonaLiigiString, &idx)==NULL) // pole 2ndtabelis... { // ...lisame if(sonaliikideMassiiv.AddClone(sonaLiigiString, idx)==NULL) { printf(FSTSTR("Ei saa lisada sõnaliigijärjendit\n")); assert( false ); exit( EXIT_FAILURE ); } } } in.Close(); }
void LEX2DCT::LoeTekstifailist( const TAGS2DCT& tags ) { lexArr.Start(500,500); CPFSFile in; if(in.Open(FSTSTR("lex.txt"), FSTSTR("rb"))==false) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili lex.txt avamisega"); CFSAString rida; for(int reaNr=1; in.ReadLine(&rida)==true; reaNr++) { LEXINF* lexInf=lexArr.AddPlaceHolder(); // sõna [ N] tag1=prob1 ... tagN=probN rida.Trim();// white space eest-tagant maha rida+=' '; // tühik lõppu int pos1=(int)rida.Find(' '), pos2, pos3; if(pos1<=0) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Jamane rida andmefailis lex.txt", (const char*)rida); if(rida[pos1+5]!=']' || rida[pos1+6]!=' ' || rida[pos1+7]=='\0') throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Jamane rida andmefailis lex.txt", (const char*)rida); CFSAString tagStr, probStr; //CFSWString wTagStr; lexInf->str=rida.Mid(0,pos1); sscanf(((const char*)rida)+pos1+2, "%d", &(lexInf->n)); lexInf->tagIdxProb=new LEXINF::LEXINFEL[lexInf->n]; pos1+=7; for(int i=0; i<lexInf->n; i++) { if((pos2=(int)rida.Find('=', pos1))<=0) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Jamane rida andmefailis lex.txt", (const char*)rida); if((pos3=(int)rida.Find(' ',pos2))<=0) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Jamane rida andmefailis lex.txt", (const char*)rida); tagStr=rida.Mid(pos1, pos2-pos1); if((lexInf->tagIdxProb[i].tagIdx=tags.GetIdx(&tagStr))<0) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Tundmatu ühestamismärgendandmefailis lex.txt", (const char*)rida); sscanf((const char*)rida+pos2+1, "%e", &(lexInf->tagIdxProb[i].tagProb)); pos1=pos3+1; } for(int i=1; i<lexInf->n; i++) { assert(lexInf->tagIdxProb[i-1].tagIdx<lexInf->tagIdxProb[i].tagIdx); } printf("%d\r", lexArr.idxLast); } printf("%d\n", lexArr.idxLast); printf("Leksikoni järjestamine..."); lexArr.Sort(); printf("OK\n"); }
void TAGS2DCT::LoeTekstifailist(void) { CFSFileName fileName(FSTSTR("taglist.txt")); CPFSFile in; if(in.Open(fileName, FSTSTR("rb"))==false) throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__, __LINE__," ", "Ei suuda avada faili taglist.txt"); TMPLPTRARRAYBIN<PCFSAString,CFSAString>::Start(100,10); CFSAString rida; PCFSAString tagStr; // Loeme märgendite loendi mällu while(in.ReadLine(&rida)==true) { tagStr=rida.Mid(4); tagStr.Trim(); if(TMPLPTRARRAYBIN<PCFSAString,CFSAString>::AddClone(tagStr)==NULL) throw VEAD(ERR_HMM_MOOTOR, ERR_NOMEM, __FILE__, __LINE__," "); } in.Close(); printf("Märgendite järjestamine..."); // Garanteerime järjestatuse TMPLPTRARRAYBIN<PCFSAString,CFSAString>::Sort(); // Kontrollime veel üle, et ikka tõesti järjestatud for(int i=1; i<idxLast; i++) { if(*(operator[](i-1)) >= *(operator[](i))) throw VEAD(ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__," ", "Jama märgendite järjekorraga andmefailis taglist.txt"); } printf("OK\n"); gramm1.Start(idxLast); CFSFileName fileName2(FSTSTR("margcnt.txt")); if(in.Open(fileName2, FSTSTR("rb"))==false) throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__, __LINE__," ", "Ei suuda avada faili margcnt.txt"); for(int i=0; i<idxLast; i++) { if(in.ReadLine(&rida)==false) throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," "); int tyhikuPos=rida.Find(' '); if(tyhikuPos<=0) throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," "); CFSAString tag(rida.Left(tyhikuPos)); if(tag!=*(operator[](i))) throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," "); int nKorda; if(sscanf(((const char*)rida)+tyhikuPos, "%d", &nKorda)!=1) throw VEAD(ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__," "); gramm1.Obj(i)=nKorda; } }
// // Tükelda sisendfaili rida // void TeeSLL(CFSbaseSTRING *rida) // real alati 1 tühik lõpus { int tyveLopp, tykikeseAlgus, tykikeseLopp; rida->TrimRight(); (*rida) += FSxSTR(" "); if((tyveLopp=rida->Find((FSxSTR(" "))[0]))== -1) { printf(FSTSTR("Ei leia tüve lõppu\n")); assert( false ); } tyvi = rida->Left(tyveLopp); tykikeseAlgus=tyveLopp+1; sonaLiigiString=(const FSxCHAR*)(rida->Mid(tykikeseAlgus, 1)); if((*rida)[tykikeseAlgus+1] != (FSxSTR("="))[0]) { printf(FSTSTR("Võrdusmärk puudu\n")); assert( false ); } if((tykikeseLopp=rida->Find((FSxSTR(" "))[0], tykikeseAlgus))== -1) { printf(FSTSTR("Jama\n")); assert( false ); } tyveInf=rida->Mid(tykikeseAlgus+2, tykikeseLopp-tykikeseAlgus-2); tykikeseAlgus=tykikeseLopp+1; tykikeseLopp=rida->Find((FSxSTR(" "))[0], tykikeseAlgus); while((*rida)[tykikeseAlgus] != 0) { sonaLiigiString +=(const FSxCHAR*)(rida->Mid(tykikeseAlgus, 1)); if((*rida)[tykikeseAlgus+1] != (FSxSTR("="))[0]) { printf(FSTSTR("Võrdusmärk puudu\n")); assert( false ); } if((tykikeseLopp=rida->Find((FSxSTR(" "))[0], tykikeseAlgus))== -1) { printf(FSTSTR("Jama\n")); assert( false ); } tyveInf += FSxSTR(" "); tyveInf += rida->Mid(tykikeseAlgus+2, tykikeseLopp-tykikeseAlgus-2); tykikeseAlgus=tykikeseLopp+1; tykikeseLopp=rida->Find((FSxSTR(" "))[0], tykikeseAlgus); } }
int main(int argc, char* argv[]) #endif { FSUNUSED(argc); FSUNUSED(argv); try { vabamorf::speller spl(FSTSTR("et.dct")); for (size_t i = 0; words[i]; i++) { std::wcout << words[i] << L" -- "; if (spl.spell(words[i])) { std::wcout << L"OK\n"; } else { std::wcout << L"Vigane, soovitan:"; std::vector<std::wstring> suggs = spl.suggest(words[i]); for (size_t j = 0; j < suggs.size(); j++) { std::wcout << L" " << suggs[j]; } std::wcout << L"\n"; } } } catch (const vabamorf::exception &) { std::wcerr << L"Viga!\n"; } return 0; }
/// Reads one value from an INI file with GetPrivateProfileString().
///
/// @param szFileName     INI file to read
/// @param szSectionName  section that holds the value
/// @param szVarName      key to look up
/// @param pszData        receives the value; a buffer of 1024 characters is
///                       requested from it, and an empty string is used as
///                       the default
/// @return 1 when GetPrivateProfileString() returned 0, otherwise 0
int CFSReg::IniRead(const CFSString &szFileName, const CFSString &szSectionName, const CFSString &szVarName, CFSString *pszData)
{
	long lBufferLen=1024;

	const bool bNothingCopied=
		(GetPrivateProfileString(szSectionName, szVarName, FSTSTR(""),
			pszData->GetBuffer(lBufferLen), lBufferLen, szFileName)==0);

	// Hand the buffer obtained with GetBuffer() back to the string
	pszData->ReleaseBuffer();

	return bNothingCopied ? 1 : 0;
}
/// Reads a bare word with ReadText() and maps it to a value.
///
/// @return CFSVar(true) for "true", CFSVar(false) for "false" and a
///         default-constructed CFSVar for "null"
/// @throws CJSONException for any other word
CFSVar CJSONReader::ReadConst()
{
	CFSAString szStr=ReadText();
	if (szStr=="true") return CFSVar(true);
	if (szStr=="false") return CFSVar(false); // previously yielded true
	if (szStr=="null") return CFSVar();
	throw CJSONException(CFSString(FSTSTR("Unknown constant '")) + FSStrAtoT(szStr, FSCP_UTF8) + FSTSTR("'"));
}
/// Reads one complete value from the input.
///
/// Sets m_iCollectData to 1, fetches the first character and reads the root
/// value with an empty key path.
///
/// @return the value that was read
/// @throws CJSONException if m_cCh is still non-zero after the root value
CFSVar CJSONReader::Read()
{
	m_iCollectData=1;
	GetChar(true);

	CFSVar Root=ReadVal("");

	// A non-zero current character means input is left over
	if (m_cCh) {
		throw CJSONException(FSTSTR("Partially parsed file"));
	}
	return Root;
}
void T3TAGSPRE::TagsFromCooked( const CFSFileName& fileName, const PFSCODEPAGE codePage) { VOTAFAILIST in(fileName, FSTSTR("rb"), codePage); FSXSTRING rida, sona; FSXSTRING margend, *rec; int algus, lopp, idx, i, nRida=0; TMPLPTRARRAYBIN<FSXSTRING,CFSWString> mrgndid(130,50); margend=FSWSTR("***VAHE***"); //lausevahe iga märgendite loend peab sisaldama seda rec=mrgndid.AddClone(margend); assert(rec!=NULL); // märgendi lisamine äpardus margend=FSWSTR("X"); //iga märgendite loend peab sisaldama seda rec=mrgndid.AddClone(margend); assert(rec!=NULL); // märgendi lisamine äpardus //printf("%10d/%3d -- reast/märgendit\r", nRida, mrgndid.idxLast); for(nRida=0; in.Rida(rida)==true; nRida++) { //printf("%10d/%3d\r", nRida, mrgndid.idxLast); rida.Trim(); rida += FSWSTR(" "); for(algus=0; (lopp=(int)(rida.Find((FSWCHAR)' ', algus)))>0; algus=lopp+1) { sona=rida.Mid(algus, lopp-algus); algus=lopp+1; lopp=(int)(rida.Find((FSWCHAR)' ', algus)); //assert(lopp > 0); if(lopp <= 0) throw VEAD(__FILE__, __LINE__, "Vigane COOKED-fail"); margend=(rida.Mid(algus, lopp-algus)); rec=mrgndid.Get(&margend, &idx); if(rec==NULL) // sellist veel polnud, tuleb lisada idx-indaks { rec=mrgndid.AddClone(margend, idx); assert(rec!=NULL); // märgendi lisamine apardus } } } //printf("%10d/%3d -- reast/märgendit\n", nRida, mrgndid.idxLast); // Tõstame märgendid ümber for(i=1; i<mrgndid.idxLast; i++) { if(*(mrgndid[i-1]) >= *(mrgndid[i])) throw VEAD( ERR_HMM_MOOTOR, ERR_OPN, __FILE__, __LINE__," ", "jama märgendite järjestusega"); } for(i=0; i<mrgndid.idxLast; i++) { margendid.AddClone(*(mrgndid[i])); } margendid.Sort(); }
void CONV_HTML_UC2::Start( const FSTCHAR* path, const bool _ignoramp_, const bool _autosgml_ ) { ignoramp=_ignoramp_; autosgml=_autosgml_; if(path!=NULL) // v�tame loendi failist { CFSString tabeliFailiNimi(FSTSTR("sgml-uc-cnv.txt")); CFSString tabeliFailiPikkNimi; CFSString p(path); // Otsime �les, millises kataloogis teisendustabel if(Which(&tabeliFailiPikkNimi, &p, &tabeliFailiNimi)==false) throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__,__LINE__, "$Revision: 557 $", "Ei leia SGML olemite faili sgml-uc-cnv.txt"); // Avame teisenustabelit sisaldaa faili CPFSFile tabeliFail; if(tabeliFail.Open(tabeliFailiPikkNimi, FSTSTR("rb"))==false) throw VEAD(ERR_X_TYKK, ERR_OPN, __FILE__,__LINE__, "$Revision: 557 $" "Ei suuda avada SGML olemite faili" #if !defined( _UNICODE ) ,(const char*)tabeliFailiPikkNimi #endif ); // Loeme failist teisendustabeli m�llu SGML_UC* rec; sgml2uc.Start(100,10); uc2sgml.Start(100,10); sgml_stringi_max_pikkus=0; int n; while((rec=sgml2uc.AddPlaceHolder())->Start(tabeliFail)==true) { uc2sgml.AddPtr(rec); // sellesse massiivi panema ainult viida if((n=(int)strlen(rec->sgml))>sgml_stringi_max_pikkus) sgml_stringi_max_pikkus=n; } sgml2uc.Del(); // kustutame viimase, sest sinna ei �nnestunud lugeda sgml2uc.Sort(SGML_UC::sortBySGMLStr); // selle massiivi j�rjestame SGML olemite j�rgi uc2sgml.Sort(SGML_UC::sortByUCchar); // selle massiivi j�rjestame UNICODEi s�mbolite j�rgi } }
// // 2.ring: indeksid asemele // void PaneIndeksidAsemele( CFSFileName &inFileName, CFSFileName &outFileName) { CPFSFile in, out; CFSbaseSTRING rida, uusRida; if(in.Open(inFileName, FSTSTR("rb"))==false) { printf(FSTSTR("%s: ei saa faili avatud\n"), (const FSTCHAR *)inFileName); exit( EXIT_FAILURE ); } printf(FSTSTR("%s "), (const FSTCHAR *)outFileName); if(out.Open(outFileName, FSTSTR("wb+"))==false) { printf(FSTSTR("\n%s: ei saa faili luua\n"), (const FSTCHAR *)outFileName); exit( EXIT_FAILURE ); } while(in.ReadLine(&rida)==true) { int idx; TeeSLL(&rida); // jupitame uuesti sisendrea if((idx=sonaliikideMassiiv.GetIdx(&sonaLiigiString)) < 0) { printf(FSTSTR("Ei leia juba tehtud sõnaliigijärjendit\n")); assert( false ); } // paneme uue rea kokku uusRida.Format(FSxSTR("%s %d=%s\n"), (const FSxCHAR*)tyvi, idx, (const FSxCHAR*)tyveInf); out.WriteString((const FSxCHAR *)(uusRida), uusRida.GetLength()); } in.Close(); out.Close(); }
int CFSReg::Split(const CFSString &szPath, HKEY *hRoot, CFSString *pszFolder, CFSString *pszFile) { static struct _Roots{ TCHAR *Name; HKEY hKey; }const Roots[]={ {FSTSTR("HKEY_CLASSES_ROOT\\"), HKEY_CLASSES_ROOT}, {FSTSTR("HKEY_CURRENT_USER\\"), HKEY_CURRENT_USER}, {FSTSTR("HKEY_LOCAL_MACHINE\\"), HKEY_LOCAL_MACHINE}, {FSTSTR("HKEY_USERS\\"), HKEY_USERS}, {0, 0} }; if (szPath.IsEmpty()) { return -1; } CFSString szFolder; for (INTPTR ip=0; Roots[ip].Name; ip++){ if (szPath.StartsWith(Roots[ip].Name)) { *hRoot=Roots[ip].hKey; szFolder=szPath.Mid(FSStrLen(Roots[ip].Name)); break; } } if (szFolder.IsEmpty()) { return -1; } INTPTR ipPos=szFolder.ReverseFind('\\'); if (ipPos==-1) { *pszFolder=szFolder; pszFile->Empty(); } else { *pszFolder=szFolder.Left(ipPos); *pszFile=szFolder.Mid(ipPos+1); } return 0; }
/// Opens the main dictionary by creating the ETMRFAS object.
///
/// @param FileName  dictionary file passed to the ETMRFAS constructor
/// @throws CLinguisticException (MAINDICT, OPEN) if m_pMorph is already set;
///         (MAINDICT, UNDEFINED) if construction throws VEAD. Any other
///         exception is rethrown unchanged. Close() is called before either
///         exception leaves the function.
void CLinguistic::Open(const CFSFileName &FileName)
{
	if (m_pMorph) {
		throw CLinguisticException(CLinguisticException::MAINDICT,
		                           CLinguisticException::OPEN);
	}

	try {
		m_pMorph=new ETMRFAS(0, FileName, FSTSTR(""));
	}
	catch(const VEAD&) {
		Close();
		throw CLinguisticException(CLinguisticException::MAINDICT,
		                           CLinguisticException::UNDEFINED);
	}
	catch(...) {
		Close();
		throw;
	}
}
/// Reads a quoted string without decoding escape sequences.
///
/// The current character on entry is taken as the quote character. A
/// backslash and the character after it are both copied to the result
/// unchanged, so an escaped quote does not end the string.
///
/// @return the raw text between the quotes
/// @throws CJSONException if the input ends before the closing quote
CFSAString CJSONReader::ReadString()
{
	const char cTerminator=m_cCh;
	CFSAString szRaw;

	for (;;) {
		if (!GetChar()) {
			break;
		}
		if (m_cCh=='\\') {
			// Keep the backslash and whatever it escapes as they are
			szRaw+=m_cCh;
			if (!GetChar()) {
				break;
			}
			szRaw+=m_cCh;
			continue;
		}
		if (m_cCh==cTerminator) {
			GetChar(true);
			return szRaw;
		}
		szRaw+=m_cCh;
	}

	throw CJSONException(FSTSTR("Missing end of string"));
}
/// Reads a quoted string and decodes its escape sequences.
///
/// The current character on entry is taken as the quote character.
/// Supported escapes: \\ \/ \n \r \t \b \f, \uXXXX (four hex digits,
/// appended as UTF-8) and the escaped quote character itself.
///
/// @return the decoded string
/// @throws CJSONException ("Missing end of string") if the input ends
///         before the closing quote, or on an unknown or malformed escape
CFSAString CJSONReader::ReadString()
{
	char cQuote=m_cCh;
	CFSAString szStr;
	while (GetChar()) {
		if (m_cCh=='\\') {
			if (!GetChar()) break;
			if (m_cCh=='\\') szStr+='\\';
			else if (m_cCh=='/') szStr+='/'; // "\/" is a valid JSON escape
			else if (m_cCh=='\'' && cQuote=='\'') szStr+='\'';
			else if (m_cCh=='"' && cQuote=='"') szStr+='"';
			else if (m_cCh=='n') szStr+='\n';
			else if (m_cCh=='r') szStr+='\r';
			else if (m_cCh=='t') szStr+='\t';
			else if (m_cCh=='b') szStr+='\b';
			else if (m_cCh=='f') szStr+='\f';
			else if (m_cCh=='u') {
				bool Error=false;
				wchar_t Char=0;
				for (INTPTR ip=0; !Error && ip<4; ip++) {
					Error=!GetChar();
					char Code=m_cCh;
					Char*=0x10;
					// Letters are offset from the FIRST letter of the
					// range, so 'a'/'A' map to 10 and 'f'/'F' to 15.
					if (Code>='0' && Code<='9') Char+=Code-'0';
					else if (Code>='a' && Code<='f') Char+=Code-'a'+10;
					else if (Code>='A' && Code<='F') Char+=Code-'A'+10;
					else Error=true;
				}
				if (Error) break;
				szStr+=FSStrWtoA(Char, FSCP_UTF8);
			}
			else break; // unknown escape
		}
		else if (m_cCh==cQuote) {
			GetChar(true);
			return szStr;
		}
		else {
			szStr+=m_cCh;
		}
	}
	throw CJSONException(FSTSTR("Missing end of string"));
}
void T3PAKISON::Run(void) { // "taglist.txt" // "lex.txt" - leksikon // "3grammid.txt" CFSFileName dctFileName(FSTSTR("et3.dct")); DCTMETASTRCT meta; meta.Creat(dctFileName); TAGS2DCT tags; tags.Run(meta); MKLASSID2DCT mklassid; mklassid.Run(meta, tags); LEX2DCT lex; lex.Run(meta, tags); NGRAMS2DCT ngrams; ngrams.Run(meta, tags); meta.Write(); }
int main(int argc, FSTCHAR **argv) { CFSFileName inDCT, outDCT, inPRF, outPRF, outSL; const FSTCHAR *outS6Nlaiend = FSTSTR(".s6n"); // for(argc--, argv++; argc > 0; argc--, argv++) { if(argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'d') { inDCT=argv[0]+2; int punkt=inDCT.Find((FSTCHAR)'.'); outDCT = (punkt== -1) ? inDCT + outS6Nlaiend : inDCT.Left(punkt) + outS6Nlaiend; } else if(argc > 0 && argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'p') { inPRF=argv[0]+2; int punkt=inPRF.Find((FSTCHAR)'.'); outPRF = (punkt== -1) ? inPRF + outS6Nlaiend : inPRF.Left(punkt) + outS6Nlaiend; } else if(argc > 0 && argv[0][0]==(FSTCHAR)'-' && argv[0][1]==(FSTCHAR)'j') { outSL=argv[0]+2; } else { printf(FSTSTR("%s: jama lipp\n"), argv[0]); return EXIT_FAILURE; } } // // 1. ring: teeme sõnaliigujärjendite (2nd)tabeli // TeeS6naLiikideTabel(inDCT); TeeS6naLiikideTabel(inPRF); // // 2.ring: indeksid asemele // printf(FSTSTR("--> ")); PaneIndeksidAsemele(inDCT, outDCT); PaneIndeksidAsemele(inPRF, outPRF); // // sõnaliikide 2ndtabel ka teksti faili // iga tabeli element ise real // int i; CPFSFile sl; printf(FSTSTR("%s\n"), (const FSTCHAR *)outSL); if(sl.Open(outSL, FSTSTR("wb+"))==false) { printf(FSTSTR("\n%s: ei saa faili luua\n"), (const FSTCHAR *)outSL); return EXIT_FAILURE; } RIDA *slptr; printf(FSTSTR(" %d erinevat sõnaliigijärjendit\n"), sonaliikideMassiiv.idxLast); for(i=0; (slptr=sonaliikideMassiiv[i])!=NULL; i++) { (*slptr) += FSxSTR("\n"); const FSxCHAR *slstr = (const FSxCHAR *)(*slptr); const int len = slptr->GetLength(); sl.WriteString(slstr, len); } sl.Close(); printf(FSTSTR("\n")); return EXIT_SUCCESS; }
int Tmain(int argc, FSTCHAR **argv) { CPFSFile infile; /* viit sisendfailile */ CPFSFile outf, errf; /* viidad v@ljundfailidele */ CPFSFile lgrf; /* viit lõpugruppide failile LGR*/ FSXSTRING inbuf; /* toodeldav rida */ FSXSTRING outbuf; /* toodeldav rida */ CFSFileName fn0=argv[1]; CFSFileName fn1=argv[2]; CFSFileName fn2=argv[3]; CFSFileName fn3=FSTSTR("lgr"); int total, converted, unknown; int k; int kk, era, k2, k3; FSXSTRING inbuf_era, inbuf_para_ind; converted = unknown = 0; if (argc != 4) { printf("kirjuta paras arv parameetreid või keri poti laadale\n"); return EXIT_FAILURE; } if (infile.Open( fn0, FSTSTR("rb" )) == false || outf.Open( fn1, FSTSTR("wb" )) == false || errf.Open( fn2, FSTSTR("wb" )) == false || lgrf.Open( fn3, FSTSTR("ab+")) == false) { printf( "Can't open files\n" ); // fopen exit( 1 ); } loe_lgr( &lgrf ); /* loe lõpugrupid mällu */ tyvi[0].tyv = FSxSTR(""); for (total=0; infile.ReadLine(&inbuf)==true; total++) { kk = inbuf.Find(FSxSTR(".0!")); if (kk!=-1) /* on 0-muuttyybi sõna */ { tyybinr = 1; /* lihtsalt et poleks 0 */ k2 = inbuf.Find(FSxSTR("!\\")); if (k2!=-1) { FSXSTRING tmp; tmp = (const FSxCHAR *)inbuf.Mid(k2+2); k3 = tmp.Find(FSxSTR("\\")); if (k3!=-1) { sliik = (const FSxCHAR *)tmp.Left(k3); } } if (k2==-1 || k3==-1) { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = nomnul( &inbuf ); if (!k) { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } converted++; lisa_lgr( &lgrf ); /* leiab igale selle parad tyvele lgr nr */ tee_rida(&inbuf, &outbuf); /* teeb rea nr|tyvi,lgr/tyvi,lgr... */ if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false) { printf("\ntulemuse kirjut. 
väljundfaili ebaõnnestus"); return EXIT_FAILURE; } nullityv(); /* et saaks järgmist parad. teha */ continue; /* et ei vaataks nii, nagu 'norm' ridu */ } era = inbuf.Find(FSxSTR("**")); inbuf_era = FSxSTR(""); if (era != -1) inbuf_era = (const FSxCHAR *)inbuf.Mid(era-1); inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_era.GetLength()); kk = inbuf.Find(FSxSTR("&&")); inbuf_para_ind = FSxSTR(""); if (kk != -1) inbuf_para_ind = (const FSxCHAR *)inbuf.Mid(kk); inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_para_ind.GetLength()); k = era_ind( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = era_reegel( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = era_tyvi( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = tee_para( ); if (!k) /* oli mingi viga */ { unknown++; nullityv(); /* et saaks järgmist parad. teha */ if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } if ( kk != -1 ) /* leidub parall. indeks */ { inbuf_para_ind = (const FSxCHAR *)inbuf_para_ind.Mid(2); if (inbuf_para_ind.Find(FSxSTR("."))==-1) { /* => on uus sonaliik; => salvest. senine*/ lisa_lgr( &lgrf ); /*leiab igale selle parad tyvele lgr nr */ tee_rida(&inbuf, &outbuf); /* teeb rea nr|tyvi,lgr/tyvi,lgr... 
*/ if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false) { printf("\ntulemuse kirjut. väljundfaili ebaõnnestus"); return EXIT_FAILURE; } nullityv(); /* et saaks järgmist parad. teha */ era_tyvi( &inbuf ); /* nullityv() oli ka tyved kaotand */ } k = era_ind(&inbuf_para_ind); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = era_reegel( &inbuf_para_ind ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } if (inbuf_para_ind.Find(FSxSTR("(")) == 0) { for (k=0; tyvi[k].tyv.GetLength() > 0; k++) tyvi[k].sulg = 1; } k = par_para( ); if (!k) /* oli mingi viga */ { unknown++; nullityv(); /* et saaks järgmist parad. teha */ if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } } if ( era != -1 ) /* leidub ka erandeid */ { inbuf_era = (const FSxCHAR *)inbuf_era.Mid(3); k = nomerand( &inbuf_era ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } } converted++; lisa_lgr( &lgrf ); /* leiab igale selle parad tyvele lgr nr */ tee_rida(&inbuf, &outbuf); /* teeb rea nr|tyvi,lgr/tyvi,lgr... */ if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false) { printf("\ntulemuse kirjut. väljundfaili ebaõnnestus"); return EXIT_FAILURE; } nullityv(); /* et saaks järgmist parad. 
teha */ /*STAT:*/ /* tooseisu v@ljastamine ekraanile */ if ( total % 100 == 0 ) printf( "\r KOKKU: %6d KORRAS: %6d SEGASEID: %6d ", total, converted, unknown ); } /*FILE_END:*/ printf( "\r KOKKU: %6d KORRAS: %6d SEGASEID: %6d ", total, converted, unknown ); Tprintf( FSTSTR("** NOM: %s\n"), (const FSTCHAR*)fn0 ); infile.Close(); outf.Close(); errf.Close(); lgrf.Close(); return EXIT_SUCCESS; }
int main(int argc, FSTCHAR **argv) { CPFSFile infile; /* viit sisendfailile */ CPFSFile outf, errf; /* viidad v@ljundfailidele */ CPFSFile lgrf; /* viit lõpugruppide failile LGR*/ FSXSTRING inbuf; /* toodeldav rida */ FSXSTRING outbuf; /* toodeldav rida */ CFSFileName fn_0, fn_1, fn_2; int total, converted, unknown; int k; int kk, era; FSXSTRING inbuf_era, inbuf_para_ind; total = converted = unknown = 0; if (argc != 4) { printf("pane argumendid taha !\n"); return EXIT_FAILURE; } else { fn_0=argv[1]; fn_1=argv[2]; fn_2=argv[3]; } if (infile.Open(fn_0, FSTSTR("rb" )) == false || outf.Open( fn_1, FSTSTR("wb" )) == false || errf.Open( fn_2, FSTSTR("wb" )) == false || lgrf.Open( FSTSTR("lgr"), FSTSTR("ab+")) == false) { printf( "Can't open files\n" ); return EXIT_FAILURE; } loe_lgr( &lgrf ); /* loe lõpugrupid mällu */ tyvi[0].tyv = FSxSTR(""); for (total=0; infile.ReadLine(&inbuf)==true; total++) { era = inbuf.Find(FSxSTR("**")); if (era != -1) { tyybinr = 1; /* lihtsalt et poleks 0; vaja tee_rida() jaoks */ k = inbuf.Find(FSWSTR("!\\")); if ( k == -1) /* polegi indeksit; ei saa olla... */ sliik = FSWSTR("V"); else { sliik = (const FSWCHAR *)inbuf.Mid(k+2); k = sliik.Find(FSWSTR("\\")); if ( k == -1) { printf("\nsonaliik sassis "); return EXIT_FAILURE; } sliik = (const FSWCHAR *)sliik.Left(k); } k = verbera( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; nullityv(); /* et saaks järgmist parad. teha */ if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } converted++; lisa_lgr( &lgrf ); /* leiab igale selle parad tyvele lgr nr */ tee_rida(&inbuf, &outbuf); /* teeb rea nr|tyvi,lgr/tyvi,lgr... */ if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false) { printf("\ntulemuse kirjut. väljundfaili ebaõnnestus"); return EXIT_FAILURE; } nullityv(); /* et saaks järgmist parad. 
teha */ continue; /* vt järgmist kirjet */ } kk = inbuf.Find(FSxSTR("&&")); inbuf_para_ind = FSxSTR(""); if (kk != -1) inbuf_para_ind = (const FSxCHAR *)inbuf.Mid(kk); inbuf = (const FSxCHAR *)inbuf.Left(inbuf.GetLength()-inbuf_para_ind.GetLength()); k = era_ind( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = era_tyvi( &inbuf ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = vteepar( ); if (!k) /* oli mingi viga */ { unknown++; nullityv(); /* et saaks järgmist parad. teha */ if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } if ( kk != -1 ) /* leidub parall. indeks */ { inbuf_para_ind = (const FSxCHAR *)inbuf_para_ind.Mid(2); k = era_ind( &inbuf_para_ind ); if (!k) /* oli mingi viga */ { unknown++; if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } k = vparpar( ); if (!k) /* oli mingi viga */ { unknown++; nullityv(); /* et saaks järgmist parad. teha */ if ((errf.WriteString((const FSxCHAR *)inbuf, inbuf.GetLength())) == false) { printf("\ntulemuse kirjut. error-faili ebaõnnestus"); return EXIT_FAILURE; } continue; /* vt järgmisi ridu */ } } converted++; lisa_lgr( &lgrf ); /* leiab igale selle parad tyvele lgr nr */ tee_rida(&inbuf, &outbuf); /* teeb rea nr|tyvi,lgr/tyvi,lgr... */ if ((outf.WriteString((const FSxCHAR *)outbuf, outbuf.GetLength())) == false) { printf("\ntulemuse kirjut. 
väljundfaili ebaõnnestus"); return EXIT_FAILURE; } nullityv(); /* et saaks järgmist parad. teha */ /*STAT:*/ /* tooseisu v@ljastamine ekraanile */ if ( total % 100 == 0 ) printf( "\r KOKKU: %6d KORRAS: %6d SEGASEID: %6d ", total, converted, unknown ); } /* FILE_END:*/ printf( "\r KOKKU: %6d KORRAS: %6d SEGASEID: %6d ", total, converted, unknown ); Wprintf( FSTSTR("** VRB: %s\n"), (const FSTCHAR*)fn_0); infile.Close(); outf.Close(); errf.Close(); lgrf.Close(); return EXIT_SUCCESS; }
void MKLASSID2DCT::Run( DCTMETASTRCT& meta, ///< Sõnastiku struktuurihoidla const TAGS2DCT& tags ///< Ühestamismärgendite massiiv ) { CPFSFile in; if(in.Open(FSTSTR("klassid.txt"), FSTSTR("rb"))==false) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt avamisega"); CFSAString rida; // tõsta kohe sõnastikku ümber... int n, reaNr, kokkuRidu; long pos=meta.Tell(); meta.Add(DCTELEMID_T3M_KLASSID, pos); if(in.ReadLine(&rida)==false) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega"); sscanf((const char*)rida, "%d", &kokkuRidu); if(meta.WriteUnsigned<UB4, int>(kokkuRidu)==false) // mitmsusklasside arv throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__, " ", "Jama pakitud sõnastikku kirjutamisega"); for(reaNr=1; in.ReadLine(&rida)==true; reaNr++) { printf("%06d:%06d\r", kokkuRidu, reaNr); int tyhik, vordus; rida += " "; if((tyhik=(int)rida.Find(' '))<0) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega"); if(sscanf((const char*)rida, "%d", &n)!=1) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega"); meta.WriteUnsigned<UB1, int>(n); // jooksva mitmesusklassi suurus for(int i=0; i<n; i++) { if((vordus=(int)rida.Find('=', tyhik+1))<=tyhik+1) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega", (const char*)rida); CFSAString tagStr=rida.Mid(tyhik+1, vordus-tyhik-1); int tagIdx=tags.GetIdx(&tagStr); if(tagIdx<0) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", "Tundmatu ühestamismärgend andmefailis klassid.txt ", (const char*)rida); if(meta.WriteUnsigned<UB1,int>(tagIdx)==false) throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__, " ", "Jama pakitud sõnastikku kirjutamisega"); UKAPROB tagProb; if(sscanf(((const char*)rida)+vordus+1, "%e", &tagProb)!=1) throw VEAD( ERR_X_TYKK, ERR_ROTTEN, __FILE__, __LINE__, " ", 
"Puuduv tõenäosus failis klassid.txt ", (const char*)rida); if(meta.WriteBuffer(&tagProb,sizeof(UKAPROB))==false) throw VEAD( ERR_HMM_MOOTOR, ERR_WRITE, __FILE__, __LINE__, " ", "Jama pakitud sõnastikku kirjutamisega"); if((tyhik=(int)rida.Find(' ', tyhik+1))<=0) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega"); if(vordus >= tyhik) throw VEAD( ERR_HMM_MOOTOR, ERR_RD, __FILE__, __LINE__, " ", "Jama andmefaili klassid.txt lugemisega"); } } printf("\n"); }
/// Reads one value starting at the current character.
///
/// '[' starts an array, '{' a map, a single or double quote a string, a
/// digit or one of "-+." a number, and a letter a constant. A zero current
/// character yields an empty value. Children are read recursively with the
/// key path extended by "/<index>" or "/<key>" and are stored only while
/// m_iCollectData is positive. OnValReadStart() and OnValReadEnd() are
/// called around the value.
///
/// @param szKeyPath  path of the value being read
/// @return the value read
/// @throws CJSONException on premature end of input, a missing ',' or ':',
///         a missing key, or an unrecognised value
CFSVar CJSONReader::ReadVal(const CFSAString &szKeyPath)
{
	OnValReadStart(szKeyPath);
	CFSVar Result;

	if (m_cCh=='[') {
		Result.Cast(CFSVar::VAR_ARRAY);
		GetChar(true);
		for (INTPTR ipIndex=0; ; ipIndex++) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpetcted EOF"));
			}
			if (m_cCh==']') {
				GetChar(true);
				break;
			}
			// Every element but the first must be preceded by a comma
			if (ipIndex>0) {
				if (m_cCh!=',') {
					throw CJSONException(FSTSTR("Missing ',' in array"));
				}
				GetChar(true);
			}
			CFSAString szIndex;
			szIndex.Format("%zd", ipIndex);
			CFSVar Element=ReadVal(szKeyPath+"/"+szIndex);
			if (m_iCollectData>0) {
				Result[ipIndex]=Element;
			}
		}
	}
	else if (m_cCh=='{') {
		Result.Cast(CFSVar::VAR_MAP);
		GetChar(true);
		for (INTPTR ipCount=0; ; ipCount++) {
			if (m_cCh==0) {
				throw CJSONException(FSTSTR("Unexpetcted EOF"));
			}
			if (m_cCh=='}') {
				GetChar(true);
				break;
			}
			// Every entry but the first must be preceded by a comma
			if (ipCount>0) {
				if (m_cCh!=',') {
					throw CJSONException(FSTSTR("Missing ',' in map"));
				}
				GetChar(true);
			}

			// A key is either a quoted string or a bare word
			CFSAString szName;
			if (m_cCh=='\"' || m_cCh=='\'') {
				szName=ReadString();
			}
			else if (FSIsLetter(m_cCh)) {
				szName=ReadText();
			}
			else {
				throw CJSONException(FSTSTR("Expected key"));
			}

			if (m_cCh!=':') {
				throw CJSONException(FSTSTR("Expected ':'"));
			}
			GetChar(true);

			CFSVar Element=ReadVal(szKeyPath+"/"+szName);
			if (m_iCollectData>0) {
				Result[szName]=Element;
			}
		}
	}
	else if (m_cCh=='\"' || m_cCh=='\'') {
		Result=ReadString();
	}
	else if ((m_cCh>='0' && m_cCh<='9') || FSStrChr("-+.", m_cCh)) {
		Result=ReadNumber();
	}
	else if (FSIsLetter(m_cCh)) {
		Result=ReadConst();
	}
	else if (!m_cCh) {
		// nothing to read: the value stays empty
	}
	else {
		throw CJSONException(FSTSTR("Unknown value type"));
	}

	OnValReadEnd(szKeyPath, Result);
	return Result;
}
/// Program entry point: forwards the arguments and environment to
/// MainTemplate<T3PAKISON>, together with the string ".t3".
///
/// @return whatever MainTemplate<T3PAKISON>() returns
int Tmain(int argc, FSTCHAR** argv, FSTCHAR**envp)
{
    const int tulemus = MainTemplate<T3PAKISON>(argc, argv, envp, FSTSTR(".t3"));
    return tulemus;
}
void T3NGRAM::NGramsFromCooked( const CFSFileName& fileName, const PFSCODEPAGE codePage, const TMPLPTRARRAYBIN2<FSXSTRING,CFSWString>& margendid) { VOTAFAILIST in(fileName, FSTSTR("rb"), codePage); FSXSTRING rida, sona; FSXSTRING margend, *rec; int algus, lopp, idx, i, nRida=0, *iPtr; gramm1.Start(0, margendid.idxLast); gramm2.Start(0, margendid.idxLast, margendid.idxLast); gramm3.Start(0, margendid.idxLast, margendid.idxLast, margendid.idxLast); //printf("%10d reast ngrammid\r",nRida); for(nRida=0; in.Rida(rida)==true; nRida++) { //printf("%10d\r", nRida); TMPLPTRARRAY<int> lauseMargendid(30,10); rida.Trim(); rida += FSWSTR(" "); for(algus=0, i=0; (lopp=(int)(rida.Find((FSWCHAR)' ', algus)))>0; algus=lopp+1, i++) { sona=rida.Mid(algus, lopp-algus); algus=lopp+1; lopp=(int)(rida.Find((FSWCHAR)' ', algus)); assert(lopp > 0); margend=rida.Mid(algus, lopp-algus); rec=margendid.Get(&margend, &idx); assert(rec!=NULL); // tundmatu märgend iPtr=lauseMargendid.AddPlaceHolder(); assert(iPtr!=NULL); *iPtr=idx; // lause i-ndale sõnale vastava märgendi indeks on idx } // terve lause märgendite indeksid olemas for(i=0; i<lauseMargendid.idxLast; i++) { nGrammeKokku[0]++; if(gramm1.Obj(*(lauseMargendid[i]))==0) { nGrammeErinevaid[0]++; } gramm1.Obj(*(lauseMargendid[i]))++; } for(i=1; i<lauseMargendid.idxLast; i++) { nGrammeKokku[1]++; if(gramm2.Obj(*(lauseMargendid[i-1]),*(lauseMargendid[i]))==0) { nGrammeErinevaid[1]++; } gramm2.Obj(*(lauseMargendid[i-1]),*(lauseMargendid[i]))++; } for(i=2; i<lauseMargendid.idxLast; i++) { nGrammeKokku[2]++; if(gramm3.Obj(*(lauseMargendid[i-2]), *(lauseMargendid[i-1]),*(lauseMargendid[i]))==0) { nGrammeErinevaid[2]++; } gramm3.Obj(*(lauseMargendid[i-2]), *(lauseMargendid[i-1]),*(lauseMargendid[i]))++; } } /*printf("%10d reast ngrammid |%6d/%6d|%6d/%6d|%6d/%6d|\n", nRida, nGrammeErinevaid[0],nGrammeKokku[0], nGrammeErinevaid[1],nGrammeKokku[1], nGrammeErinevaid[2],nGrammeKokku[2]);*/ }