void CCmpLexicon::WriteToken(const char* inText, obit_stream& ioBits) { Lexicon::iterator i = lower_bound(lexicon.begin() + 1, lexicon.end(), LexEntry(inText, 0)); if (i != lexicon.end() && strcmp(inText, (*i).text) == 0) { Push(ioBits, (*i).code, (*i).cnt); } else { Push(ioBits, lexicon[0].code, lexicon[0].cnt); for (const char* p = inText; *p; ++p) { RestChar e; e.ch = static_cast<unsigned char>(*p); RestCharacters::iterator i = lower_bound(rest.begin(), rest.end(), e); assert(i != rest.end()); assert((*i).ch == static_cast<unsigned char>(*p)); Push(ioBits, (*i).code, (*i).cnt); } Push(ioBits, rest[0].code, rest[0].cnt); } }
bool InEnglish(const string &word) { for(Lexicon::iterator it = english.begin(); it != english.end(); ++it) if(word == *it) return true; return false; }
// Serialize the lexicon to `out`, one entry per line:
//   <word> [<tag> <count>]...
// The output bytes are unchanged from the original; only the per-line
// stream flush (std::endl) was removed.
void writeLexicon(ostream &out, Lexicon const &lexicon)
{
    for (Lexicon::const_iterator iter = lexicon.begin(); iter != lexicon.end(); ++iter)
    {
        out << iter->first;
        for (map<string, size_t>::const_iterator tagIter = iter->second.begin();
             tagIter != iter->second.end(); ++tagIter)
        {
            out << " " << tagIter->first << " " << tagIter->second;
        }
        // FIX: '\n' instead of endl — endl forced a flush after every
        // lexicon entry, which is needlessly slow for large lexica.
        out << '\n';
    }
}
//********************************************************************** // // M A I N // //********************************************************************** int main(int argc, char *argv[]) { QCoreApplication a(argc, argv); QsLogging::initQsLog(); if (argc<1) { cerr << USAGE; return EXIT_FAILURE; } QsLogging::initQsLog(); readCommandLineArguments(argc,argv); if (param.help) { cerr << HELP; return EXIT_FAILURE; } string resourcesPath=getenv("LIMA_RESOURCES")==0?"/usr/share/apps/lima/resources":string(getenv("LIMA_RESOURCES")); string configDir=getenv("LIMA_CONF")==0?"/usr/share/config/lima":string(getenv("LIMA_CONF")); if ( (!param.language.size()) && (!param.codeFile.size()) ) { cerr << "no codefile nor language specified !" << endl; cerr << "Use e.g option '-l fre'." << endl; cerr << "Option '-h' gives full help" << endl; return EXIT_FAILURE; } else if ( param.language.size() ) { param.codeFile=resourcesPath+"/LinguisticProcessings/"+param.language+"/code-"+param.language+".xml"; } cerr << "read proccodeManager from file " << param.codeFile << "..." << endl; PropertyCodeManager propcodemanager; propcodemanager.readFromXmlFile(param.codeFile); cerr << "get macroManager..." 
<< endl; const PropertyManager& macroManager = propcodemanager.getPropertyManager("MACRO"); const PropertyAccessor& propertyAccessor = macroManager.getPropertyAccessor(); set<LinguisticCode> referenceProperties; for ( std::vector<string>::const_iterator macro = param.macro.begin() ; macro != param.macro.end() ; macro++ ) { cerr << "referenceProperties.insert(" << *macro << ")" << endl; LinguisticCode referenceProperty = macroManager.getPropertyValue(*macro); referenceProperties.insert(referenceProperty); } cerr << "referencePropertySet= "; set<LinguisticCode>::iterator propIt = referenceProperties.begin(); if ( propIt != referenceProperties.end() ) { const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt); cerr << symbol; propIt++; } for ( ; propIt != referenceProperties.end() ; propIt++ ) { const std::string& symbol = macroManager.getPropertySymbolicValue(*propIt); cerr << ", " << symbol; } cerr << endl; Lexicon lex; // read all files and count terms vector<string>::const_iterator file=param.inputFiles.begin(), file_end=param.inputFiles.end(); for (;file!=file_end; file++) { ifstream fileIn((*file).c_str(), std::ifstream::binary); if (! fileIn) { cerr << "cannot open input file [" << *file << "]" << endl; continue; } BoWBinaryReader reader; try { reader.readHeader(fileIn); } catch (exception& e) { cerr << "Error: " << e.what() << endl; return EXIT_FAILURE; } switch (reader.getFileType()) { case BOWFILE_TEXT: { cerr << "Build lexicon from BoWText [" << *file << "]" << endl; try { readBowFileText(fileIn,reader, lex, propertyAccessor, referenceProperties); } catch (exception& e) { cerr << "Error: " << e.what() << endl; } break; } case BOWFILE_DOCUMENTST: { cerr << "ReadBoWFile: file contains a BoWDocumentST -> not treated" << endl; } case BOWFILE_DOCUMENT: { cerr << "ReadBoWFile: build BoWdocument from " << *file<< endl; BoWDocument* document=new BoWDocument(); try { cerr << "ReadBoWFile: extract terms... 
" << endl; readDocuments(fileIn,document,reader, lex, macroManager, propertyAccessor, referenceProperties); } catch (exception& e) { cerr << "Error: " << e.what() << endl; } fileIn.close(); delete document; break; } default: { cerr << "format of file " << reader.getFileTypeString() << " not managed" << endl; return EXIT_FAILURE; } } } // output stream (default is 'cout') std::ostream *s_out; // Manage output if ( param.outputFilename.length() == 0) s_out=&std::cout; else s_out = new std::ofstream(param.outputFilename.c_str(), std::ios_base::out | std::ios_base::binary | std::ios_base::trunc); // output lexicon Lexicon::const_iterator w=lex.begin(), w_end=lex.end(); for (;w!=w_end; w++) { (*s_out) << Common::Misc::limastring2utf8stdstring((*w).second.second) << "|" << Common::Misc::limastring2utf8stdstring((*w).first) << "|" << (*w).second.first << endl; } // Close output file (if any) if ( param.outputFilename.length() != 0) dynamic_cast<std::ofstream*>(s_out)->close(); return EXIT_SUCCESS; }
void CCmpLexicon::Reduce(HStreamBase& inFile)
{
    // Build canonical Huffman code tables for (a) the words retained in
    // the lexicon and (b) the fallback per-character ("rest") table,
    // and serialize both tables to inFile. The least frequent words are
    // dropped (and re-counted as character sequences) until the total
    // text size fits within max_size.

    HSwapStream<net_swapper> data(inFile);

    // Entry 0 of the lexicon is the escape symbol used by WriteToken
    // for words that do not survive the reduction.
    lexicon.push_back(LexEntry("\x1b", 0));

    // One slot per byte value; count starts at 1 (not 0) so every
    // character gets a code even if it never occurs.
    RestCharacters chars;
    uint32 i;
    for (i = 0; i < 256; ++i)
    {
        chars.push_back(RestChar());
        chars[i].ch = static_cast<unsigned char>(i);
        chars[i].cnt = 1; // was 0
        chars[i].code = 0;
    }

    uint32 n, h;

    // try to reduce the lexicon size to something reasonable.
    // Layout: A[0..n) is a heap of indices into A[n..2n) (the counts);
    // str[] holds the word texts in the same order.
    LexiconSet::iterator w;
    n = word_set.size();
    HAutoBuf<uint32> A_(new uint32[n * 2]);
    uint32* A = A_.get();
    HAutoBuf<const char*> str(new const char*[n]);
    uint32 s = 0;
    i = 0;
    for (w = word_set.begin(); w != word_set.end(); ++w, ++i)
    {
        A[i] = i + n;
        A[i + n] = (*w).second;
        // NOTE(review): keeps a raw pointer to the key text; assumes the
        // text outlives word_set (e.g. keys are externally-owned
        // const char*) — confirm.
        str[i] = (*w).first;
        s += strlen(str[i]) + 1;
    }

    // word_set.clear();
    word_set = LexiconSet();

    // Pop the least frequent words until the remaining text fits;
    // their characters (plus a terminating NUL) are charged to the
    // rest-character counts, and each drop bumps the escape count.
    h = n;
    make_heap(A, A + h, CntCompare(A));
    while (s > max_size)
    {
        const char* t = str[A[0] - n];
        ++lexicon.front().cnt;
        for (const char* p = t; *p; ++p)
            ++chars[static_cast<unsigned char>(*p)].cnt;
        ++chars[0].cnt;   // end-of-word marker
        s -= strlen(t) + 1;
        A[0] = A[h - 1];
        --h;
        pop_heap(A, A + h, CntCompare(A));
    }

    // Survivors enter the lexicon; keep entry 0 (escape) in place and
    // sort the rest so WriteToken can binary-search.
    for (i = 0; i < h; ++i)
        lexicon.push_back(LexEntry(str[A[i] - n], A[A[i]]));
    sort(lexicon.begin() + 1, lexicon.end());

    // In-place Huffman construction: A[n..2n) holds leaf weights,
    // A[0..n) a heap of indices. Merging rewrites entries into parent
    // links; the pass below turns parent links into code lengths.
    n = lexicon.size();
    // FIX: the original did 'A = new uint32[n * 2];' here, leaking that
    // allocation (A_ still owned, and eventually freed, only the first
    // buffer). Reset the owner instead, as the rest-character section
    // below already does.
    A_.reset(new uint32[n * 2]);
    A = A_.get();
    for (i = 0; i < n; ++i)
    {
        A[i] = i + n;
        A[i + n] = lexicon[i].cnt;
    }
    h = n;
    make_heap(A, A + h, CntCompare(A));
    while (h > 1)
    {
        uint32 m1 = A[0];
        A[0] = A[h - 1];
        --h;
        pop_heap(A, A + h, CntCompare(A));
        uint32 m2 = A[0];
        A[0] = A[h - 1];
        A[h] = A[m1] + A[m2];
        A[0] = h;
        A[m1] = A[m2] = h;
        pop_heap(A, A + h);
    }
    A[1] = 0;
    for (i = 2; i < 2 * n; ++i)
        A[i] = A[A[i]] + 1;        // depth = parent depth + 1
    for (i = 0; i < n; ++i)
        lexicon[i].cnt = A[i + n]; // cnt now holds the code length

    // Canonical code assignment from the per-length symbol counts.
    uint32 numl[32];
    uint32 firstcode[32];
    uint32 nextcode[32];
    for (i = 0; i < 32; ++i)
        numl[i] = 0;
    for (i = 0; i < n; ++i)
        ++numl[A[i + n]];
    firstcode[31] = 0;
    for (int l = 30; l >= 0; --l)
        firstcode[l] = (firstcode[l + 1] + numl[l + 1]) / 2;
    for (int l = 0; l < 32; ++l)
        nextcode[l] = firstcode[l];

    HAutoBuf<uint32> symbol_table(new uint32[n]);
    uint32 six[32];
    six[0] = 0;
    for (i = 1; i < 32; ++i)
        six[i] = six[i - 1] + numl[i - 1];
    for (i = 0; i < n; ++i)
    {
        uint32 li = A[i + n];
        lexicon[i].code = nextcode[li];
        symbol_table[six[li] + nextcode[li] - firstcode[li]] = i;
        ++nextcode[li];
    }

    // Serialize the word table header.
    data << n;
    for (i = 0; i < 32; ++i)
        data << firstcode[i];
    for (i = 0; i < 32; ++i)
        data << six[i];

    // Concatenate the symbol texts (in symbol-table order) into one
    // owned buffer and repoint the lexicon entries into it.
    uint32 symbol_text_length = 0;
    for (i = 0; i < n; ++i)
        symbol_text_length += strlen(lexicon[symbol_table[i]].text) + 1;
    // NOTE(review): symbol_text is a member; any previous value is
    // leaked here — confirm Reduce is only called once per object.
    symbol_text = new char[symbol_text_length];
    char* d = symbol_text;
    for (i = 0; i < n; ++i)
    {
        strcpy(d, lexicon[symbol_table[i]].text);
        lexicon[symbol_table[i]].text = d;
        symbol_table[i] = static_cast<uint32>(d - symbol_text);
        d += strlen(d) + 1;
    }
    data << symbol_text_length;
    data.Write(symbol_text, symbol_text_length);

    // and now repeat all steps for the rest characters

    // Count how many characters we actually have:
    n = 0;
    rest = chars;
    // for (RestCharacters::iterator i = chars.begin(); i != chars.end(); ++i)
    // {
    //     if ((*i).cnt != 0)
    //     {
    //         rest.push_back(*i);
    //         rest.back().cnt = 0;
    //     }
    // }
    n = rest.size();

    A_.reset(new uint32[n * 2]);
    A = A_.get();
    for (i = 0; i < n; ++i)
    {
        A[i] = i + n;
        A[i + n] = rest[i].cnt;
    }
    h = n;
    make_heap(A, A + h, CntCompare(A));
    while (h > 1)
    {
        uint32 m1 = A[0];
        A[0] = A[h - 1];
        --h;
        pop_heap(A, A + h, CntCompare(A));
        uint32 m2 = A[0];
        A[0] = A[h - 1];
        A[h] = A[m1] + A[m2];
        A[0] = h;
        A[m1] = A[m2] = h;
        pop_heap(A, A + h);
    }
    A[1] = 0;
    for (i = 2; i < 2 * n; ++i)
        A[i] = A[A[i]] + 1;
    for (i = 0; i < n; ++i)
        rest[i].cnt = A[i + n];

    // Canonical codes for the character table, same scheme as above.
    for (i = 0; i < 32; ++i)
        numl[i] = 0;
    for (i = 0; i < n; ++i)
        ++numl[A[i + n]];
    firstcode[31] = 0;
    for (int l = 30; l >= 0; --l)
        firstcode[l] = (firstcode[l + 1] + numl[l + 1]) / 2;
    for (int l = 0; l < 32; ++l)
        nextcode[l] = firstcode[l];
    six[0] = 0;
    for (i = 1; i < 32; ++i)
        six[i] = six[i - 1] + numl[i - 1];

    HAutoBuf<unsigned char> char_symbol_table(new unsigned char[n]);
    for (i = 0; i < n; ++i)
    {
        uint32 li = A[i + n];
        rest[i].code = nextcode[li];
        char_symbol_table[six[li] + nextcode[li] - firstcode[li]] = rest[i].ch;
        ++nextcode[li];
    }
    data << n;
    for (i = 0; i < 32; ++i)
        data << firstcode[i];
    for (i = 0; i < 32; ++i)
        data << six[i];
    data.Write(char_symbol_table.get(), n);
}