C++ (Cpp) UnigramDict::import Examples

Programming Language: C++ (Cpp)

Class/Type: UnigramDict

Method/Function: import

Examples at hotexamples.com: 2

C++ (Cpp) UnigramDict::import - 2 examples found. These are the top-rated real-world C++ (Cpp) examples of UnigramDict::import, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

save(2)

import(2)

exactMatch(1)

Frequently Used Methods

save (2)

import (2)

exactMatch (1)

Example #1

Show file

File: mmseg_interface.cpp Project: seacoastboy/mmseg

/*
 * Python-callable wrapper that builds a dictionary file from a source file.
 *
 * args is parsed as a 3-tuple of strings: (type, source_file, target_file).
 *
 *  - type starting with "unigram":   source_file is opened with a
 *    UnigramCorpusReader, imported into a UnigramDict, and saved to
 *    target_file. Returns the value of UnigramDict::import as a Python int.
 *  - type starting with "thesaurus": ThesaurusDict::import(source_file,
 *    target_file) is called and its result returned as a Python int.
 *  - anything else: returns None.
 *
 * Returns NULL (with the exception set by PyArg_Parse) when args does not
 * match the expected tuple. self is not used.
 */
PyObject * PyMmseg_BuildDict(PyObject * self, PyObject* args)
{
	char *type = NULL;
	char *source_file = NULL;
	char *target_file = NULL;

	(void)self;

	if (!PyArg_Parse(args, "(sss)", &type, &source_file, &target_file))
		return NULL;

	if (strncmp("unigram", type, 7) == 0) {
		UnigramCorpusReader ur;
		ur.open(source_file, NULL);
		int ret = 0;
		{
			// Inner scope: the dictionary is destroyed before the result
			// object is built, as in the original code.
			UnigramDict ud;
			ret = ud.import(ur);
			ud.save(target_file);
		}
		return Py_BuildValue("i", ret);
	}

	if (strncmp("thesaurus", type, 9) == 0) {
		ThesaurusDict tdict;
		int ret = tdict.import(source_file, target_file);
		return Py_BuildValue("i", ret);
	}

	// Unknown type. Py_RETURN_NONE takes a new reference to None before
	// returning it; returning the bare Py_None pointer would hand the caller
	// a reference this function never owned.
	Py_RETURN_NONE;
}

Example #2

Show file

File: mmseg_main.cpp Project: RobinQu/mmseg

int main(int argc, char **argv) {
	int c;
	const char* corpus_file = NULL;
	const char* uni_corpus_file = NULL;
	const char* thesaurus_file = NULL;
	const char* out_file = NULL;
	const char* dict_path = NULL;
	const char* target_file = NULL;
	char out_buf[512];
	
	if(argc < 2){
		usage(argv[0]);
		exit(0);
	}
	u1 bPlainText = 0;
	u1 bUcs2 = 0;
	while ((c = getopt(argc, argv, "t:b:u:d:o:rU")) != -1) {
		switch (c) {
		case 'o':
			target_file = optarg;
			break;
		case 'u':
			uni_corpus_file = optarg;
			break;
		case 'b':
			corpus_file = optarg;
			break;
		case 'd':
			dict_path = optarg;
			break;
		case 't':
			thesaurus_file = optarg;
			break;
		case 'r':
			bPlainText = 1;
			break;
		case 'U':
			bUcs2 = 1;
			break;
		case 'h':
			usage(argv[0]);
			exit(0);
		default:
			fprintf(stderr, "Illegal argument \"%c\"\n", c);
			return 1;
		}
	}

	if(optind < argc) {
		out_file = argv[optind];
	}

	if(thesaurus_file) {
		ThesaurusDict tdict;
		tdict.import(thesaurus_file, target_file);
		//ThesaurusDict ldict;
		//ldict.load("thesaurus.lib");
		return 0;
	}

	if(corpus_file){
		//build Synonyms dictionary
		SynonymsDict dict;
		dict.import(corpus_file);
		if(target_file)
		   dict.save(target_file);
		else
		   dict.save("synonyms.dat");
		//debug use
		//dict.load("synonyms.dat");
		//printf("%s\n", dict.exactMatch("c#"));
		return 0;
	}

	if(!corpus_file && !dict_path) {
		//build unigram 
		if(!out_file) {
			//build the output filename
			size_t len = strlen(uni_corpus_file);
			memcpy(out_buf,uni_corpus_file,len);
			memcpy(&out_buf[len],".uni\0",5);
			out_file = out_buf;
		}
		
		if(target_file) {
			out_file = target_file;
		}
		
		UnigramCorpusReader ur;
		ur.open(uni_corpus_file,bPlainText?"plain":NULL);
		if(!bUcs2){
			UnigramDict ud;
			int ret = ud.import(ur);
			ud.save(out_file);		
			//check
			int i = 0;
			for(i=0;i<ur.count();i++)
			{
				UnigramRecord* rec = ur.getAt(i);
				
				if(ud.exactMatch(rec->key.c_str()) == rec->count){
					continue;
				}else{
					printf("error!!!");
				}
			}//end for
		}else{
			printf("UCS2 used as inner encoding, is unsupported\n");
		}
		return 0;
	}else
	if(!dict_path){ //not segment mode.
		//build bigram
		if(!out_file) {
			//build the output filename
			size_t len = strlen(corpus_file);
			memcpy(out_buf,corpus_file,len);
			memcpy(&out_buf[len],".bi\0",4);
			out_file = out_buf;
		}
		printf("Bigram build unsupported.\n");
	}//end if(!corpus_file)
	//Segment mode
	{
		SegmenterManager* mgr = new SegmenterManager();
		int nRet = 0;
		if(dict_path)
			nRet = mgr->init(dict_path);
		else{
			usage(argv[0]);
			exit(0);
		}
		if(nRet == 0){
			//init ok, do segment.
			Segmenter* seg = mgr->getSegmenter();
			segment(out_file,seg);
		}
		delete mgr;
	}
	
	return 0;
}