Пример #1
0
// Replaces the span that matchInternal() reports as the whole match
// (region entry 0) with `subst`.
//
// text    - string to search.
// subst   - replacement inserted in place of the matched span.
// matched - out: true when matchInternal() returned a region, false otherwise.
//
// Returns a copy of `text` when there is no match; otherwise the part of
// `text` before the match, followed by `subst`, followed by the part of
// `text` after the match.
ucs4string Regexp::replace(ucs4string& text, ucs4string& subst, bool& matched)
{
    OnigRegion* const region = matchInternal(text);
    if (NULL == region) {
        matched = false;
        return text;
    }
    matched = true;

    // beg[0]/end[0] delimit the whole match. They are divided by
    // sizeof(ucs4char) to obtain character indices (presumably the engine
    // reports byte offsets -- confirm against matchInternal()).
    // NOTE(review): `region` is not released here -- confirm who owns it.
    const ucs4string::size_type matchBegin = region->beg[0] / sizeof(ucs4char);
    const ucs4string::size_type matchEnd = region->end[0] / sizeof(ucs4char);

    // Build the result as a string object. Going through a raw
    // `const ucs4char*` (as `.data()` did) re-measures the text up to the
    // first NUL code unit, silently truncating results that contain U+0000.
    ucs4string result = text.substr(0, matchBegin);
    result += subst;
    result += text.substr(matchEnd);
    return result;
}
Пример #2
0
unsigned int ustrtou(const ucs4string& ustr)
{
	unsigned int u = 0;
	for (size_t i=0; i<ustr.size(); ++i)
	{
		ucs4_t ch = ustr[i];
		if (ch<'0' || ch>'9')
			break;

		u = u * 10 + (ch - '0');
	}
	return u;
}
Пример #3
0
// Recursively enumerates every way of cutting `to_seg` into pieces and keeps
// the candidate with the smallest accumulated rate reciprocal.
//
// base    - segments chosen so far, each preceded by a single space.
// base_rr - product of GetRateReciprocal() over the segments in `base`.
// to_seg  - remaining text that still has to be segmented.
// data    - in/out: m_rr holds the best (lowest) score found so far and
//           m_res the segmentation that produced it.
void Segmentation::DoSegment(ucs4string base, double base_rr, ucs4string to_seg, SegmentData& data)
{
	// Try each proper prefix as the next segment and recurse on the rest.
	const size_t length = to_seg.size();
	for (size_t cut = 1; cut < length; ++cut)
	{
		ucs4string head = to_seg.substr(0, cut);
		const double head_rr = base_rr * GetRateReciprocal(head);
		DoSegment(base + ucs4_t(' ') + head, head_rr, to_seg.substr(cut), data);
	}

	// Finally consider the remainder as one unsplit segment.
	const double candidate_rr = base_rr * GetRateReciprocal(to_seg);

	// Written as !(a > b) so that ties (and unordered comparisons) still
	// replace the stored result.
	if (!(candidate_rr > data.m_rr))
	{
		data.m_rr = candidate_rr;
		data.m_res = base + ucs4_t(' ') + to_seg;
	}
}
Пример #4
0
int main(int argc, char *argv[])
{
	if (argc != 3)
	{
		std::cerr << "Usage:" << argv[0] << " <ucs4-phrase-file> <ucs4-dictionary>" << std::endl;
		exit(1);
	}

	std::ifstream fin(argv[1], std::ios::binary);
	if (!fin.is_open())
	{
		std::cerr << "Failed to open " << argv[1] << " for read." << std::endl;
		exit(2);
	}
	std::ofstream fout(argv[2], std::ios::binary);
	if (!fout.is_open())
	{
		std::cerr << "Failed to open " << argv[2] << " for write." << std::endl;
		exit(3);
	}

	DictionaryGenerator dg;

	while (!fin.eof())
	{
		ucs4string s;

		ucs4getline(fin, s);

		dg.SetmentPhrase(s);
	}

	DictMap::const_iterator it = dg.GetDictionary().begin();
	DictMap::const_iterator itend = dg.GetDictionary().end();
	for (; it!=itend; ++it)
	{
		const ucs4string ustr=it->first;
		unsigned int cnt=it->second;
		if (cnt==1 && ustr.size()!=1)
			continue;
		ucs4putstr(fout, ustr);
		ucs4putstr(fout, stdtoustr(" "));
		ucs4putstr(fout, utoustr(cnt));
		ucs4putch(fout, ucs4_t('\n'));
	}

	return 0;
}
Пример #5
0
// Writes the raw in-memory contents of `ustr` to `os`, emitting 4 bytes per
// character. No terminator is written and no byte-order conversion is done.
// NOTE(review): assumes each element of ucs4string occupies 4 bytes -- confirm.
void ucs4putstr(std::ostream& os, const ucs4string& ustr)
{
	const size_t kBytesPerChar = 4;
	const char* const raw = reinterpret_cast<const char *>(ustr.data());
	os.write(raw, ustr.size() * kBytesPerChar);
}
Пример #6
0
void TextualOutputPort::format(const VM* theVM, const ucs4string& fmt, Object args)
{
    ucs4string buffer = UC("");
    for (uint32_t i = 0; i < fmt.size(); i++) {
        if (fmt[i] == '~') {
            i++;
            if (!buffer.empty()) {
                putString(buffer);
                buffer.clear();
            }
            switch (fmt[i]) {
            case '~':
                display(theVM, Object::makeChar('~'));
                break;
            case '%':
                display(theVM, Object::makeChar('\n'));
                break;
            case 'a':
            case 'A':
            case 'd':
            case 'D':
            {
                if (args.isPair()) {
                    display(theVM, args.car());
                    args = args.cdr();
                } else {
                    isErrorOccured_ = true;
                    errorMessage_ = "too few arguments for format string";
                    irritants_ = Pair::list1(Object::makeString(fmt));
                    return;
                }
                break;
            }
            case 's':
            case 'S':
            {
                if (args.isPair()) {
                    putDatum(theVM, args.car());
                    args = args.cdr();
                } else {
                    isErrorOccured_ = true;
                    errorMessage_ = "too few arguments for format string";
                    irritants_ = Pair::list1(Object::makeString(fmt));
                    return;
                }
                break;
            }
            case '\0':
                i--;
                break;
            }
        } else {
            buffer += fmt[i];
        }
    }

    if (!buffer.empty()) {
        putString(buffer);
    }
    flush();
    //fflush(stdout); // temp
    return;
}
Пример #7
0
void TextualOutputPort::putString(const ucs4string& s)
{
    for (ucs4string::size_type i = 0; i < s.size(); i++) {
        putCharHandleSpecial(s[i]);
    }
}