Exemplo n.º 1
0
// Command-line entry point: takes two strings, converts each with utf8to32()
// into a vector of 32-bit values and prints the result of levenshtein() on
// the two vectors.
// Exit status is 1 (after printing a usage line) when fewer than two strings
// are supplied, 0 otherwise.
int main(int argc, char** argv) {
	const bool haveBothStrings = (argc >= 3);
	if (!haveBothStrings) {
		std::cout << "Usage: " << argv[0] << " string1 string2" << std::endl;
		return 1;
	}

	// Flush right away so the prompt is visible before the computation runs.
	std::cout << "Matching " << argv[1] << " and " << argv[2] << "; ";
	std::cout << std::flush;

	// 32-bit representations of the two command-line strings.
	std::vector<uint32_t> firstCodes;
	std::vector<uint32_t> secondCodes;
	utf8to32(argv[1], firstCodes);
	utf8to32(argv[2], secondCodes);

	std::cout << "Levenshtein distance is " << levenshtein(firstCodes, secondCodes) << std::endl;
	return 0;
}
Exemplo n.º 2
0
// Reads the file named by `queryfn` line by line, converts each line with
// utf8to32() and appends the resulting vector to `queries` (existing entries
// are kept).
// If the file cannot be opened the loop body never runs and `queries` is left
// unchanged; no error is reported.
void loadQueries(std::vector<std::vector<uint32_t>> &queries, const std::string &queryfn) {
    std::ifstream queryfile(queryfn);
    for (std::string line; std::getline(queryfile, line);) {
        std::vector<uint32_t> query;
        utf8to32(line.data(), query);
        // Hand the decoded buffer over instead of deep-copying it: append an
        // empty slot, then swap the contents in (constant time, no element copy).
        queries.emplace_back();
        queries.back().swap(query);
    }
    // queryfile is closed by its destructor.
}
Exemplo n.º 3
0
// Unit test for the UTF-8 helpers utf8to32() and utf8len(), written with the
// EXPECT_EQUAL macro. A second section covering utf32to8() is commented out.
void unicode_unittest()
{
	{
		// Sample text: five ASCII characters followed by three non-ASCII ones.
		char* test_string = (char*)"abcd åäö";
		// Only element 0 gets an explicit value (0x33); the remaining elements
		// are zero-initialised (assuming char32 is an integer typedef).
		char32 dest_string[9] = {0x33};

		// Empty input: the expected return value is 0.
		// NOTE(review): dest_string[1] is already zero from the initialiser
		// above, so the second check does not by itself show that a terminator
		// was written — confirm whether dest_string[0] was intended.
		EXPECT_EQUAL( utf8to32( (char*)"", dest_string, 8 ), 0 );
		EXPECT_EQUAL( 0x00, dest_string[1] );

		// Single ASCII character: expected return value 1, 'a' (0x61) at index 0.
		EXPECT_EQUAL( utf8to32( (char*)"a", dest_string, 8 ), 1 );
		EXPECT_EQUAL( 0x61, dest_string[0] );
		EXPECT_EQUAL( 0x00, dest_string[1] );

		// NOTE(review): 11 matches the UTF-8 byte length of test_string
		// (5 one-byte + 3 two-byte characters) if this file is stored as
		// UTF-8, which suggests the return value counts input bytes —
		// confirm against utf8to32().
		EXPECT_EQUAL( utf8to32( test_string, dest_string, 8 ), 11 );
		
		// Expected decoded values: 'a' 'b' 'c' 'd' ' ' U+00E5 U+00E4 U+00F6,
		// followed by a zero at index 8.
		EXPECT_EQUAL( 0x61, dest_string[0] );
		EXPECT_EQUAL( 0x62, dest_string[1] );
		EXPECT_EQUAL( 0x63, dest_string[2] );
		EXPECT_EQUAL( 0x64, dest_string[3] );
		EXPECT_EQUAL( 0x20, dest_string[4] );
		EXPECT_EQUAL( 0xE5, dest_string[5] );
		EXPECT_EQUAL( 0xE4, dest_string[6] );
		EXPECT_EQUAL( 0xF6, dest_string[7] );
		EXPECT_EQUAL( 0x00, dest_string[8] );
		
		// utf8len() is expected to count characters rather than bytes: each
		// non-ASCII character below contributes 1 to the expected length.
		EXPECT_EQUAL( utf8len( test_string ), 8 );
		EXPECT_EQUAL( utf8len( "" ), 0 );
		EXPECT_EQUAL( utf8len( "a" ), 1 );
		EXPECT_EQUAL( utf8len( "ÿ" ), 1 );
		EXPECT_EQUAL( utf8len( "bÿ" ), 2 );
		EXPECT_EQUAL( utf8len( "ÿb" ), 2 );
		EXPECT_EQUAL( utf8len( "2ÿå" ), 3 );
	}

	// Disabled check for utf32to8(): encodes three values and compares the
	// result with a UTF-8 literal. Left commented out as found.
	/*{
		char32 utf32_string[4] = {0x41, 0xC1, 0xE5, 0x00};
		char utf8_dest_string[6];

		utf32to8( utf32_string, utf8_dest_string, 5 );
		utf8_dest_string[5] = '\0';
		EXPECT_EQUAL( strcmp( "AÁå", utf8_dest_string ), 0 );
	}*/
}
Exemplo n.º 4
0
// Builds a binary index file from a text file.
//
// Each line of `inputFile` is converted with utf8to32() and written to
// `indexFile` as one record:
//   - a uint32_t holding the number of 32-bit values that follow,
//   - that many uint32_t values, exactly as they sit in memory (host byte order).
//
// No error is reported if either file cannot be opened; in that case nothing
// useful is written.
void indexFromFile(const char *inputFile, const char *indexFile) {
    std::ifstream file(inputFile);
    // The records are raw bytes, so the output must be opened in binary mode;
    // a text-mode stream may rewrite 0x0A bytes on platforms that translate
    // newlines, corrupting the index.
    std::ofstream index(indexFile, std::ofstream::binary);
    for (std::string line; std::getline(file, line);) {
        std::vector<uint32_t> indexed;
        utf8to32(line.data(), indexed);
        // Record header: element count, narrowed explicitly to the on-disk width.
        const uint32_t length = static_cast<uint32_t>(indexed.size());
        index.write(reinterpret_cast<const char *>(&length), sizeof(length));
        index.write(reinterpret_cast<const char *>(indexed.data()), sizeof(uint32_t) * length);
    }
    // Both streams are flushed and closed by their destructors.
}
Exemplo n.º 5
0
 // charCodeAt(pos): `pos` is a byte offset into the UTF-8 string held by
 // `this`. Returns a NumberValue with the value utf8to32() decodes at that
 // offset, or a NumberValue(NAN) when pos is out of range or utf8to32()
 // returns null.
 Value* charCodeAt()
 {
     const std::string text = getThis()->toString();
     const double pos = getScopeChain()->get("pos")->toInteger();

     // Negate the whole range test so a NaN position is rejected as well.
     const bool inRange = (0.0 <= pos) && (pos < text.length());
     if (!inRange)
     {
         return new NumberValue(NAN);
     }

     u32 codePoint;
     const int offset = static_cast<int>(pos);
     const char* const start = text.c_str() + offset;
     if (utf8to32(start, &codePoint))
     {
         return new NumberValue(codePoint);
     }
     return new NumberValue(NAN);
 }
Exemplo n.º 6
0
 // esjs charAt(pos): `pos` is a byte offset into the UTF-8 string held by
 // `this`. The result contains the complete byte sequence of the UTF-8
 // character starting at that offset, so the returned string can be longer
 // than one byte.
 // Returns an empty string when pos is out of range or utf8to32() returns null.
 Value* charAt()
 {
     const std::string text = getThis()->toString();
     const double pos = getScopeChain()->get("pos")->toInteger();

     // Negate the whole range test so a NaN position is rejected as well.
     const bool inRange = (0.0 <= pos) && (pos < text.length());
     if (!inRange)
     {
         return new StringValue("");
     }

     u32 decoded;
     const int offset = static_cast<int>(pos);
     const char* const start = text.c_str() + offset;
     // utf8to32() returns the position just past the decoded sequence.
     const char* const end = utf8to32(start, &decoded);
     if (!end)
     {
         return new StringValue("");
     }
     return new StringValue(text.substr(offset, end - start));
 }
Exemplo n.º 7
0
    // Returns a new StringValue holding the upper-cased form of `value`'s
    // string representation. The text is walked one UTF-8 character at a
    // time: each is decoded with utf8to32(), mapped through utftoupper() and
    // re-encoded with utf32to8(). Characters for which utf32to8() returns
    // null are skipped; the walk stops when utf8to32() returns null or the
    // terminating NUL is reached.
    Value* toUpperCase(Value* value)
    {
        // Keep the source string in a named local: taking c_str() directly on
        // the result of toString() would leave `next` pointing into a
        // temporary that is destroyed at the end of that statement.
        const std::string source = value->toString();
        std::string result;
        const char* next = source.c_str();
        while (next && *next)
        {
            char utf8[5];
            // Initialised so that a failed decode never passes an
            // indeterminate value to utftoupper().
            u32 utf32 = 0;

            next = utf8to32(next, &utf32);
            char* nextResult = utf32to8(utftoupper(utf32), utf8);
            if (nextResult)
            {
                // utf32to8() returns the position just past what it wrote;
                // terminate there so the buffer can be appended as a C string.
                *nextResult = '\0';
                result += utf8;
            }
        }
        return new StringValue(result);
    }
Exemplo n.º 8
0
// Passes the whole of `in` (begin to end) to utf8to32(), which writes its
// output to `out` through std::back_inserter. Results are therefore appended;
// `out` is not cleared first.
// NOTE(review): the <4> argument presumably selects the sizeof(wchar_t) == 4
// case, and the `template<>` header of this specialization is not visible in
// this excerpt — confirm against the primary template.
inline void
utf8_to_wchar_impl< 4 >( const std::string & in , std::wstring & out )
{
    utf8to32( in.begin() , in.end() , std::back_inserter(out) );
}