Exemplos de utf8size em C++ (Cpp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Returns a new String in which every occurrence of strNeedle in this string
// has been replaced by strReplace. This string itself is not modified.
//
// Occurrences are searched left to right with strstr (or utf8str when UTF-8
// support is enabled); the search resumes right after each match, so matches
// never overlap. An empty needle matches nothing and yields an unchanged copy.
String String::Replace(const String &strNeedle, const String &strReplace) const
{
	MUTEX_LOCK(str_mutex);
	MUTEX_LOCK_NAMED(wait2, strNeedle.str_mutex);
	MUTEX_LOCK_NAMED(wait3, strReplace.str_mutex);

	String strRet("");

	// The needle length in bytes is loop-invariant, so measure it only once
#ifdef SCRATCH_NO_UTF8
	const size_t iNeedleBytes = strlen(strNeedle);
#else
	const size_t iNeedleBytes = utf8size(strNeedle) - 1;
#endif

	// Keep a pointer to the current offset and a "previous offset"
	char* szOffset = this->str_szBuffer;
	char* szOffsetPrev = szOffset;

	// An empty needle is "found" at every position without ever advancing the
	// offset, which would loop forever. Skip the search entirely in that case.
	while (iNeedleBytes > 0) {
		// Find the offset of the next needle
#ifdef SCRATCH_NO_UTF8
		szOffset = strstr(szOffset, strNeedle);
#else
		szOffset = (char*)utf8str(szOffset, strNeedle);
#endif
		if (szOffset == nullptr) {
			break;
		}

		// Append the original characters between the previous match and this one
		strRet.AppendToBuffer(szOffsetPrev, szOffset - szOffsetPrev);

		// Append the replace value
		strRet.AppendToBuffer(strReplace);

		// Step over the needle and remember where the unmatched text resumes
		szOffset += iNeedleBytes;
		szOffsetPrev = szOffset;
	}

	// Append whatever is left after the last match (or the whole string if
	// nothing matched)
#ifdef SCRATCH_NO_UTF8
	const size_t iTailBytes = strlen(szOffsetPrev);
#else
	const size_t iTailBytes = utf8size(szOffsetPrev) - 1;
#endif
	strRet.AppendToBuffer(szOffsetPrev, iTailBytes);

	return strRet;
}

Exemplo n.º 2

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Replaces the contents of this string with a copy of szSrc.
//
// - A null szSrc is ignored and leaves the string untouched.
// - An empty szSrc releases any allocated buffer and points the string at the
//   shared String::str_szEmpty sentinel.
// - Otherwise the existing buffer is reused when the current contents are at
//   least as long as the new ones; if not, a new buffer is allocated.
//
// szSrc may point into this string's own buffer (e.g. when assigning a suffix
// of the string to itself); the copy is done with memmove so that overlapping
// source and destination are handled correctly.
void String::CopyToBuffer(const char* szSrc)
{
	// Validate the source string
	if (szSrc == nullptr) {
		return;
	}

	// Number of bytes to copy, not counting the null terminator
#ifdef SCRATCH_NO_UTF8
	size_t iLen = strlen(szSrc);
#else
	size_t iLen = utf8size(szSrc) - 1;
#endif
	if (iLen == 0) {
		// Clean up; the shared empty sentinel is never deleted
		if (this->str_szBuffer != String::str_szEmpty) {
			delete[] this->str_szBuffer;
		}

		// Set to empty char*
		this->str_szBuffer = String::str_szEmpty;
		return;
	}

	// Find the current size and the required size for the string, both
	// including the null terminator.
#ifdef SCRATCH_NO_UTF8
	size_t iBufLen = strlen(this->str_szBuffer) + 1;
#else
	size_t iBufLen = utf8size(this->str_szBuffer);
#endif
	size_t iReqLen = iLen + 1;

	// Check if we need to make more room.
	if (iReqLen > iBufLen) {
		// Get rid of the previously allocated space and allocate new memory.
		// (szSrc cannot alias the old buffer here: anything inside it would be
		// no longer than the buffer's current contents.)
		if (this->str_szBuffer != String::str_szEmpty) {
			delete[] this->str_szBuffer;
		}
		this->str_szBuffer = new char[iReqLen];
	}

	// Copy data to the buffer. memmove rather than memcpy, because when the
	// buffer is reused szSrc may overlap it.
	memmove(this->str_szBuffer, szSrc, iLen);

	// Always end with a null terminator.
	this->str_szBuffer[iLen] = '\0';
}

Exemplo n.º 3

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Appends the null-terminated string szSrc to the end of this string.
//
// A null szSrc is ignored, matching the behaviour of the
// (const char*, size_t) overload that this function delegates to.
void String::AppendToBuffer(const char* szSrc)
{
	// The length has to be measured before delegating, so the null check
	// must happen here rather than relying on the overload below.
	if (szSrc == nullptr) {
		return;
	}

	// Number of bytes to append, not counting the null terminator
#ifdef SCRATCH_NO_UTF8
	size_t len = strlen(szSrc);
#else
	size_t len = utf8size(szSrc) - 1;
#endif
	this->AppendToBuffer(szSrc, len);
}

Exemplo n.º 4

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Returns the length of the stored string, excluding the null terminator.
// Without UTF-8 support this is strlen of the buffer; with UTF-8 support it
// is utf8size of the buffer minus one.
int String::Size() const
{
	MUTEX_LOCK(str_mutex);

#ifdef SCRATCH_NO_UTF8
	const int iLength = (int)strlen(this->str_szBuffer);
#else
	const int iLength = (int)utf8size(this->str_szBuffer) - 1;
#endif

	return iLength;
}

Exemplo n.º 5

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Returns true when this string ends with strNeedle.
//
// A needle longer than this string can never be a suffix and yields false.
// An empty needle is a suffix of every string and yields true.
bool String::EndsWith(const String &strNeedle) const
{
	MUTEX_LOCK(str_mutex);
	MUTEX_LOCK_NAMED(wait2, strNeedle.str_mutex);

	// Lengths in bytes, not counting the null terminators
#ifdef SCRATCH_NO_UTF8
	const size_t iSelfBytes = strlen(this->str_szBuffer);
	const size_t iNeedleBytes = strlen(strNeedle);
#else
	const size_t iSelfBytes = utf8size(this->str_szBuffer) - 1;
	const size_t iNeedleBytes = utf8size(strNeedle) - 1;
#endif

	// Without this check the tail pointer below would land before the start
	// of the buffer and the comparison would read out of bounds.
	if (iNeedleBytes > iSelfBytes) {
		return false;
	}

	// Point at the tail of this string that is exactly as long as the needle
	const char* szTail = this->str_szBuffer + (iSelfBytes - iNeedleBytes);

	// Then compare that tail with our needle
#ifdef SCRATCH_NO_UTF8
	return !strcmp(strNeedle, szTail);
#else
	return !utf8cmp(strNeedle, szTail);
#endif
}

Exemplo n.º 6

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Appends the single character cSrc to the end of this string.
//
// A zero character is ignored. Otherwise a new buffer large enough for the
// existing contents, the encoded character and a null terminator is
// allocated, filled, and installed; the previous buffer is released unless it
// is the shared String::str_szEmpty sentinel.
void String::AppendToBuffer(const s_char cSrc)
{
	// Nothing to append for a zero character
	if (cSrc == 0) {
		return;
	}

	// Remember the current buffer: it is the copy source and is freed at the end
	char* szPrevious = this->str_szBuffer;

	// Bytes currently stored, and bytes the new character will occupy
#ifdef SCRATCH_NO_UTF8
	const size_t iStoredBytes = strlen(szPrevious);
	const size_t iCharBytes = 1;
#else
	const size_t iStoredBytes = utf8size(szPrevious) - 1;
	const size_t iCharBytes = utf8codepointsize(cSrc);
#endif

	// Allocate room for old contents + character + terminator
	char* szGrown = new char[iStoredBytes + iCharBytes + 1];
	this->str_szBuffer = szGrown;

	// Carry over the existing contents
	memcpy(szGrown, szPrevious, iStoredBytes);

	// Write the new character right behind them
#ifdef SCRATCH_NO_UTF8
	szGrown[iStoredBytes] = cSrc;
#else
	utf8catcodepoint(szGrown + iStoredBytes, cSrc, iCharBytes);
#endif

	// Terminate the string
	szGrown[iStoredBytes + iCharBytes] = '\0';

	// Release the old buffer, but never the shared empty sentinel
	if (szPrevious != String::str_szEmpty) {
		delete[] szPrevious;
	}
}

Exemplo n.º 7

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Appends the first iCountBytes bytes of szSrc to the end of this string.
//
// A null szSrc or a zero iCountBytes leaves the string untouched. Otherwise a
// new buffer holding the old contents, the appended bytes and a null
// terminator is allocated and installed; the previous buffer is released
// unless it is the shared String::str_szEmpty sentinel.
//
// szSrc may point into this string's current buffer: the old buffer is only
// freed after all bytes have been copied into the new one.
void String::AppendToBuffer(const char* szSrc, size_t iCountBytes)
{
	// Validate source string
	if (szSrc == nullptr) {
		return;
	}

	if (iCountBytes == 0) {
		return;
	}

	// Keep track of our previous buffer pointer so we can use it later for appending.
	char* szOldBuffer = this->str_szBuffer;

	// Number of bytes currently stored, not counting the null terminator
#ifdef SCRATCH_NO_UTF8
	const size_t iBufBytes = strlen(szOldBuffer);
#else
	const size_t iBufBytes = utf8size(szOldBuffer) - 1;
#endif

	// Get ourselves a new buffer
	char* szNewBuffer = new char[iBufBytes + iCountBytes + 1];
	this->str_szBuffer = szNewBuffer;

	// Copy the old contents followed by the appended bytes. All offsets are
	// size_t so they cannot overflow before the allocation size itself would.
	memcpy(szNewBuffer, szOldBuffer, iBufBytes);
	memcpy(szNewBuffer + iBufBytes, szSrc, iCountBytes);

	// Always end with a null terminator.
	szNewBuffer[iBufBytes + iCountBytes] = '\0';

	// Clean up; the shared empty sentinel is never deleted
	if (szOldBuffer != String::str_szEmpty) {
		delete[] szOldBuffer;
	}
}

Exemplo n.º 8

0

Exibir arquivo

Arquivo: m_nationalchars.cpp Projeto: krsnapc/inspircd

/* Conditions added */
bool lwbNickHandler::Call(const std::string& nick)
{
	if (nick.empty())
		return false;

	const char* n = nick.c_str();
	unsigned int p = 0;
	for (const char* i = n; *i; i++, p++)
	{
		/* 1. Multibyte encodings support:  */
		/* 1.1. 16bit char. areas, e.g. chinese:*/

		/* if current character is the last, we DO NOT check it against multibyte table */
		/* if there are mbtable ranges, use ONLY them. No 8bit at all */
		if (i[1] && m_additionalMB[0])
		{
			/* otherwise let's take a look at the current character and the following one */
			bool found = false;
			for(unsigned char * mb = m_additionalMB; (*mb) && (mb < m_additionalMB + sizeof(m_additionalMB)); mb += 4)
			{
				if ( (i[0] >= mb[0]) && (i[0] <= mb[1]) && (i[1] >= mb[2]) && (i[1] <= mb[3]) )
				{
					/* multibyte range character found */
					i++;
					p++;
					found = true;
					break;
				}
			}
			if (found)
				/* next char! */
				continue;
			else
				/* there are ranges, but incorrect char (8bit?) given, sorry */
				return false;
		}

		/* 2. 8bit character support */
		if (((*i >= 'A') && (*i <= '}')) || m_reverse_additional[(unsigned char)*i])
			/* "A"-"}" can occur anywhere in a nickname */
			continue;

		if ((((*i >= '0') && (*i <= '9')) || (*i == '-')) && (i > n))
			/* "0"-"9", "-" can occur anywhere BUT the first char of a nickname */
			continue;

		/* 3.1. Check against a simple UTF-8 characters enumeration */
		int cursize, cursize2, ncursize = utf8size((unsigned char *)i);
		/* do check only if current multibyte character is valid UTF-8 only */
		if (ncursize != -1)
		{
			bool found = false;
			for (unsigned char * mb = m_additionalUtf8; (utf8size(mb) != -1) && (mb < m_additionalUtf8 + sizeof(m_additionalUtf8)); mb += cursize)
			{
				cursize = utf8size(mb);
				/* Size differs? Pick the next! */
				if (cursize != ncursize)
					continue;

				if (!strncmp(i, (char *)mb, cursize))
				{
					i += cursize - 1;
					p += cursize - 1;
					found = true;
					break;
				}
			}
			if (found)
				continue;

			/* 3.2. Check against an UTF-8 ranges: <start character> and <length of the range>. */
			found = false;
			for (unsigned char * mb = m_additionalUtf8range; (utf8size(mb) != -1) && (mb < m_additionalUtf8range + sizeof(m_additionalUtf8range)); mb += cursize + 1)
			{
				cursize = utf8size(mb);
				/* Size differs (or lengthbyte is zero)? Pick the next! */
				if ((cursize != ncursize) || (!mb[cursize]))
					continue;

				unsigned char uright[5] = {0,0,0,0,0}, range = mb[cursize] - 1;
				strncpy((char* ) uright, (char *) mb, cursize);

				for (int temp = cursize - 1; (temp >= 0) && range; --temp)
				{
					/* all but the first char are 64-based */
					if (temp)
					{
						char part64 = range & 63; /* i.e. % 64 */
						/* handle carrying over */
						if (uright[temp] + part64 - 1 > 191)
						{
							uright[temp] -= 64;
							range += 64;
						}
						uright[temp] += part64;
						range >>= 6; /* divide it on a 64 */
					}
					/* the first char of UTF-8 doesn't follow the rule */
					else
					{
						uright[temp] += range;
					}
				}

				if ((strncmp(i, (char *) mb, cursize) >= 0) && (strncmp(i, (char *) uright, cursize) <= 0))
				{
					i += cursize - 1;
					p += cursize - 1;
					found = true;
					break;
				}
			}
			if (found)
				continue;

			/* 3.3. Check against an UTF-8 intervals: <start character> and <end character>. */
			found = false;
			for (unsigned char * mb = m_additionalUtf8interval; (utf8size(mb) != -1) && (utf8size(mb+utf8size(mb)) != -1)
				&& (mb < m_additionalUtf8interval + sizeof(m_additionalUtf8interval)); mb += (cursize+cursize2) )
			{
				cursize = utf8size(mb);
				cursize2= utf8size(mb+cursize);

				int minlen  = cursize  > ncursize ? ncursize : cursize;
				int minlen2 = cursize2 > ncursize ? ncursize : cursize2;

				unsigned char* uright = mb + cursize;

				if ((strncmp(i, (char *) mb, minlen) >= 0) && (strncmp(i, (char *) uright, minlen2) <= 0))
				{
					i += cursize - 1;
					p += cursize - 1;
					found = true;
					break;
				}
			}
			if (found)
				continue;
		}

Exemplo n.º 9

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

// Returns a copy of this string with leading and/or trailing occurrences of
// the character c removed.
//
//   bLeft  - strip every consecutive c at the start of the string
//   bRight - strip every consecutive c at the end of the string
//   c      - the character to strip
//
// The work is done on a temporary, null-terminated copy of the buffer, so
// this string itself is never modified.
String String::InternalTrim(bool bLeft, bool bRight, s_char c) const
{
	// Copy ourselves into a new buffer
#ifdef SCRATCH_NO_UTF8
	size_t bytes = strlen(this->str_szBuffer);
#else
	size_t bytes = utf8size(this->str_szBuffer) - 1;
#endif
	char* szBuffer = new char[bytes + 1];

	// Copy the contents including the terminator. The length-limited copy used
	// previously in UTF-8 mode copied exactly `bytes` bytes and therefore never
	// wrote the terminator, leaving the working copy unterminated.
	memcpy(szBuffer, this->str_szBuffer, bytes);
	szBuffer[bytes] = '\0';

	// Keep a pointer to the current offset
	char* szOffset = szBuffer;

	if (bLeft) {
#ifdef SCRATCH_NO_UTF8
		// While the current character is c, keep incrementing the offset
		char lc = *szOffset;
		while (lc == c && lc != '\0') {
			// This way, we'll trim every leading c
			lc = *(++szOffset);
		}
#else
		// Decode one codepoint at a time and step over it while it equals c
		int codepoint;
		void* v = utf8codepoint(szOffset, &codepoint);
		while (codepoint == c && codepoint != '\0') {
			szOffset = (char*)v;
			v = utf8codepoint(szOffset, &codepoint);
		}
#endif
	}

	if (bRight) {
#ifdef SCRATCH_NO_UTF8
		// Loop from right to left in the string
		for (int i = (int)strlen(szOffset) - 1; i >= 0; i--) {
			// When we find a c
			if (szOffset[i] == c) {
				// Put the null terminator here to trim the right part
				szOffset[i] = '\0';
			} else {
				// Something other than c, we can stop now
				break;
			}
		}
#else
		// Walk forward over all codepoints and remember the position right
		// after the last codepoint that is not c; everything from there on is
		// a run of trailing c's and gets cut off.
		int codepoint;
		char* lastValid = szOffset;
		for (void* v = utf8codepoint(szOffset, &codepoint); ; v = utf8codepoint(v, &codepoint)) {
			if (codepoint == '\0') {
				break;
			}
			if (codepoint != c) {
				lastValid = (char*)v;
			}
		}
		*lastValid = '\0';
#endif
	}

	// Build the result from the trimmed range, then release the working copy
	String ret(szOffset);
	delete[] szBuffer;
	return ret;
}

Exemplo n.º 10

0

Exibir arquivo

Arquivo: String.hpp Projeto: angelog/Scratch

void String::Split(const String &strNeedle, StackArray<String> &astrResult, bool bTrimAll) const
{
	MUTEX_LOCK(str_mutex);
	MUTEX_LOCK_NAMED(wait2, strNeedle.str_mutex);

	// Keep a pointer to the current offset and a "previous offset"
	char* szOffset = str_szBuffer;
	char* szOffsetPrev = szOffset;
	int iCount = 1;

	do {
		// Find the needle from the string in the current offset pointer
#ifdef SCRATCH_NO_UTF8
		szOffset = strstr(szOffset, strNeedle);
#else
		szOffset = (char*)utf8str(szOffset, strNeedle);
#endif

		// If the needle is found
		if (szOffset != nullptr) {
			// Get the length for the string
			size_t iLen = szOffset - szOffsetPrev;

			// And get a buffer started
			char* szPart = new char[iLen + 1];

			// Copy over the characters to the part buffer
			size_t i = 0;
			for (; i < iLen; i++) {
				szPart[i] = *(szOffset - (iLen - i));
			}
			szPart[i] = '\0';

			// Add it to the return array
			astrResult.Push() = szPart;
			delete[] szPart;

			// Keep a seperate count
			iCount++;

			// Increase the offset pointer by the needle size
#ifdef SCRATCH_NO_UTF8
			szOffset += strlen(strNeedle);
#else
			szOffset += utf8size(strNeedle) - 1;
#endif

			// Keep track of the pointer
			szOffsetPrev = szOffset;
		} else {
			// Get the length for the string
#ifdef SCRATCH_NO_UTF8
			size_t iLen = strlen(szOffsetPrev);
#else
			size_t iLen = utf8size(szOffsetPrev) - 1;
#endif

			// And get a buffer started
			char* szPart = new char[iLen + 1];

			// Copy over the characters to the part buffer
			size_t i = 0;
			for (; i < iLen; i++) {
				szPart[i] = szOffsetPrev[i];
			}
			szPart[i] = '\0';

			// Add it to the return vector
			String &strAdd = astrResult.Push();
			strAdd = szPart;
			if (bTrimAll) {
				strAdd = strAdd.Trim();
			}
			delete[] szPart;
		}
	} while (szOffset != nullptr);
}