Exemple #1
0
/**
 * Native: replaces an occurrence of a search string inside a plugin string
 * and writes the modified string back to the plugin.
 *
 * Arguments read from params[]:
 *   params[1]  Plugin address of the text to modify (also the output buffer).
 *   params[2]  Maximum length of the text; one more byte is granted to
 *              UTIL_ReplaceEx for the terminator.
 *   params[3]  Plugin address of the search string.
 *   params[4]  Plugin address of the replacement string.
 *   params[5]  Length of the search string, or -1 to use strlen().
 *   params[6]  Length of the replacement string, or -1 to use strlen().
 *   params[7]  Non-zero for a case sensitive search.
 *
 * @return      -1 if the search length is zero (an error is logged) or if
 *              UTIL_ReplaceEx returns NULL; otherwise the offset, relative to
 *              the start of the text, of the pointer UTIL_ReplaceEx returned.
 */
static cell AMX_NATIVE_CALL replace_stringex(AMX *amx, cell *params)
{
	int len;
	size_t maxlength = (size_t)params[2];

	/* Each string is fetched with a distinct third argument (0, 1, 2). */
	char *text = get_amxstring(amx, params[1], 0, len);
	const char *search = get_amxstring(amx, params[3], 1, len);
	const char *replace = get_amxstring(amx, params[4], 2, len);

	/* -1 means "measure the string", any other value is used as given. */
	size_t searchLen = (params[5] == -1) ? strlen(search) : (size_t)params[5];
	size_t replaceLen = (params[6] == -1) ? strlen(replace) : (size_t)params[6];

	bool caseSensitive = params[7] ? true : false;

	if (searchLen == 0)
	{
		LogError(amx, AMX_ERR_NATIVE, "Cannot replace searches of empty strings.");
		return -1;
	}

	char *ptr = UTIL_ReplaceEx(text, maxlength + 1, search, searchLen, replace, replaceLen, caseSensitive); // + EOS

	if (ptr == NULL)
	{
		return -1;
	}

	/**
	 * The replacement was done in place inside text, so the whole buffer has
	 * to be written back. ptr only marks a position inside it; writing ptr
	 * would hand the plugin the tail of the string while the returned offset
	 * below is still relative to the start of text.
	 */
	set_amxstring(amx, params[1], text, maxlength);

	return ptr - text;
}
/**
 * Replaces every occurrence of a search string inside a buffer, in place,
 * by calling UTIL_ReplaceEx repeatedly.
 *
 * @param subject     Buffer to search and modify.
 * @param maxlength   Size limit handed to UTIL_ReplaceEx for subject.
 * @param search      String to look for.
 * @param replace     String to put in place of each occurrence.
 * @return            Number of replacements performed. An empty search
 *                    string performs no replacement and returns 0.
 */
unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace)
{
	size_t searchLen = strlen(search);
	size_t replaceLen = strlen(replace);

	/**
	 * An empty needle has no meaningful occurrence to replace; the
	 * replace_stringex native rejects it as well.
	 */
	if (searchLen == 0)
	{
		return 0;
	}

	char *ptr = subject;
	char *next = NULL;
	unsigned int total = 0;
	while ((next = UTIL_ReplaceEx(ptr, maxlength, search, searchLen, replace, replaceLen)) != NULL)
	{
		total++;

		/**
		 * maxlength is measured from the pointer passed to UTIL_ReplaceEx.
		 * Once we resume further into the buffer, the room that is left is
		 * smaller by the distance we advanced; passing the original value
		 * again would overstate the space available.
		 */
		size_t consumed = (size_t)(next - ptr);
		ptr = next;

		if (*ptr == '\0' || consumed >= maxlength)
		{
			break;
		}

		maxlength -= consumed;
	}

	return total;
}
Exemple #3
0
/**
 * Replaces the regex matches found in a text using a replacement template.
 *
 * The template may contain the format specifiers listed inside the function
 * ($n, ${n}, ${name}, $&, $`, $', $+, $_, $$); each one is expanded per match
 * before the replacement is applied.
 *
 * @param text         Text to search; it is modified in place.
 * @param textMaxLen   Size limit used for the text. The scratch buffers are
 *                     allocated with textMaxLen + 1 bytes.
 *                     NOTE(review): whether the caller counts the terminator
 *                     in this value is not visible here - confirm.
 * @param replace      Replacement template.
 * @param replaceLen   Number of characters of the template to parse.
 * @param flags        REGEX_FORMAT_NOCOPY: the output only contains the
 *                     expanded replacements, unmatched text is dropped.
 *                     REGEX_FORMAT_FIRSTONLY: stop after the first replacement.
 * @return             -1 if MatchAll() fails, otherwise the number of
 *                     replacements performed.
 */
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
{
	char *output = text;

	/**
	 * Retrieve all matches and store them in
	 * mSubStrings list.
	 */
	if (MatchAll(output) == -1)
	{
		return -1;
	}

	size_t subjectLen = strlen(subject);
	size_t total = 0;
	size_t baseIndex = 0;
	size_t diffLength = 0;

	char *toReplace = new char[textMaxLen + 1];
	char *toSearch = NULL;

	/**
	 * Characters which are not matched are not copied when replacing matches.
	 * The original text (output buffer) is then considered as empty.
	 */
	if (flags & REGEX_FORMAT_NOCOPY)
	{
		*output = '\0';
	}
	else
	{
		/**
		 * This is used only when we do replace matches.
		 */
		toSearch  = new char[textMaxLen + 1];
	}

	/**
	 * Loop over all matches found.
	 */
	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
	{
		char *ptr = toReplace;

		size_t browsed = 0;
		size_t searchLen = 0;
		size_t length = 0;

		/**
		 * Build the replace string as it can contain backreferences
		 * and these need to be parsed.
		 *
		 * toReplace holds textMaxLen + 1 bytes, so browsed must stay below
		 * textMaxLen at the start of an iteration: the terminator written
		 * after the loop then lands at index textMaxLen at most.
		 *
		 * Inside the switch, expansions add "length - 1" to browsed because
		 * the loop increment accounts for the remaining character.
		 *
		 * NOTE(review): the expansions below (memcpy / GetSubstring) write at
		 * ptr + browsed without checking the room left in toReplace - confirm
		 * the callers keep the expanded string within textMaxLen.
		 */
		for (const char *s = replace, *end = s + replaceLen; s < end && browsed < textMaxLen; ++s, ++browsed)
		{
			unsigned int c = *s;

			/**
			 * Supported format specifiers:
			 *
			 *   $number  : Substitutes the substring matched by group number.
			 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
			 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
			 *   $&       : Substitutes a copy of the whole match.
			 *   $`       : Substitutes all the text of the input string before the match.
			 *   $'       : Substitutes all the text of the input string after the match.
			 *   $+       : Substitutes the last group that was captured.
			 *   $_       : Substitutes the entire input string.
			 *   $$       : Substitutes a literal "$".
			 */
			if (c == '$' || c == '\\')
			{
				switch (*++s)
				{
					case '\0':
					{
						/**
						 * End of string.
						 * Copy one character.
						 */
						 *(ptr + browsed) = c;
						 break;
					}
					case '&':
					{
						/**
						 * Concatenate retrieved full match sub-string.
						 * length - 1 to overwrite EOS.
						 */
						GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '`':
					{
						/**
						 * Concatenate part of original text up to
						 * first sub-string position.
						 */
						length = mSubStrings.at(baseIndex).start;
						memcpy(ptr + browsed, subject, length);
						browsed += length - 1;
						break;
					}
					case '\'':
					{
						/**
						 * Concatenate part of original text from
						 * last sub-string end position to EOS.
						 */
						length = mSubStrings.at(baseIndex).end;
						memcpy(ptr + browsed, subject + length, subjectLen - length);
						browsed += (subjectLen - length) - 1;
						break;
					}
					case '+':
					{
						/**
						 * Copy the last group that was captured.
						 */
						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '_':
					{
						/**
						 * Copy the entire input string.
						 */
						memcpy(ptr + browsed, subject, subjectLen);
						browsed += (subjectLen - 1);
						break;
					}
					case '$':
					case '\\':
					{
						/**
						 * Copy the single character $ or \.
						 */
						*(ptr + browsed) = c;
						break;
					}
					case '0': case '1':	case '2': case '3':	case '4':
					case '5': case '6': case '7': case '8': case '9':
					case '{':
					{
						/**
						 * Checking backreference.
						 * Which can be either $n, ${n} or ${name}.
						 */
						int backref = -1;
						const char *walk = s;
						bool inBrace = false;
						bool nameCheck = false;

						/**
						 * ${nn}.
						 *  ^
						 */
						if (*walk == '{')
						{
							inBrace = true;
							++walk;
						}

						/**
						 * Valid number.
						 * $nn or ${nn}
						 *  ^       ^
						 */
						if (*walk >= '0' && *walk <= '9')
						{
							backref = *walk - '0';
							++walk;
						}
						else if (inBrace)
						{
							nameCheck = true;

							/**
							 * Not a valid number.
							 * Checking as string.
							 * ${name}
							 *   ^
							 */
							if (*walk)
							{
								const char *pch = strchr(walk, '}');

								if (pch != NULL)
								{
									/**
									 * A named group is at most 32 characters long (PCRE).
									 * The buffer keeps room for one extra character plus EOS:
									 * a 32-character name fits entirely, and a longer one is
									 * truncated to 33 characters, which can never equal a valid
									 * group name. The copy size is clamped so that an over-long
									 * ${name} cannot overflow the buffer.
									 */
									enum { kMaxGroupNameLength = 32 };
									char name[kMaxGroupNameLength + 2];

									size_t nameSize = (size_t)(pch - walk) + 1;
									if (nameSize > sizeof(name))
									{
										nameSize = sizeof(name);
									}
									strncopy(name, walk, nameSize);

									int options = 0, num = 0;
									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &options);

									/**
									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
									 * as pcre_get_stringnumber output order is not defined.
									 */
									if (options & PCRE_DUPNAMES)
									{
										/**
										 * NOTE(review): the third argument is a byte count; if ovector
										 * is an array of int this clears only part of it - confirm
										 * against the declaration.
										 */
										memset(ovector, 0, REGEX_MAX_SUBPATTERNS);

										/**
										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
										 * for a given match.
										 */
										for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
										{
											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
										}

										num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);

										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											/**
											 * Substring copied; resume parsing at the closing brace.
											 */
											browsed += num - 1;
											s = pch;
											break;
										}
										++pch;
									}
									else
									{
										/**
										 * Retrieve sub-pattern index from a given name.
										 */
										num = pcre_get_stringnumber(re, name);
										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											backref = num;
											walk = ++pch;
										}
									}

									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
									{
										/**
										 * If a sub-string for a given match is not found, or if it is
										 * greater than the number of sub-patterns, we still need to check
										 * if this group name is a valid one because if so we want to
										 * escape it. Looking at the name table.
										 */
										bool found = false;
										for (size_t n = 0; n < mSubsNameTable.length(); ++n)
										{
											if (!mSubsNameTable.at(n).name.compare(name))
											{
												--browsed;
												s = --pch;
												found = true;
												break;
											}
										}

										if (found)
										{
											/**
											 * Move on to the next character of the replace string.
											 */
											continue;
										}
									}
								}
							}
						}

						if (!nameCheck)
						{
							/**
							 * Valid second number.
							 * $nn or ${nn}
							 *   ^       ^
							 */
							if (*walk && *walk >= '0' && *walk <= '9')
							{
								backref = backref * 10 + *walk - '0';
								++walk;
							}

							if (inBrace)
							{
								/**
								 * Invalid specifier
								 * Either hit EOS or missing }.
								 * ${n  or ${nn  or ${nx or ${nnx
								 *    ^        ^       ^        ^
								 */
								if (*walk == '\0' || *walk != '}')
								{
									backref = -1;
								}
								else
								{
									++walk;
								}
							}
						}

						length = walk - s;
						s = --walk;

						/**
						 * We can't provide a capture number >= to total that pcre_exec has found.
						 * 0 is implicitly accepted, same behavior as $&.
						 */
						if (backref >= 0 && backref < mNumSubpatterns)
						{
							/**
							 * Valid available index for a given match.
							 */
							if ((size_t)backref < mMatchesSubs.at(i))
							{
								/**
								 * Concatenate retrieved sub-string.
								 * length - 1 to overwrite EOS.
								 */
								GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
								browsed += length - 1;
							}
							else
							{
								/**
								 * Valid unavailable index for a given match.
								 */
								--browsed;
							}
						}
						else
						{
							/**
							 * If we are here it means the syntax is valid but the sub-pattern doesn't exist.
							 * So, copy as it is, including $.
							 */
							memcpy(ptr + browsed, s - length, length + 1);
							browsed += length;
						}

						break;
					}
					default:
					{
						/**
						 * Not a valid format modifier.
						 * So we copy the character as it is.
						 */
						*(ptr + browsed) = *s;
						break;
					}
				}
			}
			else
			{
				/**
				 * At this point, direct copy.
				 */
				*(ptr + browsed) = c;
			}
		}

		*(ptr + browsed) = '\0';

		/**
		 * Concatenate only replace string of each match,
		 * as we don't want to copy unmatched characters.
		 */
		if (flags & REGEX_FORMAT_NOCOPY)
		{
			/**
			 * We want just the first occurrence.
			 */
			if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}

			/**
			 * NOTE(review): strncat's count is the maximum number of characters
			 * appended, not the size of the destination - confirm the text
			 * buffer can hold the accumulated result.
			 */
			strncat(output, toReplace, textMaxLen + 1);
		}
		else
		{
			/**
			 * Retrieves full string of a given match.
			 */
			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);

			/**
			 * We get something to replace, but the sub-pattern to search is empty.
			 * We insert the replacement either at the start or at the end of the string.
			 */
			if (*toReplace && !searchLen)
			{
				if (output - text > 0)
				{
					/**
					 * NOTE(review): same remark as above about strncat's count.
					 */
					strncat(output, toReplace, textMaxLen);
				}
				else
				{
					/**
					 * Prepend: append the text to the replacement, then copy the
					 * result back. toReplace holds textMaxLen + 1 bytes, so only
					 * the room left after its current content may be appended.
					 */
					size_t used = strlen(toReplace);
					if (used < textMaxLen)
					{
						strncat(toReplace, text, textMaxLen - used);
					}
					strncopy(text, toReplace, strlen(toReplace) + 1);
				}

				++total;
			}
			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
			{
				/**
				 * Then we simply do a replace.
				 * Probably not the most efficient, but this should be at least safe.
				 * To avoid issue where the function could find a string which is not at the expected index,
				 * We force the input string to start from index of the full match.
				 */
				++total;
			}

			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}
		}

		/**
		 * mMatchesSubs is a flat list containing all sub-patterns of all matches.
		 * A number of sub-patterns can vary per match. So we calculate the position in the list,
		 * from where the first sub-pattern result of current match starts.
		 * diffLength tracks how much the text has grown or shrunk so far, so the
		 * recorded match positions can be shifted accordingly.
		 */
		baseIndex  += mMatchesSubs.at(i);
		diffLength += browsed - searchLen;
	}

	/**
	 * delete[] accepts a null pointer, so toSearch needs no guard.
	 */
	delete[] toReplace;
	delete[] toSearch;

	/**
	 * Return the number of successful replacements.
	 */
	return total;
}