Ejemplo n.º 1
0
STATIC char
S_grok_bslash_c(pTHX_ const char source, const bool output_warning)
{

 U8 result;

 if (! isPRINT_A(source)) {
  Perl_croak(aTHX_ "%s",
      "Character following \"\\c\" must be printable ASCII");
 }
 else if (source == '{') {
  assert(isPRINT_A(toCTRL('{')));

  /* diag_listed_as: Use "%s" instead of "%s" */
  Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", toCTRL('{'));
 }

 result = toCTRL(source);
 if (output_warning && isPRINT_A(result)) {
  U8 clearer[3];
  U8 i = 0;
  if (! isWORDCHAR(result)) {
   clearer[i++] = '\\';
  }
  clearer[i++] = result;
  clearer[i++] = '\0';

  Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
      "\"\\c%c\" is more clearly written simply as \"%s\"",
      source,
      clearer);
 }

 return result;
}
Ejemplo n.º 2
0
STATIC char
S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
{

    U8 result;

    if (utf8) {
	/* Trying to deprecate non-ASCII usages.  This construct has never
	 * worked for a utf8 variant.  So, even though are accepting non-ASCII
	 * Latin1 in 5.14, no need to make them work under utf8 */
	if (! isASCII(source)) {
	    Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
	}
    }

    result = toCTRL(source);
    if (! isASCII(source)) {
	    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
			    "Character following \"\\c\" must be ASCII");
    }
    else if (! isCNTRL(result) && output_warning) {
	if (source == '{') {
	    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
			    "\"\\c{\" is deprecated and is more clearly written as \";\"");
	}
	else {
	    U8 clearer[3];
	    U8 i = 0;
	    if (! isALNUM(result)) {
		clearer[i++] = '\\';
	    }
	    clearer[i++] = result;
	    clearer[i++] = '\0';

	    Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
			    "\"\\c%c\" is more clearly written simply as \"%s\"",
			    source,
			    clearer);
	}
    }

    return result;
}
Ejemplo n.º 3
0
STATIC bool
S_grok_bslash_o(pTHX_ char **s, UV *uv, const char** error_msg,
                      const bool output_warning, const bool strict,
                      const bool silence_non_portable,
                      const bool UTF)
{

/*  Documentation to be supplied when interface nailed down finally
 *  This returns FALSE if there is an error which the caller need not recover
 *  from; otherwise TRUE.  In either case the caller should look at *len [???].
 *  It guarantees that the returned codepoint, *uv, when expressed as
 *  utf8 bytes, would fit within the skipped "\o{...}" bytes.
 *  On input:
 *	s   is the address of a pointer to a NULL terminated string that begins
 *	    with 'o', and the previous character was a backslash.  At exit, *s
 *	    will be advanced to the byte just after those absorbed by this
 *	    function.  Hence the caller can continue parsing from there.  In
 *	    the case of an error, this routine has generally positioned *s to
 *	    point just to the right of the first bad spot, so that a message
 *	    that has a "<--" to mark the spot will be correctly positioned.
 *	uv  points to a UV that will hold the output value, valid only if the
 *	    return from the function is TRUE
 *      error_msg is a pointer that will be set to an internal buffer giving an
 *	    error message upon failure (the return is FALSE).  Untouched if
 *	    function succeeds
 *	output_warning says whether to output any warning messages, or suppress
 *	    them
 *	strict is true if this should fail instead of warn if there are
 *	    non-octal digits within the braces
 *      silence_non_portable is true if to suppress warnings about the code
 *          point returned being too large to fit on all platforms.
 *	UTF is true iff the string *s is encoded in UTF-8.
 */
    char* e;
    STRLEN numbers_len;
    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
                | PERL_SCAN_DISALLOW_PREFIX
                /* XXX Until the message is improved in grok_oct, handle errors
                 * ourselves */
                | PERL_SCAN_SILENT_ILLDIGIT;

#ifdef DEBUGGING
    char *start = *s - 1;
    assert(*start == '\\');
#endif

    PERL_ARGS_ASSERT_GROK_BSLASH_O;


    assert(**s == 'o');
    (*s)++;

    if (**s != '{') {
        *error_msg = "Missing braces on \\o{}";
        return FALSE;
    }

    e = strchr(*s, '}');
    if (!e) {
        (*s)++;  /* Move past the '{' */
        while (isOCTAL(**s)) { /* Position beyond the legal digits */
            (*s)++;
        }
        *error_msg = "Missing right brace on \\o{";
        return FALSE;
    }

    (*s)++;    /* Point to expected first digit (could be first byte of utf8
                  sequence if not a digit) */
    numbers_len = e - *s;
    if (numbers_len == 0) {
        (*s)++;    /* Move past the } */
        *error_msg = "Number with no digits";
        return FALSE;
    }

    if (silence_non_portable) {
        flags |= PERL_SCAN_SILENT_NON_PORTABLE;
    }

    *uv = grok_oct(*s, &numbers_len, &flags, NULL);
    /* Note that if has non-octal, will ignore everything starting with that up
     * to the '}' */

    if (numbers_len != (STRLEN) (e - *s)) {
        if (strict) {
            *s += numbers_len;
            *s += (UTF) ? UTF8SKIP(*s) : (STRLEN) 1;
            *error_msg = "Non-octal character";
            return FALSE;
        }
        else if (output_warning) {
            Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
            /* diag_listed_as: Non-octal character '%c'.  Resolved as "%s" */
                        "Non-octal character '%c'.  Resolved as \"\\o{%.*s}\"",
                        *(*s + numbers_len),
                        (int) numbers_len,
                        *s);
        }
    }

    /* Return past the '}' */
    *s = e + 1;

    /* guarantee replacing "\o{...}" with utf8 bytes fits within
     * existing space */
    assert(OFFUNISKIP(*uv) < *s - start);

    return TRUE;
}
Ejemplo n.º 4
0
STATIC bool
S_grok_bslash_o(pTHX_ const char *s,
			 UV *uv,
			 STRLEN *len,
			 const char** error_msg,
			 const bool output_warning)
{

/*  Documentation to be supplied when interface nailed down finally
 *  This returns FALSE if there is an error which the caller need not recover
 *  from; , otherwise TRUE.  In either case the caller should look at *len
 *  On input:
 *	s   points to a string that begins with 'o', and the previous character
 *	    was a backslash.
 *	uv  points to a UV that will hold the output value, valid only if the
 *	    return from the function is TRUE
 *	len on success will point to the next character in the string past the
 *		       end of this construct.
 *	    on failure, it will point to the failure
 *      error_msg is a pointer that will be set to an internal buffer giving an
 *	    error message upon failure (the return is FALSE).  Untouched if
 *	    function succeeds
 *	output_warning says whether to output any warning messages, or suppress
 *	    them
 */
    const char* e;
    STRLEN numbers_len;
    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
		| PERL_SCAN_DISALLOW_PREFIX
		/* XXX Until the message is improved in grok_oct, handle errors
		 * ourselves */
	        | PERL_SCAN_SILENT_ILLDIGIT;

    PERL_ARGS_ASSERT_GROK_BSLASH_O;


    assert(*s == 'o');
    s++;

    if (*s != '{') {
	*len = 1;	/* Move past the o */
	*error_msg = "Missing braces on \\o{}";
	return FALSE;
    }

    e = strchr(s, '}');
    if (!e) {
	*len = 2;	/* Move past the o{ */
	*error_msg = "Missing right brace on \\o{";
	return FALSE;
    }

    /* Return past the '}' no matter what is inside the braces */
    *len = e - s + 2;	/* 2 = 1 for the o + 1 for the '}' */

    s++;    /* Point to first digit */

    numbers_len = e - s;
    if (numbers_len == 0) {
	*error_msg = "Number with no digits";
	return FALSE;
    }

    *uv = NATIVE_TO_UNI(grok_oct(s, &numbers_len, &flags, NULL));
    /* Note that if has non-octal, will ignore everything starting with that up
     * to the '}' */

    if (output_warning && numbers_len != (STRLEN) (e - s)) {
	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
	/* diag_listed_as: Non-octal character '%c'.  Resolved as "%s" */
		       "Non-octal character '%c'.  Resolved as \"\\o{%.*s}\"",
		       *(s + numbers_len),
		       (int) numbers_len,
		       s);
    }

    return TRUE;
}