// . looks for an html character entity ("&name", "&#123", "&#x7b") at "s"
// . s[maxLen] should be the terminating NUL
// . a closing ';' is counted when present, but it is not required
// . returns the full length of the entity, or 0 if no valid one starts here
// . once a lookup has run, *c holds its result (0 when nothing matched);
//   *c is left untouched when the text is rejected before any lookup
int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c ) {
	// an entity can only start with an ampersand
	if ( s[0] != '&' ) return 0;
	// a '#' right behind the '&' marks a numeric entity
	const int isNumeric = ( s[1] == '#' );
	// step over the alphanumeric body, giving up at offset 9 to save time
	int32_t end = isNumeric ? 2 : 1;
	for ( ; end < maxLen && end < 9 ; end++ ) {
		if ( ! is_alnum_a(s[end]) ) break;
	}
	// swallow the terminating ';' when there is one
	if ( end < maxLen && s[end] == ';' ) end++;
	// entities span at least 3 chars (&gt) and never more than 10
	if ( end < 3 || end > 10 ) return 0;
	// . hand the whole she-bang ("&#12...;" or "&acute...;") to the
	//   lookup routine that matches its flavor
	uint32_t value;
	if      ( ! isNumeric   ) value = getTextEntity        ( s , end );
	else if ( s[2] == 'x'   ) value = getHexadecimalEntity ( s , end );
	else                      value = getDecimalEntity     ( s , end );
	*c = value;
	// a zero value means the lookup did not recognize the entity
	return value ? end : 0;
}
// . recognizes a strictly-formed html character entity at the start of "s"
// . s[maxLen] should be the terminating NUL
// . unlike lenient browsers, the closing ';' is mandatory here
// . returns the entity's full length ('&' through ';'), or 0 if there is none
// . once a lookup has run, *c holds its result (0 for an unknown entity);
//   *c is left untouched when the text is rejected before any lookup
int32_t getEntity_a ( const char *s , int32_t maxLen , uint32_t *c ) {
	// an entity can only start with an ampersand
	if ( s[0] != '&' ) {
		return 0;
	}

	// a '#' directly after the '&' marks a numeric entity
	const int isNumeric = ( s[1] == '#' );

	// step over the alphanumeric body, giving up at offset 9 to save time
	int32_t end = isNumeric ? 2 : 1;
	for ( ; end < maxLen && end < 9 ; end++ ) {
		if ( ! is_alnum_a( s[end] ) ) {
			break;
		}
	}

	// the body has to be closed by a ';' before the buffer runs out;
	// anything else is not a valid character entity reference
	if ( end == maxLen ) {
		return 0;
	}
	if ( s[end] != ';' ) {
		return 0;
	}
	end++;

	// with the ';' counted, entities span 3 to 10 chars
	if ( end < 3 || end > 10 ) {
		return 0;
	}

	// . pass the whole she-bang ("&#12...;" or "&acute...;") to the
	//   lookup routine that matches its flavor
	uint32_t value;
	if ( ! isNumeric ) {
		value = getTextEntity( s, end );
	} else if ( s[2] == 'x' ) {
		value = getHexadecimalEntity( s, end );
	} else {
		value = getDecimalEntity( s, end );
	}
	*c = value;

	// a zero value means the lookup did not recognize the entity
	return value ? end : 0;
}
// . parses the html character entity ("&name;", "&#123;", "&#x7b;") at "s"
// . s[maxLen] should be the terminating NUL
// . the closing ';' is mandatory
// . returns full length of entity @ "s" if there is a valid one, 0 otherwise
// . codepoint[] receives the code point(s) the entity stands for and
//   *codepointCount how many of them were stored (0 whenever we return 0)
// . *utf8Len is taken from the table entry for named entities; it stays 0
//   for numeric entities and on failure
int32_t getEntity_a ( const char *s, int32_t maxLen, uint32_t codepoint[2], int32_t *codepointCount, int32_t *utf8Len ) {
	//TODO: handle multi-codepoint entitites
	// give both out-params a defined value on every path, so a caller that
	// reads them after a failed parse never sees an indeterminate value
	*utf8Len = 0;
	*codepointCount = 0;

	// ensure there's an & as first char
	if ( s[0] != '&' ) {
		return 0;
	}

	// compute maximum length of entity, if it's indeed an entity
	int32_t len = 1;
	if ( s[len] == '#' ) {
		len++;
	}

	// cut it off after max_entity_name_len chars to save time and also to
	// avoid parsing obscenely long incorrect entitites (eg an ampersand
	// followed by 2MB of letters)
	while ( len < maxLen && len < max_entity_name_len && is_alnum_a( s[len] ) ) {
		len++;
	}

	// character entity reference must end with a semicolon.
	// some browsers have lenient parsing, but we don't accept invalid
	// references.
	if ( len == maxLen || s[len] != ';' ) {
		//not a valid character entity reference
		return 0;
	}
	len++;

	// we don't have entities longer than what w3c specified
	if ( len > max_entity_name_len+1 ) {
		return 0;
	}

	// all entites are 3 or more chars (&gt)
	if ( len < 3 ) {
		return 0;
	}

	// . numeric entities like &#123; or &#x7b; are decoded arithmetically
	// . pass in the whole she-bang: "&#12...;" or "&acute...;"
	if ( s[1] == '#' ) {
		uint32_t value;
		if ( s[2] == 'x' ) {
			value = getHexadecimalEntity( s, len );
		} else {
			value = getDecimalEntity( s, len );
		}
		codepoint[0] = value;
		// a zero value means the digits did not form a usable code point
		if ( ! value ) {
			return 0;
		}
		*codepointCount = 1;
		return len;
	}

	// otherwise, it's a named entity
	const Entity *entity = getTextEntity( s, len );
	if ( ! entity ) {
		return 0; //unknown named entity
	}

	// the output array has room for two code points only; refuse a table
	// entry that would make the copy below run past it
	const int32_t count = entity->codepoints;
	if ( count < 0 || count > 2 ) {
		return 0;
	}

	// size the copy by the destination's element type
	memcpy( codepoint, entity->codepoint, (size_t)count * sizeof(codepoint[0]) );
	*codepointCount = count;
	*utf8Len = (int32_t)entity->utf8Len;
	return len;
}