示例#1
0
/* Self-test for the character helpers: walks the lowercase letters, the
 * uppercase letters and the decimal digits, asserting the expected answer of
 * every classification and case-conversion helper for each character. */
int main(void){
	char ch;

	/* Lowercase letters: alphabetic, lower, and changed only by to_upper. */
	ch = 'a';
	while(ch <= 'z'){
		assert(is_alpha(ch));
		assert(!is_digit(ch));
		assert(is_lower(ch));
		assert(!is_upper(ch));
		assert(to_lower(ch) == ch);
		assert(to_upper(ch) != ch);
		++ch;
	}

	/* Uppercase letters: alphabetic, upper, and changed only by to_lower. */
	ch = 'A';
	while(ch <= 'Z'){
		assert(is_alpha(ch));
		assert(!is_digit(ch));
		assert(!is_lower(ch));
		assert(is_upper(ch));
		assert(to_lower(ch) != ch);
		assert(to_upper(ch) == ch);
		++ch;
	}

	/* Digits: not alphabetic, and left alone by both conversions. */
	ch = '0';
	while(ch <= '9'){
		assert(!is_alpha(ch));
		assert(is_digit(ch));
		assert(!is_lower(ch));
		assert(!is_upper(ch));
		assert(to_lower(ch) == ch);
		assert(to_upper(ch) == ch);
		++ch;
	}

	printf("TEST SUCEEDED\n");
	return 0;
}
示例#2
0
文件: symb.cpp 项目: iley/intelib
// Maps a list item to a two-character type tag (a string literal):
//   "*0"  null item
//   "Lu" / "Ll" / "Ol"  character item: is_capital / is_lower / neither
//   "B0"  left markup
//   "N0"  symb_val casts to SExpressionInt
//   "D0"  symb_val casts to SExpressionFloat
//   "Wi" / "Wq"  anything else, depending on whether its text
//                representation passes is_ident()
const char *get_type(RfListItem *typed)
{
    if (!typed)
        return "*0";

    if (typed->IsChar()) {
        if (is_capital(typed->charcode))
            return "Lu";
        return is_lower(typed->charcode) ? "Ll" : "Ol";
    }

    if (typed->IsLeftMarkup())
        return "B0";

    // Numeric values are recognised by dynamic type, integers first.
    if (typed->symb_val.DynamicCastGetPtr<SExpressionInt>())
        return "N0";
    if (typed->symb_val.DynamicCastGetPtr<SExpressionFloat>())
        return "D0";

    // Everything else is classified by its printed form.
    SString text = typed->symb_val->TextRepresentation();
    char *chars = (char *)text.c_str();
    return is_ident(chars) ? "Wi" : "Wq";
}
示例#3
0
/*
 * Copies word into correct_word converted to a single case and returns
 * correct_word.
 *
 * Characters in 'A'..'Z' are counted as upper case, every other character as
 * "lower". When the upper-case count is strictly greater, lower-case letters
 * are raised with to_upper(); otherwise upper-case letters are lowered with
 * to_lower(). The output is NUL-terminated; correct_word must have room for
 * strlen(word) + 1 characters.
 */
char* correct_words(char* correct_word,char* word)
{
    int upper_total = 0;
    int other_total = 0;
    int pos;
    char cur;

    /* First pass: tally the two groups. */
    for(pos = 0; word[pos] != '\0'; pos++){
        if(word[pos] >= 'A' && word[pos] <= 'Z')
            upper_total++;
        else
            other_total++;
    }

    /* Second pass: convert towards the majority case (ties go to lower). */
    int make_upper = upper_total > other_total;
    for(pos = 0; word[pos] != '\0'; pos++){
        cur = word[pos];
        if(make_upper){
            if(is_lower(cur))
                cur = to_upper(cur);
        }
        else{
            if(is_upper(cur))
                cur = to_lower(cur);
        }
        correct_word[pos] = cur;
    }

    correct_word[pos] = '\0';
    return correct_word;
}
示例#4
0
// Tries to merge the component `cap`, lying above cell `c`, into `c` as the
// diacritic of a "short u" letter.
// Returns 0 when `cap` is rejected by the geometric checks, -1 when
// compose_cell() fails, and 1 once `c` has been rewritten with the single
// version u_bel / U_bel (chosen by the case of the original first version).
static int rec_shortu(cell* c, cell * cap)
{
    // Remember the letter before compose_cell() gets a chance to touch `c`.
    const uchar base_let = c->vers[0].let;

    if(cap->row > c->row) {
        return 0; // not a cap
    }
    if(cap->width() * 3 < c->width()) {
        return 0; // not so wide as need
    }
    if(cap->width() < 4 || cap->height() < 3) {
        return 0; // just dot
    }
    if(cap->col < c->col - 2) {
        return 0; // left dust
    }
    if(c->row > cap->row + cap->height() &&
            (c->row - (cap->row + cap->height())) > c->height() / 2 ) {
        return 0; // dust lay so high
    }

    // For letter 0xE3 on an erect-rotated cell the middle of `c` must fall
    // inside the horizontal extent of the cap.
    if((base_let == (uchar)'\xE3') && (c->pos_inc & erect_rot)) {
        if(c->col + c->width() / 2 < cap->col ||
                c->col + c->width() / 2 > cap->col + cap->width()) {
            return 0; // not centered dust
        }
    }

    cell * parts[8];
    parts[0] = c;
    parts[1] = cap;
    if(!compose_cell(2, parts, c)) {
        return -1; //OLEG:new return style of composed
    }

    // Replace all versions by the single composed letter.
    c->vers[0].let = is_lower(base_let) ? (uchar) u_bel : (uchar) U_bel;
    c->vers[0].prob = MIN(254, c->vers[0].prob + 2);
    c->recsource = 0; // artifact
    c->dens = 255; // undef
    c->nvers = 1;
    c->vers[1].let = c->vers[1].prob = 0;
    return 1;
}
示例#5
0
// Decides whether the first `len` characters of `s` form a slot name.
//
// Two shapes are accepted:
//  * first character lower case: the rest must be identifier characters,
//    where a ':' is allowed only as the final character or when followed by
//    an upper-case character, and in the latter case the name must also end
//    in ':';
//  * otherwise: every character must be punctuation other than
//    ( ) ' " : [ ], and a lone ^ | \ . is rejected.
// An empty string is never a slot name.
bool str_is_slot_name(const char* s, fint len) {
  assert(len >= 0, "shouldn't be negative length");
  if (len == 0) return false;

  const char first = *s;

  if (is_lower(first)) {
    for (int i = 1;  i < len;  i++) {
      const char ch = s[i];
      if (is_id_char(ch))      continue;
      if (ch != ':')           return false;
      if (i + 1 == len)        return true;  // this was final ":"
      if (!is_upper(s[i + 1])) return false; // after ":" must be uppercase
      if (s[len-1] != ':')     return false; // one ":" -> last is ":"
    }
    return true;
  }

  // Operator-style name.
  if (!is_punct(first)) return false;
  if (len == 1 &&
      (first == '^' || first == '|' || first == '\\' || first == '.')) {
    return false;
  }
  for (int i = 0;  i < len;  i++) {
    const char ch = s[i];
    if (!is_punct(ch)) return false;
    switch (ch) {
     case '(':  case ')':  case '\'':  case '\"':  case ':':
     case '[':  case ']':
      return false;
     default:
      break;
    }
  }
  return true;
}
示例#6
0
/* Appends one character to the NUL-terminated string in g_sb_token,
 * converting it with _toupper() first when is_lower() reports it as lower
 * case. No bounds check is performed here; the caller must guarantee that
 * g_sb_token has room for one more character plus the terminator. */
static void tokencat ( char c ) {
  char *tail = strchr ( g_sb_token, '\0' );

  tail[0] = is_lower ( c ) ? _toupper ( c ) : c;
  tail[1] = '\0';
}
示例#7
0
/**
 * Returns 1 if the string 's' is only made of lowercase letters,
 * according to the given alphabet, 0 otherwise.
 */
int is_sequence_of_lowercase_letters(const unichar* s,const Alphabet* alphabet) {
int i=0;
while (s[i]!='\0') {
  if (!is_lower(s[i],alphabet)) return 0;
  i++;
}
return 1;
}
示例#8
0
 // Returns true when the capitalisation of `word` is one of: all lower case
 // after the first character (whatever the first character is), or all
 // upper case including the first character.
 bool detectCapitalUse(string word) {
     const bool starts_upper = is_upper(word[0]);
     bool tail_has_lower = false;
     bool tail_has_upper = false;

     // Only the characters after the first one are scanned.
     for(int k=1; k<word.size(); k++) {
         if (is_lower(word[k])) tail_has_lower = true;
         if (is_upper(word[k])) tail_has_upper = true;
     }

     if (!tail_has_upper) {
         return true;
     }
     return starts_upper && !tail_has_lower;
 }
示例#9
0
/*
 * Sanity-checks a pair of sparse matrices using assertions only; nothing is
 * returned and neither matrix is modified by the code visible here.
 *
 * Checked through EXPENSIVE_ASSERT: L passes is_lower() and
 * check_symbolic_zeros(), P passes is_symmetric().
 * Checked through assert(): both matrices have the same N, and P->nz equals
 * NZ_SYM(L->nz, L->N).
 */
static void validate_matrices(const SparseMatrix *L, const SparseMatrix *P)
{
    EXPENSIVE_ASSERT(is_lower(L));
    EXPENSIVE_ASSERT(check_symbolic_zeros(L));
    EXPENSIVE_ASSERT(is_symmetric(P));

    /* Dimension and non-zero-count consistency between L and P. */
    assert(L->N == P->N);
    assert(P->nz == NZ_SYM(L->nz, L->N));
}
示例#10
0
/*-----------------------------------------------------------------------------
** Function:	init_type()
** Purpose:	Initialization routine for this file. It fills the tables
**		|trans_lower|, |trans_upper| and |trans_id| for every
**		index from 0 to 255: upper case characters map to their
**		lower case form in |trans_lower|, lower case characters
**		map to their upper case form in |trans_upper|, and
**		|trans_id| maps every index to itself. All other entries
**		map to themselves.
**
**		This has to be called before some of the macros in
**		|type.h| will work as described.
**
**		Note that this function is for internal purposes
**		only. The normal user should call |init_bibtool()|
**		instead.
** Arguments:	none
** Returns:	nothing
**___________________________________________________			     */
void init_type()
{ int c;

  for ( c = 0; c < 256; ++c )
  {
    if ( is_upper(c) ) { trans_lower[c] = to_lower(c); }
    else               { trans_lower[c] = c;           }

    if ( is_lower(c) ) { trans_upper[c] = to_upper(c); }
    else               { trans_upper[c] = c;           }

    trans_id[c] = c;
  }
}
示例#11
0
/**
 * Computes the Caesar-shifted replacement for a symbol, wrapping around
 * inside the symbol's own range.
 *
 * Digits are wrapped within '0'..'9', lowercase letters within 'a'..'z' and
 * uppercase letters within 'A'..'Z' (the wrapping itself is done by cycle()).
 * Any other symbol is returned unchanged.
 */
short offset(short symbol, int counter){
    const short shifted = symbol + counter;

    if (is_digit(symbol)){
        return cycle(shifted, '0', '9');
    }
    else if (is_lower(symbol)){
        return cycle(shifted, 'a', 'z');
    }
    else if (is_upper(symbol)){
        return cycle(shifted, 'A', 'Z');
    }
    else {
        return symbol;
    }
}
示例#12
0
// Flip the case of a character: lowercase is looked up in the upcase table, uppercase in the lowcase table.  Lowercase is
// tested first.  Anything that is neither is returned unchanged.
int chcase(int ch) {
	int flipped = ch;

	if(is_lower(ch))
		flipped = upcase[ch];
	else if(is_upper(ch))
		flipped = lowcase[ch];

	return flipped;
	}
示例#13
0
/*
 * Encode one character with the given key.
 * Upper-case letters are rotated relative to 'A' and lower-case letters
 * relative to 'a', both over an alphabet of 26; any other character is
 * returned as is.
 */
char encode(char src, int key)
{
    if (is_upper(src))
    {
        return rotate(src, 'A', key, 26);
    }
    return is_lower(src) ? rotate(src, 'a', key, 26) : src;
}
示例#14
0
int minimumNumber(int n, std::string p) {
    int t[4] = {0};
    for (std::size_t i = 0; i < p.size(); ++i)
        if (is_upper(p[i]))
            t[0] = 1;
        else if (is_lower(p[i]))
            t[1] = 1;
        else if (is_digit(p[i]))
            t[2] = 1;
        else if (is_special(p[i]))
            t[3] = 1;

    int s = 0;
    for (int i = 0; i < 4; ++i)
        s += t[i];
    if (n < 6)
        return std::max(6 - n, 4 - s);
    else
        return 4 - s;
}
示例#15
0
文件: iot.cpp 项目: nctan/quneiform
// Tries to merge the component `cap`, lying above cell `c`, into `c` as the
// diacritic of the Cyrillic "short i" letter.
// Returns 0 when `cap` is rejected by one of the geometric checks, -1 when
// compose_cell() fails, and 1 after `c` has been rewritten to hold the single
// version '\xa9' or '\x89' (chosen by the case of the original first version).
int16_t rec_ii(cell* c,cell * cap)
{
    cell *clist[8];
    uchar let;
    // Read the letter up front; it is used after compose_cell() below.
    let = c->vers[0].let;
    if( cap->row > c->row )        return 0; // not a cap
    if( cap->w*3 < c->w )          return 0; // not so wide as need
    if( cap->w < 4 || cap->h < 3 ) return 0; // just dot
    if( cap->col < c->col-2 )      return 0; // left dust
    if(c->row > cap->row+cap->h &&
            (c->row-(cap->row+cap->h)) > c->h/2 ) return 0; // dust lay so high
    // Strict centering: applies to every letter other than r_cu_u, and to
    // r_cu_u only when the cell has the erect_rot flag set.
    if( let != r_cu_u || (let == r_cu_u&&(c->pos_inc&erect_rot)) )
        // 'u' with cap
        if( c->col+c->w/2 < cap->col ||
                c->col+c->w/2 > cap->col+cap->w) return 0;  // not centered dust
    // Looser test for r_cu_u: the horizontal extents only have to overlap.
    if( let == r_cu_u)  // 'u' with cap
        if( c->col+c->w < cap->col ||
                c->col      > cap->col+cap->w) return 0;  // not centered dust
    // The leading `0&&` disables this base-line check; it never runs.
    if(0&&!p2_active)  // OLEG
        if( let==r_cu_u || let==(uchar)'\xa8' /* Cyrillic small letter i */ )
        {
            B_LINES bl;
            get_b_lines(c,&bl);
            if( cap->row+cap->h<=bl.b1+1 )
                return 0;
        }

    clist[0]=c;
    clist[1]=cap;
    if( !compose_cell(2,clist,c) )
        return -1; //OLEG:new return style of composed
    // Replace all versions by the single composed letter.
    let = is_lower(let) ? (uchar)'\xa9' /* Cyrillic small short i */ : (uchar)'\x89' /* Cyrillic capital short I */;
    c->vers[0].let = let;
    c->vers[0].prob=MIN(254,c->vers[0].prob+2);
    c->recsource = 0; // artifact
    c->dens = 255; // undef
    c->nvers=1;
    c->vers[1].let=c->vers[1].prob=0;
    return 1;
}
示例#16
0
文件: match.c 项目: sylware/lwl
//input: *c=='[' **pc==':'
//Handles a character class inside a bracket expression: the class name is
//extracted by bracket_char_class_get(), then the subject character **sc is
//tested against the predicate of that class. On membership the result of
//bracket_matched() is returned; otherwise the next pattern character is
//loaded into *c, *pc is advanced and OK is returned. A non-OK result of
//bracket_char_class_get() is passed straight back. An unknown class name
//matches nothing.
static u16 bracket_class(u8 *c,u8 **pc,u8 **sc,u8 not,u8 sc_folded)
{
  u8 class_name[CHAR_CLASS_MAX+1];//don't forget the 0 terminating char
  u16 r;
  u8 member=0;

  r=bracket_char_class_get(c,pc,not,sc_folded,&class_name[0]);
  if(r!=OK) return r;

  if(STREQ(class_name,"alnum")){if(is_alnum(**sc)) member=1;}
  else if(STREQ(class_name,"alpha")){if(is_alpha(**sc)) member=1;}
  else if(STREQ(class_name,"blank")){if(is_blank(**sc)) member=1;}
  else if(STREQ(class_name,"cntrl")){if(is_cntrl(**sc)) member=1;}
  else if(STREQ(class_name,"digit")){if(is_digit(**sc)) member=1;}
  else if(STREQ(class_name,"graph")){if(is_graph(**sc)) member=1;}
  else if(STREQ(class_name,"lower")){if(is_lower(**sc)) member=1;}
  else if(STREQ(class_name,"print")){if(is_print(**sc)) member=1;}
  else if(STREQ(class_name,"punct")){if(is_punct(**sc)) member=1;}
  else if(STREQ(class_name,"space")){if(is_space(**sc)) member=1;}
  else if(STREQ(class_name,"upper")){if(is_upper(**sc)) member=1;}
  else if(STREQ(class_name,"xdigit")){if(is_xdigit(**sc)) member=1;}

  if(member) return bracket_matched(c,pc,not);

  *c=*(*pc)++;
  return OK;
}
示例#17
0
/*
 * Converts the leading digits of cp to an unsigned long.
 *
 * cp:   string to parse; no leading whitespace or sign is skipped.
 * endp: when non-NULL, receives a pointer to the first character that was
 *       not consumed.
 * base: numeric base; 0 selects it from the prefix: "0x" followed by a hex
 *       digit means 16, any other leading "0" means 8, otherwise 10. The
 *       prefix is only consumed in this auto-detect mode.
 *
 * Parsing stops at the first character that is not a hex digit or whose
 * value is not below the base. Overflow is not detected.
 */
unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
{
	unsigned long acc = 0;
	unsigned long digit;

	if (base == 0) {
		if (*cp != '0') {
			base = 10;
		} else {
			cp++;
			if ((*cp == 'x') && is_xdigit(cp[1])) {
				cp++;
				base = 16;
			} else {
				base = 8;
			}
		}
	}

	for (; is_xdigit(*cp); cp++) {
		if (is_digit(*cp))
			digit = *cp-'0';
		else
			digit = (is_lower(*cp) ? toupper(*cp) : *cp)-'A'+10;

		/* A digit too large for the base ends the number. */
		if (digit >= base)
			break;

		acc = acc*base + digit;
	}

	if (endp)
		*endp = (char *)cp;
	return acc;
}
示例#18
0
 // A character is alphabetic when is_lower() or is_upper() accepts it.
 bool is_alpha(char ch) {
     if (is_lower(ch)) {
         return true;
     }
     if (is_upper(ch)) {
         return true;
     }
     return false;
 }
示例#19
0
// Letter test.  A character counts as a letter when it appears in either the upper or the lower case table (even if it
// gets translated to itself).  The upper case table is consulted first.
bool isletter(int ch) {

	if(is_upper(ch))
		return true;
	if(is_lower(ch))
		return true;
	return false;
	}
// Not implemented: always reports failure and leaves `outFamilyName` and
// `ids` untouched.
//
// The previous body consisted of a disabled (#if 0) region holding two
// incompatible drafts copied from the single-font matcher. It referenced
// names that do not exist in this signature (familyName, outIdentity,
// outStyle) and could not have been compiled if enabled, so it has been
// removed rather than kept as misleading reference code.
//
// TODO: implement by building an FcPattern from `inFamilyName`, sorting the
// candidate fonts, and filling `ids` with one FontIdentity per match.
bool SkFontConfigInterfaceDirect::matchFamilySet(const char inFamilyName[],
                                                 SkString* outFamilyName,
                                                 SkTArray<FontIdentity>* ids) {
    // The lock is still acquired so that the observable behavior (waiting on
    // mutex_ before returning) is the same as before the cleanup.
    SkAutoMutexAcquire ac(mutex_);

    return false;
}
示例#21
0
/**
 * Returns a control byte that represents the characteristics of the given token.
 */
unsigned char get_control_byte(const unichar* token,const Alphabet* alph,struct string_hash* err,TokenizationPolicy tokenization_policy) {
    int i;
    int tmp;
    unsigned char c=0;
    if (token==NULL || token[0]=='\0') {
        fatal_error("NULL or empty token in get_control_byte\n");
    }
    /* We consider that a token starting with a letter is a word */
    if (is_letter(token[0],alph)) {
        set_bit_mask(&c,MOT_TOKEN_BIT_MASK);
        /* If a token is a word, we check if it is in the 'err' word list
         * in order to answer the question <!DIC>. We perform this test in order
         * to avoid taking "priori" as an unknown word if the compound "a priori"
         * is in the text. */
        if (err!=NULL && get_value_index(token,err,DONT_INSERT)!=-1) {
            set_bit_mask(&c,NOT_DIC_TOKEN_BIT_MASK);
        }
        if (is_upper(token[0],alph)) {
            set_bit_mask(&c,PRE_TOKEN_BIT_MASK);
            i=0;
            tmp=0;
            while (token[i]!='\0') {
                if (is_lower(token[i],alph)) {
                    tmp=1;
                    break;
                }
                i++;
            }
            if (!tmp) {
                set_bit_mask(&c,MAJ_TOKEN_BIT_MASK);
            }
            return c;
        }
        i=0;
        tmp=0;
        while (token[i]!='\0') {
            if (is_upper(token[i],alph)) {
                tmp=1;
                break;
            }
            i++;
        }
        if (!tmp) {
            set_bit_mask(&c,MIN_TOKEN_BIT_MASK);
        }
        return c;
    }
    /* If the token doesn't start with a letter, we start with
     * checking if it is a tag like {today,.ADV} */
    if (token[0]=='{' && u_strcmp(token,"{S}") && u_strcmp(token,"{STOP}")) {
        /* Anyway, such a tag is classed as verifying <MOT> and <DIC> */
        set_bit_mask(&c,MOT_TOKEN_BIT_MASK|DIC_TOKEN_BIT_MASK|TDIC_TOKEN_BIT_MASK);
        struct dela_entry* temp=tokenize_tag_token(token);
        if (is_upper(temp->inflected[0],alph)) {
            set_bit_mask(&c,PRE_TOKEN_BIT_MASK);
            i=0;
            tmp=0;
            while (temp->inflected[i]!='\0') {
                if (is_letter(temp->inflected[i],alph) && is_lower(temp->inflected[i],alph)) {
                    tmp=1;
                    break;
                }
                i++;
            }
            if (!tmp) {
                set_bit_mask(&c,MAJ_TOKEN_BIT_MASK);
            }
        }
        else {
            i=0;
            tmp=0;
            while (temp->inflected[i]!='\0') {
                if (is_letter(temp->inflected[i],alph) && is_upper(temp->inflected[i],alph)) {
                    tmp=1;
                    break;
                }
                i++;
            }
            if (!tmp) {
                set_bit_mask(&c,MIN_TOKEN_BIT_MASK);
            }
        }
        if (!is_a_simple_word(temp->inflected,tokenization_policy,alph)) {
            /* If the tag is a compound word, we say that it verifies the <CDIC> pattern */
            set_bit_mask(&c,CDIC_TOKEN_BIT_MASK);
        }
        free_dela_entry(temp);
    }
    return c;
}
示例#22
0
/**
 * Takes a given unicode string 'dest' and
 * replaces any lowercase letter by the set made of itself and
 * its uppercase equivalent, surrounded with square brackets if
 * the letter was not already between square brackets.
 * Examples:
 *
 * "For" => "F[oO][rR]"
 * "F[ao]r" => "F[aAoO][rR]"
 *
 * The output is stored in 'src'. The function assumes that 'src' is
 * wide enough.
 *
 * This function is used for morphological filter regular expressions.
 */
void replace_letter_by_letter_set(const Alphabet* a,unichar* dest,const unichar* src) {
int i=0,j=0;
char inside_a_set=0;
while (src[i]!='\0') {
   switch (src[i]) {
      case '\\':
         if (src[i+1]=='\0') {
             // there is nothing after a backslash, then we stop,
             // and the RE compiler may indicate an error
             dest[j++] = src[i++];
             dest[j] = src[i];
             return;
         }
         if (is_lower(src[i+1],a)) {
             // this is a lowercase letter in Unitex alphabet :
             // we don't need "\" and we make expansion "[eE]"
             ++i;
             if (!inside_a_set) dest[j++]='[';
             dest[j++]=src[i];
             if (a==NULL) {
                /* If there is no alphabet file, we just consider the unique
                 * uppercase variant of the letter */
                dest[j++]=u_toupper(src[i]);
             } else {
			 unichar* tbrowse = NULL;
			 int i_pos_in_array_of_string = a->pos_in_represent_list[src[i]];
			 if (i_pos_in_array_of_string != 0)
				 tbrowse = a->t_array_collection[i_pos_in_array_of_string];
			 if (tbrowse != NULL)
				 while ((*tbrowse) != '\0') {
					 dest[j++]=*(tbrowse++);
				 }
             }
             if (!inside_a_set) dest[j++]=']';
             i++;
          } else {
             // others cases :
             // we keep the "\" and the letter
             dest[j++] = src[i++];
             dest[j++] = src[i++];
          }
          break;
       case '[':
          dest[j++]=src[i++];
          inside_a_set=1;
          break;
       case ']':
          dest[j++]=src[i++];
          inside_a_set=0;
          break;
       case '.': case '*': case '+': case '?': case '|': case '^': case '$':
       case ':': case '(': case ')': case '{': case '}': case '1': case '2':
       case '3': case '4': case '5': case '6': case '7': case '8': case '9':
          dest[j++]=src[i++];
          break;
       default:
          if (is_lower(src[i],a)) {
             if (!inside_a_set) dest[j++]='[';
             dest[j++]=src[i];
             if (inside_a_set && src[i+1]=='-') {
            	 /* Special case:
            	  * if we had [a-d], we don't want to turn it into
            	  * [aA-dD], but rather into [a-dA-D]. In such a case,
            	  * we just use u_toupper
            	  */
            	 i=i+2;
            	 dest[j++]='-';
            	 dest[j++]=src[i++];
            	 dest[j++]=u_toupper(dest[i-3]);
            	 dest[j++]='-';
            	 dest[j++]=u_toupper(src[i-1]);
            	 continue;
             }

             if (a==NULL) {
                /* If there is no alphabet file, we just consider the unique
                 * uppercase variant of the letter */
                dest[j++]=u_toupper(src[i]);
             } else {
                /* If there is an alphabet file, we use it */
                unichar* tbrowse = NULL;
                int i_pos_in_array_of_string = a->pos_in_represent_list[src[i]];
                if (i_pos_in_array_of_string != 0) {
                   tbrowse = a->t_array_collection[i_pos_in_array_of_string];
                }
                if (tbrowse != NULL) {
                   while ((*tbrowse) != '\0') {
                      dest[j++]=*(tbrowse++);
                   }
                }
             }
             if (!inside_a_set) dest[j++]=']';
             i++;
         }
          else {
             /* Not a lower case letter */
             dest[j++]=src[i++];
          }
   }
}
dest[j]='\0';
}
示例#23
0
// Numeric value of a digit character in a base above 10: '0'..'9' give
// 0..9, lowercase letters give 10 upwards from 'a', and every other
// character is treated as an uppercase letter giving 10 upwards from 'A'.
inline fint asnum(fint c) {
  if (is_digit(c)) {
    return c - '0';
  }
  if (is_lower(c)) {
    return c - 'a' + 10;
  }
  return c - 'A' + 10;
}
// Parses the SVG path data in `data` and, on success, swaps the built path
// into `*result` and returns true. Returns false (leaving `*result`
// untouched) when a number appears before any command or when an
// unrecognized command character is met.
//
// Commands handled: M, L, H, V, C, S, Q, T, Z and the non-standard '~'
// (two absolute points: move to the first, line to the second). A lowercase
// command letter selects relative coordinates. A token that starts with a
// digit, '-' or '+' repeats the current command.
bool SkParsePath::FromSVGString(const char data[], SkPath* result) {
    SkPath path;
    // f: point restored as the current point after 'Z'.
    // c: current point.  lastc: control point saved by the last curve.
    SkPoint f = {0, 0};
    SkPoint c = {0, 0};
    SkPoint lastc = {0, 0};
    SkPoint points[3];
    char op = '\0';
    char previousOp = '\0';
    bool relative = false;
    for (;;) {
        data = skip_ws(data);
        if (data[0] == '\0') {
            break;
        }
        char ch = data[0];
        if (is_digit(ch) || ch == '-' || ch == '+') {
            // Implicit repetition of the current command; there must be one.
            if (op == '\0') {
                return false;
            }
        } else {
            // New command: normalize to upper case, remembering relativity.
            op = ch;
            relative = false;
            if (is_lower(op)) {
                op = (char) to_upper(op);
                relative = true;
            }
            data++;
            data = skip_sep(data);
        }
        switch (op) {
            case 'M':
                data = find_points(data, points, 1, relative, &c);
                path.moveTo(points[0]);
                // Further coordinate pairs after a move are line-tos.
                op = 'L';
                c = points[0];
                break;
            case 'L':
                data = find_points(data, points, 1, relative, &c);
                path.lineTo(points[0]);
                c = points[0];
                break;
            case 'H': {
                SkScalar x;
                data = find_scalar(data, &x, relative, c.fX);
                path.lineTo(x, c.fY);
                c.fX = x;
            } break;
            case 'V': {
                SkScalar y;
                data = find_scalar(data, &y, relative, c.fY);
                path.lineTo(c.fX, y);
                c.fY = y;
            } break;
            case 'C':
                data = find_points(data, points, 3, relative, &c);
                goto cubicCommon;
            case 'S':
                // Smooth cubic: the first control point defaults to the
                // current point, or to the reflection of the previous
                // control point when the previous command was C or S.
                data = find_points(data, &points[1], 2, relative, &c);
                points[0] = c;
                if (previousOp == 'C' || previousOp == 'S') {
                    points[0].fX -= lastc.fX - c.fX;
                    points[0].fY -= lastc.fY - c.fY;
                }
            cubicCommon:
                path.cubicTo(points[0], points[1], points[2]);
                lastc = points[1];
                c = points[2];
                break;
            case 'Q':  // Quadratic Bezier Curve
                data = find_points(data, points, 2, relative, &c);
                goto quadraticCommon;
            case 'T':
                // Smooth quadratic: the control point defaults to the end
                // point, or to the reflection of the previous control point
                // when the previous command was Q or T.
                data = find_points(data, &points[1], 1, relative, &c);
                points[0] = points[1];
                if (previousOp == 'Q' || previousOp == 'T') {
                    points[0].fX = c.fX * 2 - lastc.fX;
                    points[0].fY = c.fY * 2 - lastc.fY;
                }
            quadraticCommon:
                path.quadTo(points[0], points[1]);
                lastc = points[0];
                c = points[1];
                break;
            case 'Z':
                path.close();
#if 0   // !!! still a bug?
                if (fPath.isEmpty() && (f.fX != 0 || f.fY != 0)) {
                    c.fX -= SkScalar.Epsilon;   // !!! enough?
                    fPath.moveTo(c);
                    fPath.lineTo(f);
                    fPath.close();
                }
#endif
                c = f;
                // Clearing op forces an explicit command after a close.
                op = '\0';
                break;
            case '~': {
                SkPoint args[2];
                data = find_points(data, args, 2, false, NULL);
                path.moveTo(args[0].fX, args[0].fY);
                path.lineTo(args[1].fX, args[1].fY);
            } break;
            default:
                return false;
        }
        // NOTE(review): f is only captured while previousOp is still 0,
        // i.e. after the first command and again after each 'Z' (which
        // resets op, and therefore previousOp, to 0) — confirm this is the
        // intended sub-path start tracking.
        if (previousOp == 0) {
            f = c;
        }
        previousOp = op;
    }
    // we're good, go ahead and swap in the result
    result->swap(path);
    return true;
}
示例#25
0
/* Returns 1 when ch is accepted by is_upper() or is_lower(), 0 otherwise. */
unsigned int is_letter(unsigned char ch) {
	if (is_upper(ch)) {
		return 1;
	}
	return is_lower(ch) ? 1 : 0;
}
示例#26
0
/* Writes a locale definition file to FILENAME.

   The file consists of a short header (escape and comment characters, a
   "generated" note mentioning VERSION), an LC_IDENTIFICATION section stamped
   with VERSION and the current UTC date, and an LC_CTYPE section produced by
   output_charclass, output_charmap and output_widthmap.

   Before the LC_CTYPE section is written, every code point from 0 up to
   0x10FFFF is checked against a set of consistency rules between the
   character classes; each violation is reported on stderr but does not stop
   the generation.

   The process exits with status 1 if FILENAME cannot be opened for writing
   or if writing/closing the stream fails.  */
static void
output_tables (const char *filename, const char *version)
{
  FILE *stream;
  unsigned int ch;

  stream = fopen (filename, "w");
  if (stream == NULL)
    {
      fprintf (stderr, "cannot open '%s' for writing\n", filename);
      exit (1);
    }

  /* File header.  */
  fprintf (stream, "escape_char /\n");
  fprintf (stream, "comment_char %%\n");
  fprintf (stream, "\n");
  fprintf (stream, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n",
	   version);
  fprintf (stream, "\n");

  /* Identification section.  */
  fprintf (stream, "LC_IDENTIFICATION\n");
  fprintf (stream, "title     \"Unicode %s FDCC-set\"\n", version);
  fprintf (stream, "source    \"UnicodeData.txt, PropList.txt\"\n");
  fprintf (stream, "address   \"\"\n");
  fprintf (stream, "contact   \"\"\n");
  fprintf (stream, "email     \"[email protected]\"\n");
  fprintf (stream, "tel       \"\"\n");
  fprintf (stream, "fax       \"\"\n");
  fprintf (stream, "language  \"\"\n");
  fprintf (stream, "territory \"Earth\"\n");
  fprintf (stream, "revision  \"%s\"\n", version);
  {
    time_t now;
    /* Room for "YYYY-MM-DD" plus the terminating NUL.  */
    char date[11];
    now = time (NULL);
    strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
    fprintf (stream, "date      \"%s\"\n", date);
  }
  fprintf (stream, "category  \"unicode:2001\";LC_CTYPE\n");
  fprintf (stream, "END LC_IDENTIFICATION\n");
  fprintf (stream, "\n");

  /* Verifications. */
  for (ch = 0; ch < 0x110000; ch++)
    {
      /* toupper restriction: "Only characters specified for the keywords
	 lower and upper shall be specified.  */
      if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
	fprintf (stderr,
		 "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
		 ucs_symbol (ch), ch, to_upper (ch));

      /* tolower restriction: "Only characters specified for the keywords
	 lower and upper shall be specified.  */
      if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
	fprintf (stderr,
		 "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
		 ucs_symbol (ch), ch, to_lower (ch));

      /* alpha restriction: "Characters classified as either upper or lower
	 shall automatically belong to this class.  */
      if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
	fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch));

      /* alpha restriction: "No character specified for the keywords cntrl,
	 digit, punct or space shall be specified."  */
      if (is_alpha (ch) && is_cntrl (ch))
	fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_digit (ch))
	fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_punct (ch))
	fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_space (ch))
	fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch));

      /* space restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, graph or xdigit shall be specified."
	 upper, lower, alpha already checked above.  */
      if (is_space (ch) && is_digit (ch))
	fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch));
      if (is_space (ch) && is_graph (ch))
	fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch));
      if (is_space (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch));

      /* cntrl restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, punct, graph, print or xdigit shall be
	 specified."  upper, lower, alpha already checked above.  */
      if (is_cntrl (ch) && is_digit (ch))
	fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_punct (ch))
	fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_graph (ch))
	fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_print (ch))
	fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch));

      /* punct restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, cntrl, xdigit or as the <space> character shall
	 be specified."  upper, lower, alpha, cntrl already checked above.  */
      if (is_punct (ch) && is_digit (ch))
	fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch));
      if (is_punct (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch));
      if (is_punct (ch) && (ch == 0x0020))
	fprintf (stderr, "%s is punct\n", ucs_symbol (ch));

      /* graph restriction: "No character specified for the keyword cntrl
	 shall be specified."  Already checked above.  */

      /* print restriction: "No character specified for the keyword cntrl
	 shall be specified."  Already checked above.  */

      /* graph - print relation: differ only in the <space> character.
	 How is this possible if there are more than one space character?!
	 I think susv2/xbd/locale.html should speak of "space characters",
	 not "space character".  */
      if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
	fprintf (stderr,
		 "%s is print but not graph|<space>\n", ucs_symbol (ch));
      if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
	fprintf (stderr,
		 "%s is graph|<space> but not print\n", ucs_symbol (ch));
    }

  /* Character classes, case mappings and widths.  */
  fprintf (stream, "LC_CTYPE\n");
  output_charclass (stream, "upper", is_upper);
  output_charclass (stream, "lower", is_lower);
  output_charclass (stream, "alpha", is_alpha);
  output_charclass (stream, "digit", is_digit);
  output_charclass (stream, "outdigit", is_outdigit);
  output_charclass (stream, "blank", is_blank);
  output_charclass (stream, "space", is_space);
  output_charclass (stream, "cntrl", is_cntrl);
  output_charclass (stream, "punct", is_punct);
  output_charclass (stream, "xdigit", is_xdigit);
  output_charclass (stream, "graph", is_graph);
  output_charclass (stream, "print", is_print);
  output_charclass (stream, "class \"combining\";", is_combining);
  output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
  output_charmap (stream, "toupper", to_upper);
  output_charmap (stream, "tolower", to_lower);
  output_charmap (stream, "map \"totitle\";", to_title);
  output_widthmap (stream);
  fprintf (stream, "END LC_CTYPE\n");

  /* A pending stream error or a failing close both count as a write
     failure.  */
  if (ferror (stream) || fclose (stream))
    {
      fprintf (stderr, "error writing to '%s'\n", filename);
      exit (1);
    }
}
示例#27
0
/*
 *  Converts a lowercase character (as reported by is_lower) to upper case
 *  by subtracting the distance between 'a' and 'A'; any other character is
 *  returned unchanged.
 */
char to_upper(char c) {
	return is_lower(c) ? (char)(c - ('a' - 'A')) : c;
}
示例#28
0
/*
 * Returns x minus (is_lower(x) << 5).
 * When is_lower() yields exactly 1 the offset is 32, i.e. 'a' - 'A' in ASCII,
 * and when it yields 0 the character is returned unchanged.
 * NOTE(review): assumes is_lower() returns only 0 or 1 -- confirm.
 */
char to_upper(char x)
{
	const int case_offset = is_lower(x) << 5;

	return x - case_offset;
}
示例#29
0
/*
 * Tells whether c is accepted by this predicate: a character reported by
 * is_upper() or is_lower(), or an underscore.
 */
static bool is_identifier_first(char c)
{
    if (is_upper(c)) {
        return true;
    }
    if (is_lower(c)) {
        return true;
    }
    return c == '_';
}
示例#30
0
/**
 * Recursively explores graph n_graph of p->fst2 from state e, with the text
 * cursor at offset pos (all buffer accesses use pos+p->current_origin).
 *
 * Steps, in order:
 *  1. If depth exceeds MAX_DEPTH, an error is reported, p->output,
 *     p->input_length and p->stack are reset, the list *liste_arrivee is
 *     freed, and the function returns.
 *  2. If e is a final state: in the main graph (n_graph==0) the stack content
 *     is copied to p->output when pos>=p->input_length; in a subgraph the
 *     match is recorded in *liste_arrivee through insert_if_absent().
 *  3. If the end of the text is reached, the function returns.
 *  4. The letter sequence transitions stored in p->token_tree[e] are tried.
 *  5. Every transition of the state is tried; each one that matches recurses
 *     on its destination state.
 *
 * mot_token_buffer is scratch storage in which the word being read is built.
 *
 * NOTE(review): the recursive calls below pass 7 arguments and omit
 * prv_alloc_recycle; this presumably relies on a default value given in the
 * declaration -- confirm which allocator the recursive levels actually use.
 * NOTE(review): several tests read p->buffer[pos2+p->current_origin] right
 * after a loop that may stop at p->text_buffer->size -- confirm the buffer
 * holds a readable element at that index.
 */
void scan_graph(int n_graph,         // number of current graph
                     int e,          // number of current state
                     int pos,        // position in the text, added to p->current_origin to index p->buffer
                     int depth,      // recursion depth, compared to MAX_DEPTH
                     struct parsing_info** liste_arrivee,
                     unichar* mot_token_buffer,
                     struct fst2txt_parameters* p,Abstract_allocator prv_alloc_recycle) {
Fst2State etat_courant=p->fst2->states[e];
if (depth > MAX_DEPTH) {

  error(  "\n"
          "Maximal stack size reached in graph %i!\n"
          "Recognized more than %i tokens starting from:\n"
          "  ",
          n_graph, MAX_DEPTH);
  // NOTE(review): this dumps 60 buffer entries from the current origin without
  // checking p->text_buffer->size, and passes a single buffer element to %S --
  // confirm both are intended.
  for (int i=0; i<60; i++) {
    error("%S",p->buffer[p->current_origin+i]);
  }
  error("\nSkipping match at this position, trying from next token!\n");
  p->output[0] = '\0';  // clear output
  p->input_length = 0; // reset the input length
  empty(p->stack);    // clear output stack
  if (liste_arrivee != NULL) {
    while (*liste_arrivee != NULL) { // free list of subgraph matches
      struct parsing_info* la_tmp=*liste_arrivee;
      *liste_arrivee=(*liste_arrivee)->next;
      la_tmp->next=NULL; // so that the next item is not freed
      free_parsing_info(la_tmp, prv_alloc_recycle);
    }
  }
  return;
  //  exit(1); // don't exit, try at next position
}
depth++;

if (is_final_state(etat_courant)) {
   // if we are in a final state
  p->stack->stack[p->stack->stack_pointer+1]='\0';
  if (n_graph == 0) { // in main graph
    if (pos>=p->input_length/*sommet>u_strlen(output)*/) {
      // and if the recognized input is at least as long as the current one, it replaces it
      u_strcpy(p->output,p->stack->stack);
      p->input_length=(pos);
    }
  } else { // in a subgraph
    (*liste_arrivee)=insert_if_absent(pos,-1,-1,(*liste_arrivee),p->stack->stack_pointer+1,
                                      p->stack->stack,p->variables,NULL,NULL,-1,-1,NULL,-1, prv_alloc_recycle);
  }
}

if (pos+p->current_origin==p->text_buffer->size) {
   // if we are at the end of the text, we return
   return;
}

// stack height on entry; the stack pointer is reset to SOMMET-1 before each transition below
int SOMMET=p->stack->stack_pointer+1;
int pos2;

/* If there are some letter sequence transitions like %hello, we process them */
if (p->token_tree[e]->transition_array!=NULL) {
   if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
   /* we don't keep this line because problems occur in sentence tokenizing
    * if the return sequence is considered by default as a separator like space
    else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
    */
   else pos2=pos;
   int position=0;
   unichar *token=mot_token_buffer;
   if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION
       || (is_letter(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) {
      /* In character by character mode, or when we are at the beginning of a
       * letter sequence, we read letters into token; in character by character
       * mode we stop after the first one */
      while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
         token[position++]=p->buffer[(pos2++)+p->current_origin];
         if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION) {
            break;
         }
      }
      token[position]='\0';
      if (position!=0 &&
          (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || !(is_letter(token[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)))) {
       // we proceed only if we have exactly read the contenu sequence
       // in both modes MERGE and REPLACE, we process the transduction if any
       // stack pointer to restore before trying each matching tag
       int SOMMET2=p->stack->stack_pointer;
       Transition* RES=get_matching_tags(token,p->token_tree[e],p->alphabet);
       Transition* TMP;
       unichar* mot_token_new_recurse_buffer=NULL;
       if (RES!=NULL) {
          // we allocate a new mot_token_buffer for the scan_graph recursion because we need to preserve the current
          // token=mot_token_buffer
          mot_token_new_recurse_buffer=(unichar*)malloc(MOT_BUFFER_TOKEN_SIZE*sizeof(unichar));
          if (mot_token_new_recurse_buffer==NULL) {
            fatal_alloc_error("scan_graph");
          }
       }
       while (RES!=NULL) {
           p->stack->stack_pointer=SOMMET2;
          Fst2Tag etiq=p->fst2->tags[RES->tag_number];
          traiter_transduction(p,etiq->output);
          int longueur=u_strlen(etiq->input);
          // token is temporarily cut at the length of the tag input, then restored below
          unichar C=token[longueur];
          token[longueur]='\0';
          if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
             // if we are in MERGE mode, we add to output the char we have read
             push_input_string(p->stack,token,0);
          }
          token[longueur]=C;
          // the recursion restarts right after the first longueur characters of the token
          scan_graph(n_graph,RES->state_number,pos2-(position-longueur),depth,liste_arrivee,mot_token_new_recurse_buffer,p);
          // each element of the list returned by get_matching_tags is freed once used
          TMP=RES;
          RES=RES->next;
          free(TMP);
       }
       if (mot_token_new_recurse_buffer!=NULL) {
         free(mot_token_new_recurse_buffer);
       }
   }
}
}

Transition* t=etat_courant->transitions;
while (t!=NULL) {
    p->stack->stack_pointer=SOMMET-1;
      // we process the transition of the current state
      int n_etiq=t->tag_number;
      if (n_etiq<0) {
         // case of a sub-graph
         struct parsing_info* liste=NULL;
         unichar* pile_old;
         // we save a copy of the stack so that it can be restored after the subgraph exploration
         p->stack->stack[p->stack->stack_pointer+1]='\0';
         pile_old = u_strdup(p->stack->stack);
         // we explore the subgraph; its matches are collected in liste
         scan_graph((((unsigned)n_etiq)-1),p->fst2->initial_states[-n_etiq],pos,depth,&liste,mot_token_buffer,p);
         while (liste!=NULL) {
            // for each subgraph match, we restore its stack and continue in the
            // current graph from the destination state of the transition
            p->stack->stack_pointer=liste->stack_pointer-1;
            u_strcpy(p->stack->stack,liste->stack);
            scan_graph(n_graph,t->state_number,liste->position,depth,liste_arrivee,mot_token_buffer,p);
            struct parsing_info* l_tmp=liste;
            liste=liste->next;
            l_tmp->next=NULL; // so that the next item is not freed
            free_parsing_info(l_tmp, prv_alloc_recycle);
         }
         u_strcpy(p->stack->stack,pile_old);
         free(pile_old);
         p->stack->stack_pointer=SOMMET-1;
      }
      else {
         // case of a normal tag
         Fst2Tag etiq=p->fst2->tags[n_etiq];
         unichar* contenu=etiq->input;
         int contenu_len_possible_match=u_len_possible_match(contenu);
         if (etiq->type==BEGIN_OUTPUT_VAR_TAG) {
        	 fatal_error("Unsupported $|XXX( tags in Fst2Txt\n");
         }
         if (etiq->type==END_OUTPUT_VAR_TAG) {
           	 fatal_error("Unsupported $|XXX) tags in Fst2Txt\n");
         }
         if (etiq->type==BEGIN_VAR_TAG) {
            // case of a $a( variable tag
            //int old;
            struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable);
            if (L==NULL) {
               fatal_error("Unknown variable: %S\n",etiq->variable);
            }
            //old=L->start;
            // a leading space is skipped, unless it is the last character of the text
            if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) {
               pos2=pos+1;
               if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
            }
            //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
            else pos2=pos;
            L->start_in_tokens=pos2;
            scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
            //L->start=old;
         }
         else if (etiq->type==END_VAR_TAG) {
              // case of a $a) variable tag
              //int old;
              struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable);
              if (L==NULL) {
                 fatal_error("Unknown variable: %S\n",etiq->variable);
              }
              //old=L->end;
              if (pos>0)
                L->end_in_tokens=pos-1;
              else L->end_in_tokens=pos;
              // BUG: when the buffer changes, think about the case where start is in the old buffer and end in the new one
              scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
              //L->end=old;
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MOT_LN5))) {
              // case of transition by any sequence of letters
              if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) {
                 pos2=pos+1;
                 if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
              }
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                     while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                           mot[position++]=p->buffer[(pos2++)+p->current_origin];
                     }
                     mot[position]='\0';
                     if (position!=0) {
                       // we proceed only if we have read a letter sequence
                       // in both modes MERGE and REPLACE, we process the transduction if any
                       traiter_transduction(p,etiq->output);
                       if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                         // if we are in MERGE mode, we add to output the char we have read
                         push_output_string(p->stack,mot);
                       }
                       scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                     }
              }
         }
         else if ((contenu_len_possible_match==4) && (!u_trymatch_superfast4(contenu,ETIQ_NB_LN4))) {
              // case of transition by any sequence of digits
              if (p->buffer[pos+p->current_origin]==' ') {
                 pos2=pos+1;
                 if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
              }
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              while (pos2+p->current_origin<p->text_buffer->size && (p->buffer[pos2+p->current_origin]>='0')
                     && (p->buffer[pos2+p->current_origin]<='9')) {
                 mot[position++]=p->buffer[(pos2++)+p->current_origin];
              }
              mot[position]='\0';
              if (position!=0) {
                 // we proceed only if we have read a digit sequence
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to output the char we have read
                     push_output_string(p->stack,mot);
                 }
                 scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MAJ_LN5))) {
              // case of upper case letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_upper(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read an upper case letter sequence
                   // which is not followed by a letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to output the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MIN_LN5))) {
              // case of lower case letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_lower(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read a lower case letter sequence
                   // which is not followed by a letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to output the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PRE_LN5))) {
              // case of a sequence beginning by an upper case letter
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  (is_upper(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read a letter sequence
                   // which is not followed by a letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to output the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PNC_LN5))) {
              // case of a punctuation sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar C=p->buffer[pos2+p->current_origin];
              // single-character punctuation signs accepted by this tag
              if (C==';' || C=='!' || C=='?' ||
                  C==':' ||  C==0xbf ||
                  C==0xa1 || C==0x0e4f || C==0x0e5a ||
                  C==0x0e5b || C==0x3001 || C==0x3002 ||
                  C==0x30fb) {
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to output the char we have read
                    push(p->stack,C);
                 }
                 scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
              }
              else {
                   // we consider the case of ...
                   // BUG: if ... appears at the end of the buffer
                   if (C=='.') {
                      if ((pos2+p->current_origin+2)<p->text_buffer->size && p->buffer[pos2+p->current_origin+1]=='.' && p->buffer[pos2+p->current_origin+2]=='.') {
                         traiter_transduction(p,etiq->output);
                         if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                            // if we are in MERGE mode, we add to output the ... we have read
                            push(p->stack,C);push(p->stack,C);push(p->stack,C);
                         }
                         scan_graph(n_graph,t->state_number,pos2+3,depth,liste_arrivee,mot_token_buffer,p);
                      } else {
                        // we consider the . as a normal punctuation sign
                        traiter_transduction(p,etiq->output);
                        if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                          // if we are in MERGE mode, we add to output the char we have read
                          push(p->stack,C);
                        }
                        scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
                      }
                   }
              }
         }
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_E_LN3))) {
              // case of an empty sequence
              // in both modes MERGE and REPLACE, we process the transduction if any
              traiter_transduction(p,etiq->output);
              scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
         }
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_CIRC_LN3))) {
              // case of a new line sequence
              if (p->buffer[pos+p->current_origin]=='\n') {
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to output the char we have read
                    push(p->stack,'\n');
                 }
                 scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,'#')) && (!(etiq->control&RESPECT_CASE_TAG_BIT_MASK))) {
              // case of a no space condition
              if (p->buffer[pos+p->current_origin]!=' ') {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,' '))) {
         // case of an obligatory space
              if (p->buffer[pos+p->current_origin]==' ') {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to output the char we have read
                    push(p->stack,' ');
                 }
                scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         // NOTE(review): this branch calls u_trymatch_superfast5 with an _LN3 constant,
         // while the other length-3 branches call u_trymatch_superfast3 -- confirm this is intended
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast5(contenu,ETIQ_L_LN3))) {
              // case of a single letter
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              if (is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to output the char we have read
                    push(p->stack,p->buffer[pos2+p->current_origin]);
                 }
                scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else {
              // case of a normal letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              if (etiq->control&RESPECT_CASE_TAG_BIT_MASK) {
                 // case of exact case match
                 int position=0;
                 while (pos2+p->current_origin<p->text_buffer->size && p->buffer[pos2+p->current_origin]==contenu[position]) {
                   pos2++; position++;
                 }
                 if (contenu[position]=='\0' && position!=0 &&
                     !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) {
                   // we proceed only if we have exactly read the contenu sequence
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to output the char we have read
                     push_input_string(p->stack,contenu,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
              else {
                 // case of variable case match
                 // the letter sequences may have been caught by the token tree structure
                 int position=0;
                 unichar* mot=mot_token_buffer;
                 while (pos2+p->current_origin<p->text_buffer->size && is_equal_or_uppercase(contenu[position],p->buffer[pos2+p->current_origin],p->alphabet)) {
                   mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (contenu[position]=='\0' && position!=0 &&
                     !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) {
                   // we proceed only if we have exactly read the contenu sequence
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to output the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
      }
      t=t->next;
}
}