Пример #1
0
/**
 * This function reads one concordance line from 'f', and splits its
 * tab-separated components into 'left', 'middle', 'right' and, up to the
 * end of the line, 'indices'. Every output string is '\0'-terminated.
 *
 * The capacity of the output buffers is not known here: the caller must
 * provide buffers that are large enough for the longest possible field.
 *
 * IMPORTANT: in order to fix a bug, we may have to reorder some matches. However, we can't
 *            easily reorder lines in concordance files. So, we indicate the length of the
 *            expected match instead of relying on the length of the match that was actually read.
 *            This is why there is the (++total>=60) hack: once the length of the left
 *            context plus 'expected_match_length' plus the number of right context
 *            characters read so far reaches 60, every further right context character
 *            is stored as '\0', which cuts the 'right' string at that point.
 *
 * If the end of file is met before a separator, the current field ends there
 * and the remaining fields are left empty, instead of looping forever.
 */
void read_concordance_line(U_FILE* f,unichar* left,unichar* middle,unichar* right,unichar* indices,int
        expected_match_length) {
int i,c;
/* Left context: everything up to the first tabulation */
i=0;
while ((c=u_fgetc(f))!='\t' && c!=EOF) {
   left[i++]=(unichar)c;
}
left[i]='\0';
int total=i+expected_match_length;
/* The match itself: everything up to the second tabulation */
i=0;
while ((c=u_fgetc(f))!='\t' && c!=EOF) {
   middle[i++]=(unichar)c;
}
middle[i]='\0';
/* Right context: the characters are still consumed after the limit has
 * been reached, but they are replaced by '\0' */
i=0;
while ((c=u_fgetc(f))!='\t' && c!=EOF) {
    if (++total>=60) {
        c='\0';
    }
   right[i++]=(unichar)c;
}
right[i]='\0';
/* Indices: the rest of the line */
i=0;
while ((c=u_fgetc(f))!='\n' && c!=EOF) {
   indices[i++]=(unichar)c;
}
indices[i]='\0';
}
Пример #2
0
/**
 * Rewrites the file 'text' with its braced strings protected.
 *
 * The file is copied character by character into a temporary file named
 * "<path>temp", where <path> is what get_path() returns for 'text'. Each time
 * a '{' is copied, the string returned by get_braced_string() is read from
 * the source and the result of protect_braced_string() is written in its
 * place. Finally copy_file(text,temp) is called.
 * NOTE(review): copy_file is presumably (destination,source), i.e. the
 * temporary file replaces 'text' - confirm against its declaration.
 *
 * @param text name of the file to process
 * @param encoding_output encoding used to open the temporary file
 * @param bom_output BOM setting used to open the temporary file
 * @param mask_encoding_compatibility_input encoding mask used to open both files
 *
 * The program exits with status 1 if the temporary file name does not fit
 * in FILENAME_MAX characters or if one of the files cannot be opened.
 */
void protect_special_characters(const char *text,Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input){

	U_FILE *source;
	U_FILE *destination;

	char temp_name_file[FILENAME_MAX];
	char path[FILENAME_MAX];
	get_path(text,path);
	/* 'path' may already fill its buffer, so appending "temp" must be bounded */
	int name_length = snprintf(temp_name_file,sizeof(temp_name_file),"%stemp",path);
	if( name_length < 0 || (size_t)name_length >= sizeof(temp_name_file)){
		fprintf(stderr,"Temporary file name is too long for %s\n",text);
		exit(1);
	}

	source = u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input, text,U_READ);
	if( source == NULL){
		perror("u_fopen\n");
		fprintf(stderr,"Cannot open file %s\n",text);
		exit(1);
	}

	destination = u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,temp_name_file,U_WRITE);
	if( destination == NULL){
		perror("u_fopen\n");
		fprintf(stderr,"Cannot open file %s\n",temp_name_file);
		exit(1);
	}

	int a;
	a = u_fgetc(source);
	while(a!=EOF){
		/* Every character read here is copied as is, including the '{' */
		u_fputc((unichar)a,destination);
		if(a=='{'){
			/* The content that follows the brace is written in its protected form */
			unichar *bracket_string = get_braced_string(source);
			unichar *protected_bracket_string = protect_braced_string(bracket_string);
			u_fprintf(destination,"%S",protected_bracket_string);
			free(bracket_string);
			free(protected_bracket_string);
		}

		a = u_fgetc(source);
	}

	u_fclose(source);
	u_fclose(destination);

	copy_file(text,temp_name_file);

	// TODO: the 'temp' file is left on disk and should be deleted
}
Пример #3
0
/**
 * Reads and processes a line of the Thai text file.
 * Returns 0 if the end of file has been reached; 1 otherwise.
 *
 * The line is read from inf->f. Empty lines are ignored. A line of
 * LINE_LENGTH characters or more is reported with error() and skipped up to
 * its end. Any other line is converted with convert_thai() and inserted with
 * get_node_thai(). inf->number_of_lines is incremented for every line that
 * is not ended by the end of file.
 */
int read_line_thai(struct sort_infos* inf) {
  /* One extra cell, so that the final '\0' always fits */
  unichar line[LINE_LENGTH + 1];
  unichar thai_line[LINE_LENGTH];
  int c = '\0';
  int ret = 1;
  int i = 0;
  /* The room left is tested before reading, so that no character is
   * consumed and then lost when the buffer is full */
  while (i < LINE_LENGTH && (c = u_fgetc(inf->f)) != '\n' && c != EOF) {
    line[i++] = (unichar) c;
  }
  line[i] = '\0';
  int too_long = (i == LINE_LENGTH);
  if (too_long) {
    /* We skip the end of the line, otherwise it would be read as if it
     * were one or several new lines */
    while ((c = u_fgetc(inf->f)) != '\n' && c != EOF) {
    }
  }
  if (c == EOF)
    ret = 0;
  else
    (inf->number_of_lines)++;
  if (i == 0) {
    /* We ignore the empty line */
    return ret;
  }
  if (too_long) {
    /* Too long lines are not taken into account */
    error("Line %d: line too long\n", inf->number_of_lines);
    return ret;
  }
  convert_thai(line, thai_line);
  get_node_thai(thai_line, 0, inf->root, line, inf);
  return ret;
}
Пример #4
0
/**
 * Reads and processes a line of the text file.
 * Returns 0 if the end of file has been reached; 1 otherwise.
 *
 * The line is read from inf->f. Empty lines are ignored. A line of
 * LINE_LENGTH characters or more is reported with error() and skipped up to
 * its end. Any other line is inserted with get_node().
 * inf->number_of_lines is incremented for every line that is not ended by
 * the end of file.
 */
int read_line(struct sort_infos* inf) {
  /* One extra cell, so that the final '\0' always fits */
  unichar line[LINE_LENGTH + 1];
  int c = '\0';
  int ret = 1;
  int i = 0;
  /* The room left is tested before reading, so that no character is
   * consumed and then lost when the buffer is full */
  while (i < LINE_LENGTH && (c = u_fgetc(inf->f)) != '\n' && c != EOF) {
    line[i++] = (unichar) c;
  }
  line[i] = '\0';
  int too_long = (i == LINE_LENGTH);
  if (too_long) {
    /* We skip the end of the line, otherwise it would be read as if it
     * were one or several new lines */
    while ((c = u_fgetc(inf->f)) != '\n' && c != EOF) {
    }
  }
  if (c == EOF)
    ret = 0;
  else
    (inf->number_of_lines)++;
  if (i == 0) {
    /* We ignore the empty line */
    return ret;
  }
  if (too_long) {
    /* Too long lines are not taken into account */
    error("Line %d: line too long\n", inf->number_of_lines);
    return ret;
  }
  get_node(line, 0, inf->root, inf);
  return ret;
}
Пример #5
0
/**
 * This function reads the given char order file.
 */
void read_char_order(const VersatileEncodingConfig* vec, const char* name,
    struct sort_infos* inf) {
  int c;
  int current_line = 1;
  U_FILE* f = u_fopen(vec, name, U_READ);
  if (f == NULL) {
    error("Cannot open file %s\n", name);
    return;
  }
  unichar current_canonical = '\0';
  int current_priority = 0;
  while ((c = u_fgetc(f)) != EOF) {
    if (c != '\n') {
      /* we ignore the \n char */
      if (inf->class_numbers[(unichar) c] != 0) {
        error("Error in %s: char 0x%x appears several times\n", name, c);
      } else {
        inf->class_numbers[(unichar) c] = current_line;
        if (current_canonical == '\0') {
          current_canonical = (unichar) c;
        }
        inf->canonical[(unichar) c] = current_canonical;
        inf->priority[(unichar) c] = ++current_priority;
      }
    } else {
      current_line++;
      current_canonical = '\0';
      current_priority = 0;
    }
  }
  u_fclose(f);
}
Пример #6
0
/**
 * Loads a match list. Match lists are supposed to have been
 * generated by the Locate program.
 *
 * The first line is expected to be "#X", where X is stored into '*header'
 * (when 'header' is not NULL) and selects the output policy:
 * 'D' => DEBUG_OUTPUTS, 'M' => MERGE_OUTPUTS, 'R', 'T' or 'X' =>
 * REPLACE_OUTPUTS, anything else => IGNORE_OUTPUTS. The policy is stored
 * into '*output_policy' when 'output_policy' is not NULL.
 *
 * Every following line that starts with six integers formatted as
 * "%d.%d.%d %d.%d.%d" produces one match, created with new_match(). The
 * matches are returned as a list in file order, or NULL if there is none.
 */
struct match_list* load_match_list(U_FILE* f,OutputPolicy *output_policy,unichar *header,Abstract_allocator prv_alloc) {
struct match_list* first=NULL;
struct match_list* last=NULL;
int start,end,start_char,end_char,start_letter,end_letter;
Ustring* buffer=new_Ustring();
/* We read the header, into a local variable if the caller does not want it */
unichar local_header=0;
if (header==NULL) {
  header=&local_header;
}
u_fscanf(f,"#%C\n",header);
OutputPolicy policy;
if (*header=='D') {
   policy=DEBUG_OUTPUTS;
   /* In debug mode, we have to skip the debug header: n_graphs lines,
    * plus one because we also have to skip the #[IMR] line */
   int n_graphs;
   u_fscanf(f,"%d\n",&n_graphs);
   while (n_graphs>=0) {
      readline(buffer,f);
      n_graphs--;
   }
} else if (*header=='M') {
   policy=MERGE_OUTPUTS;
} else if (*header=='R' || *header=='T' || *header=='X') {
   policy=REPLACE_OUTPUTS;
} else {
   /* 'I' and any unknown header */
   policy=IGNORE_OUTPUTS;
}
if (output_policy!=NULL) {
   (*output_policy)=policy;
}
const int keep_outputs=(policy!=IGNORE_OUTPUTS);
for (;;) {
   int n_fields=u_fscanf(f,"%d.%d.%d %d.%d.%d",&start,&start_char,&start_letter,&end,&end_char,&end_letter);
   if (n_fields!=6) {
      break;
   }
   /* A space after the positions announces an output; otherwise the
    * character that was read is dropped */
   if (u_fgetc(f)==' ') {
      readline(buffer,f);
      /* In debug mode, we have to stop at the char #1 */
      int cut=0;
      while (buffer->str[cut]!=1 && buffer->str[cut]!='\0') {
         cut++;
      }
      buffer->str[cut]='\0';
   }
   /* NOTE(review): when the line has no output, 'buffer' is not reset, so
    * the string given below is whatever the buffer contained before -
    * confirm that this is intended */
   struct match_list* node=new_match(start,end,start_char,end_char,start_letter,end_letter,keep_outputs?buffer->str:NULL,-1,NULL,prv_alloc);
   if (first==NULL) {
      first=node;
   } else {
      last->next=node;
   }
   last=node;
}
free_Ustring(buffer);
return first;
}
Пример #7
0
/**
 * \brief \b fgets working with \b U_FILE and storing \b char
 *
 * Needed to process configuration file
 *
 * Reads characters until a new line or \c EOF is met, or until \c n-1
 * characters have been stored. As with \b fgets, at most \c n cells of
 * \c line are written, including the final \c '\0', so that \c n can safely
 * be the size of the buffer. Unlike \b fgets, the new line is consumed but
 * not stored. When a line is longer than \c n-1 characters, the characters
 * that have not been stored stay in the file and are returned by the next
 * call. Each character is narrowed to \b char.
 *
 * @param[out] line the text read
 * @param[in] n size of \c line, i.e. max number of characters stored plus one
 * @param[in] u file descriptor
 *
 * @return NULL if no character has been read before \c EOF has been encountered
 *         or if the arguments are unusable (\c line is NULL or \c n < 1),
 *         \c line otherwise
 */
char *cassys_fgets(char *line, int n, U_FILE *u) {
	if (line == NULL || n < 1) {
		return NULL;
	}

	int i = 0;
	int c = '\0';

	/* The room left is tested before reading, so that a character is never
	 * consumed without being stored */
	while (i < n - 1) {
		c = u_fgetc(u);
		if (c == EOF || c == '\n') {
			break;
		}
		line[i++] = (char) c;
	}
	if (c == EOF && i == 0) {
		/* Nothing but the end of file: 'line' is left untouched */
		return NULL;
	}
	line[i] = '\0';
	return line;
}
Пример #8
0
/*
 * Scanf handler that consumes one character from 'input' and checks that it
 * is a percent sign (U+0025).
 *
 * '*argConverted' is set to 0 when the character is a percent sign and to -1
 * otherwise. The function always returns 1. 'info', 'args', 'fmt' and
 * 'fmtConsumed' are not used.
 */
static int32_t
u_scanf_simple_percent_handler(UFILE        *input,
                               u_scanf_spec_info *info,
                               ufmt_args    *args,
                               const UChar  *fmt,
                               int32_t      *fmtConsumed,
                               int32_t      *argConverted)
{
    /* 0x0025 is the code point of '%' */
    if(u_fgetc(input) == 0x0025) {
        /* a match converts no argument */
        *argConverted = 0;
    }
    else {
        /* anything else is a matching failure */
        *argConverted = -1;
    }
    return 1;
}
Пример #9
0
/**
 * Loads an alphabet file and returns the associated 'Alphabet*' structure.
 * If 'korean' is non null, we compute the equivalences between Chinese and Hangul
 * characters.
 */
Alphabet* load_alphabet(const VersatileEncodingConfig* vec,const char* filename,int korean) {
void* a=get_persistent_structure(filename);
if (a!=NULL) {
	return (Alphabet*)a;
}
U_FILE* f;
f=u_fopen(vec,filename,U_READ);
if (f==NULL) {
   return NULL;
}
Alphabet* alphabet=new_alphabet(korean);
int c;
unichar lower,upper;
while ((c=u_fgetc(f))!=EOF) {
      upper=(unichar)c;
      if (upper=='\n') {
    	  /* We skip empty lines */
    	  continue;
      }
      if (upper=='#') {
         // we are in the case of an interval #AZ -> [A..Z]
         lower=(unichar)u_fgetc(f);
         upper=(unichar)u_fgetc(f);
         if (lower>upper) {
            error("Error in alphabet file: for an interval like #AZ, A must be before Z\n");
            free_alphabet(alphabet);
            u_fclose(f);
            return NULL;
         }
         for (c=lower;c<=upper;c++) {
		   SET_CASE_FLAG_MACRO(c,alphabet,1|2);
           add_letter_equivalence(alphabet,(unichar)c,(unichar)c);
         }
         u_fgetc(f); // reading the \n
      }
      else {
		SET_CASE_FLAG_MACRO(upper,alphabet,1);
        lower=(unichar)u_fgetc(f);
        if (lower!='\n') {
          SET_CASE_FLAG_MACRO(lower,alphabet,2);
          u_fgetc(f); // reading the \n
          add_letter_equivalence(alphabet,lower,upper);
        }
        else {
          // we are in the case of a single (no min/maj distinction like in thai)
          SET_CASE_FLAG_MACRO(upper,alphabet,2);
          add_letter_equivalence(alphabet,upper,upper);
        }
      }
}
u_fclose(f);
return alphabet;
}
Пример #10
0
/**
 * Returns a newly allocated copy of the text found between the current
 * position of 'u' and the '}' that closes the current brace level.
 *
 * The text is read twice: a first pass counts the characters up to the
 * closing brace, then the file is moved back to the starting position and
 * exactly that number of characters is read again into the result. The
 * closing brace itself is not part of the result.
 *
 * A character that follows a backslash is never interpreted, and nested
 * '{' ... '}' pairs are skipped. The result is '\0'-terminated and must be
 * released with free().
 *
 * fatal_error() is called if ftell or fseek fails or if the end of file is
 * met before the closing brace. The program exits with status 1 if the
 * memory allocation fails.
 */
unichar *get_braced_string(U_FILE *u){

	long start_offset = ftell(u);
	if (start_offset == -1) {
		perror("ftell\n");
		fatal_error("ftell");
	}

	/* First pass: we look for the matching brace and count the characters */
	int nesting = 0; // number of braces opened since the call
	int count = 0;
	bool escaped = false;
	int code;
	for (code = u_fgetc(u); code != EOF; code = u_fgetc(u), count++) {
		unichar ch = (unichar)code;
		if (escaped) {
			/* This character was protected by a backslash */
			escaped = false;
			continue;
		}
		if (ch == '\\') {
			escaped = true;
			continue;
		}
		if (ch == '}') {
			if (nesting == 0) {
				/* The closing brace is not counted */
				break;
			}
			nesting--;
		} else if (ch == '{') {
			nesting++;
		}
	}

	if (code == EOF) {
		fatal_error("Unexpected end of file");
	}

	unichar *content = (unichar*)malloc(sizeof(unichar)*(count+1));
	if (content == NULL) {
		perror("malloc\n");
		fprintf(stderr,"Impossible to allocate memory\n");
		exit(1);
	}

	/* Second pass: we go back and copy the characters that were counted */
	if (fseek(u,start_offset,SEEK_SET) == -1) {
		perror("fseek");
		fatal_error("fseek");
	}

	int k = 0;
	while (k < count) {
		content[k]=(unichar)u_fgetc(u);
		++k;
	}
	content[count]='\0';

	return content;
}
Пример #11
0
/**
 * Tokenizes the text read from 'f', considering that every character is a
 * token, except that:
 *  - a sequence of spaces, carriage returns and new lines is one " " token;
 *  - a tag like {xxx} is one token. A tag that contains a new line, that has
 *    no ending } (including when the end of file is reached), that is too long
 *    for MAX_TAG_LENGTH or that is rejected by check_tag_token() raises a
 *    fatal error. {S} and {STOP} are not submitted to check_tag_token().
 *
 * The number of each token, as returned by get_token_number(), is written to
 * 'coded_text' as a 4-byte value. When the end of file is reached, all the
 * tokens of 'tokens' are written to 'output', one per line.
 *
 * Counters updated:
 *  - *TOKENS_TOTAL: one per token;
 *  - *SENTENCES: one per {S} tag;
 *  - *WORDS_TOTAL: one per character accepted by is_letter();
 *  - *DIGITS_TOTAL: one per character in '0'..'9' that is not a letter;
 *  - n_enter_pos: receives the token index of each separator token that
 *    contains a new line.
 */
void char_by_char_tokenization(U_FILE* f,U_FILE* coded_text,U_FILE* output,Alphabet* alph,
                               vector_ptr* tokens,struct hash_table* hashtable,
                               vector_int* n_occur,vector_int* n_enter_pos,
                               int *SENTENCES,int *TOKENS_TOTAL,int *WORDS_TOTAL,
                               int *DIGITS_TOTAL) {
int c;
unichar s[MAX_TAG_LENGTH];
int n;
char ENTER;
int COUNT=0;
int current_megabyte=0;
c=u_fgetc(f);
while (c!=EOF) {
   COUNT++;
   /* Progress is reported every 1024*512 characters read */
   if ((COUNT/(1024*512))!=current_megabyte) {
      current_megabyte++;
      u_printf("%d megabytes read...         \r",(COUNT/(1024*512)));
   }
   if (c==' ' || c==0x0d || c==0x0a) {
      ENTER=0;
      if (c=='\n') {
         ENTER=1;
      }
      // if the char is a separator, we jump all the separators
      while ((c=u_fgetc(f))==' ' || c==0x0d || c==0x0a) {
         if (c=='\n') ENTER=1;
         COUNT++;
      }
      s[0]=' ';
      s[1]='\0';
      n=get_token_number(s,tokens,hashtable,n_occur);
      /* If there is a \n, we note it */
      if (ENTER==1) {
         vector_int_add(n_enter_pos,*TOKENS_TOTAL);
      }
      (*TOKENS_TOTAL)++;
      fwrite(&n,4,1,coded_text);
   }
   else if (c=='{') {
     s[0]='{';
     int z=1;
     /* We stop at the end of file as well, so that it is not stored as if
      * it were a character of the tag */
     while (z<(MAX_TAG_LENGTH-1) && (c=u_fgetc(f))!='}' && c!='{' && c!='\n' && c!=EOF) {
        s[z++]=(unichar)c;
        COUNT++;
     }
     if (c=='\n') {
        // if the tag contains a return
        fatal_error("Error: a tag containing a new-line sequence has been found\n");
     }
     if (z==(MAX_TAG_LENGTH-1) || c!='}') {
        // if the tag has no ending }
        if (z==(MAX_TAG_LENGTH-1)) {z--;}
        s[z]='\0';
        fatal_error("Error: a tag without ending } has been found:\n==>%S<==\n",s);
     }
     s[z]='}';
     s[z+1]='\0';
     if (!u_strcmp(s,"{S}")) {
        // if we have found a sentence delimiter
        (*SENTENCES)++;
     } else {
        if (u_strcmp(s,"{STOP}") && !check_tag_token(s)) {
           // if a tag is incorrect, we exit
           fatal_error("The text contains an invalid tag. Unitex cannot process it.");
        }
     }
     n=get_token_number(s,tokens,hashtable,n_occur);
     (*TOKENS_TOTAL)++;
     fwrite(&n,4,1,coded_text);
     c=u_fgetc(f);
   }
   else {
      s[0]=(unichar)c;
      s[1]='\0';
      n=get_token_number(s,tokens,hashtable,n_occur);
      (*TOKENS_TOTAL)++;
      if (is_letter((unichar)c,alph)) (*WORDS_TOTAL)++;
      else if (c>='0' && c<='9') (*DIGITS_TOTAL)++;
      fwrite(&n,4,1,coded_text);
      c=u_fgetc(f);
   }
}
/* We save the token list; the format only consumes one argument */
for (n=0;n<tokens->nbelems;n++) {
   u_fprintf(output,"%S\n",tokens->tab[n]);
}
}
Пример #12
0
/**
 * Writes 'c' to 'output', replacing the XML special characters '&', '<' and
 * '>' by "&amp;", "&lt;" and "&gt;".
 */
static void xmlize_put_escaped(unichar c, U_FILE* output) {
	if (c == '&') u_fprintf(output, "&amp;");
	else if (c == '<') u_fprintf(output, "&lt;");
	else if (c == '>') u_fprintf(output, "&gt;");
	else u_fputc(c, output);
}

/**
 * Converts the text file 'fin' into the XML or TEI file 'fout'.
 *
 * The output starts with xml_open if 'ouput_style' is XML and with tei_open
 * otherwise, and ends with the matching xml_close or tei_close. In between,
 * the text is wrapped into <p> and <s> elements:
 *  - "{S}" ends the current sentence; the next sentence is opened when its
 *    first character is met. "{S}{S}" counts as a single sentence end;
 *  - "{S}{P}" ends the current sentence and the current paragraph;
 *  - any other character is copied, '&', '<' and '>' being replaced by their
 *    XML entities, including when they follow an incomplete "{S}" or "{P}" tag.
 * Every <s> and <p> element that has been opened is closed before the
 * closing sequence is written.
 *
 * States of the automaton: 0 = plain text, 1 = after "{", 2 = after "{S",
 * 3 = after "{S}", 4 = after "{S}" followed by "{", 5 = after "{S}{P",
 * 6 = after "{S}{P}", 7 = after "{S}{S".
 *
 * @return SUCCESS_RETURN_CODE, or DEFAULT_ERROR_CODE if one of the files
 *         cannot be opened
 */
int xmlize(const VersatileEncodingConfig* vec,const char* fin,const char* fout,int ouput_style) {
	U_FILE* input = u_fopen(vec, fin, U_READ);
	if (input == NULL) {
		error("Input file '%s' not found!\n", fin);
		return DEFAULT_ERROR_CODE;
	}

	U_FILE* output = u_fopen(UTF8, fout, U_WRITE);
	if (output == NULL) {
		error("Cannot open output file '%s'!\n", fout);
		u_fclose(input);
		return DEFAULT_ERROR_CODE;
	}

	if(ouput_style==XML) {
	   u_fprintf(output, xml_open);
	}
	else {
	   u_fprintf(output, tei_open);
	}

	int sentence_count = 1;
	int sentence_count_relative = 1;
	int paragraph_count = 1;

	/* The first paragraph and the first sentence are always opened */
	u_fprintf(output, "<p><s id=\"n%d\" xml:id=\"d1p%ds%d\">",sentence_count++,paragraph_count,sentence_count_relative++);

	int current_state = 0;
	unichar c;
	int i;
	while ((i = u_fgetc(input)) != EOF) {
		c = (unichar)i;
		switch (current_state) {
			case 0: {
				if ( c == '{') current_state = 1;
				else xmlize_put_escaped(c, output);
				break;
			}
			case 1: {
				if (c == 'S') current_state = 2;
				else {
					/* Not a tag we know: the brace is plain text */
					u_fputc('{', output);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 2: {
				if (c == '}') current_state = 3;
				else {
					u_fputc('{', output);
					u_fputc('S', output);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 3: {
				if (c == '{') current_state = 4;
				else if (c == '\n' || c == ' ' || c == '\t') {
					u_fputc(c, output);
					current_state = 3;
				}
				else {
					/* First character of the next sentence */
					u_fprintf(output, "</s><s id=\"n%d\" xml:id=\"d1p%ds%d\">",sentence_count++,paragraph_count,sentence_count_relative++);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 4: {
				if (c == 'S') current_state = 7;
				else if (c == 'P') current_state = 5;
				else {
					u_fputc('{', output);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 5: {
				if (c == '}') {
					u_fprintf(output, "</s></p>\n");
					paragraph_count++;
					sentence_count_relative=1;
					current_state = 6;
				} else {
					u_fputc('{', output);
					u_fputc('P', output);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 6: {
				if (c == '\n' || c == ' ' || c == '\t') u_fputc(c, output);
				else {
					/* First character of the next paragraph */
					u_fprintf(output, "<p><s id=\"n%d\" xml:id=\"d1p%ds%d\">",sentence_count++,paragraph_count,sentence_count_relative++);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
			case 7: {
				if (c == '}') {
					current_state = 3;
				}
				else {
					u_fputc('{', output);
					u_fputc('S', output);
					xmlize_put_escaped(c, output);
					current_state = 0;
				}
				break;
			}
		}
	}

	/* State 6 is the only one in which the sentence and the paragraph have
	 * already been closed. In particular, after a final {S} (state 3), they
	 * are still open. */
	if (current_state != 6) {
		u_fprintf(output, "</s></p>\n");
	}

	if(ouput_style==XML) {
	   u_fprintf(output, xml_close);
	}
	else {
	   u_fprintf(output, tei_close);
	}

	u_fclose(input);
	u_fclose(output);
	u_printf("Done.\n");
	return SUCCESS_RETURN_CODE;
}
Пример #13
0
/**
 * Extracts the raw text of the TEI file 'fin' into the file 'fout'.
 *
 * The beginning of the file is skipped up to the end of the first tag whose
 * name is equal to 'body'. After that:
 *  - only the text located after a tag made of 's' or 'S' followed by a space
 *    or by '>' is copied, up to the next '<';
 *  - every such tag but the first one produces "\n{STOP}{S}" in the output;
 *  - any other tag is skipped up to its '>';
 *  - "&xxx;" sequences are decoded with get_HTML_character(). '?' is written
 *    for an unknown or malformed sequence, and a sequence that does not fit
 *    in the local buffer is handled as a malformed one.
 *
 * @return SUCCESS_RETURN_CODE on success (including for an empty input file,
 *         which is only reported with error()), ALLOC_ERROR_CODE if the HTML
 *         character context cannot be created, DEFAULT_ERROR_CODE if a file
 *         cannot be opened or if the body tag is not found
 */
int tei2txt(char *fin, char *fout, const VersatileEncodingConfig* vec) {
    void* html_ctx = init_HTML_character_context();
    if (html_ctx == NULL) {
        alloc_error("tei2txt");
        return ALLOC_ERROR_CODE;
    }

    U_FILE* input = u_fopen(vec, fin, U_READ);
    if (input == NULL) {
        error("Input file '%s' not found!\n", fin);
        free_HTML_character_context(html_ctx);
        return DEFAULT_ERROR_CODE;
    }

    U_FILE* output = u_fopen(vec, fout, U_WRITE);
    if (output == NULL) {
        error("Cannot open output file '%s'!\n", fout);
        u_fclose(input);
        free_HTML_character_context(html_ctx);
        return DEFAULT_ERROR_CODE;
    }

    unichar buffer[5000];
    const int buffer_capacity = (int)(sizeof(buffer) / sizeof(buffer[0]));

    int i, j, k;
    unichar c = '\0';
    if((i = u_fgetc(input)) != EOF) {
        c = (unichar)i;

        /* We skip everything up to the end of the body tag */
        for (;;) {
            /* Looking for the beginning of the next tag */
            while(c != '<' && (i = u_fgetc(input)) != EOF) {
                c = (unichar)i;
            }

            /* Reading the tag name; the part of a name that does not fit in
             * the buffer is read but not stored */
            j = 0;
            while((i = u_fgetc(input)) != EOF) {
                c = (unichar)i;
                if (c == ' ' || c == '\t' || c == '\n' || c == '>') {
                    break;
                }
                if (j < buffer_capacity - 1) {
                    buffer[j++] = c;
                }
            }
            buffer[j] = '\0';
            if (c != '>') {
                /* We do this because we can find <body ...> */
                while((i = u_fgetc(input)) != EOF && (c = (unichar)i) != '>') {}
            }

            if(!u_strcmp(buffer, body)) {
                break;
            }
            if (i == EOF) {
                /* Without this test, we would loop forever on a file that
                 * contains no body tag */
                error("Cannot find the beginning of the text body in TEI file %s\n", fin);
                u_fclose(output);
                u_fclose(input);
                free_HTML_character_context(html_ctx);
                return DEFAULT_ERROR_CODE;
            }
        }
    } else {
        error("Empty TEI file %s\n", fin);
    }

    char schars[11];

    int first_sentence=1;
    /* 0 = text, 1 = after '<', 2 = after "<s" or "<S", 3 = after '&' */
    int current_state = 0;
    int inside_sentence=0;
    while ((i = u_fgetc(input)) != EOF) {
        c = (unichar)i;
        switch (current_state) {
            case 0: {
                if(c == '<') {
                    current_state = 1;
                    inside_sentence=0;
                } else if(c == '&') {
                    current_state = 3;
                } else if (inside_sentence) {
                    u_fputc(c, output);
                }
                break;
            }
            case 1: {
                if(c == 's' || c == 'S') {
                    current_state = 2;
                } else {
                    /* Not a sentence tag: we skip it */
                    while((i = u_fgetc(input)) != EOF) {
                        c = (unichar)i;
                        if(c == '>') {
                            break;
                        }
                    }
                    current_state = 0;
                }
                break;
            }
            case 2: {
                if(c == ' ' || c == '>') {
                    inside_sentence=1;
                    if (!first_sentence) {
                        /* We put a {STOP} tag in order to avoid matches that overlap 2 sentences */
                        u_fprintf(output,"\n{STOP}{S}");
                    } else {
                        first_sentence=0;
                    }
                }
                if(c != '>') {
                    /* We skip the end of the tag */
                    while((i = u_fgetc(input)) != EOF) {
                        c = (unichar)i;
                        if(c == '>') {
                            break;
                        }
                    }
                }
                /* The tag is over, whether it was a sentence tag or another
                 * tag whose name starts with 's': we always go back to the
                 * text state, otherwise the text that follows such a tag
                 * would be swallowed as if it were still part of the tag */
                current_state = 0;
                break;
            }
            case 3: {
                /* 'c' is the first character after the '&' */
                j = 0;
                int too_long = 0;
                while(c != ';' && (i = u_fgetc(input)) != EOF) {
                    if (j < (int)sizeof(schars) - 1) {
                        schars[j++] = (char)c;
                    } else {
                        /* The sequence is still read up to its ';' */
                        too_long = 1;
                    }
                    c = (unichar)i;
                }
                schars[j] = '\0';

                k = too_long ? MALFORMED_HTML_CODE : get_HTML_character(html_ctx,schars, 1);
                switch (k) {
                    case UNKNOWN_CHARACTER: {
                        u_fputc('?', output);
                        break;
                    }
                    case MALFORMED_HTML_CODE: {
                        error("Malformed HTML character declaration &%s;\n", schars);
                        u_fputc('?', output);
                        break;
                    }
                    default: {
                        c = (unichar)k;
                        u_fputc(c, output);
                        break;
                    }
                }

                schars[0] = '\0';
                current_state = 0;
                break;
            }
        }
    }

    u_fclose(output);
    u_fclose(input);
    free_HTML_character_context(html_ctx);
    u_printf("Done.\n");

    return SUCCESS_RETURN_CODE;
}
Пример #14
0
U_CFUNC int32_t
u_scanf_parse(UFILE     *f,
            const UChar *patternSpecification,
            va_list     ap)
{
    const UChar     *alias;
    int32_t         count, converted, argConsumed, cpConsumed;
    uint16_t        handlerNum;

    ufmt_args       args;
    u_scanf_spec    spec;
    ufmt_type_info  info;
    u_scanf_handler handler;

    /* alias the pattern */
    alias = patternSpecification;

    /* haven't converted anything yet */
    argConsumed = 0;
    converted = 0;
    cpConsumed = 0;

    /* iterate through the pattern */
    for(;;) {

        /* match any characters up to the next '%' */
        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
            alias++;
        }

        /* if we aren't at a '%', or if we're at end of string, break*/
        if(*alias != UP_PERCENT || *alias == 0x0000)
            break;

        /* parse the specifier */
        count = u_scanf_parse_spec(alias, &spec);

        /* update the pointer in pattern */
        alias += count;

        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
            /* skip the argument, if necessary */
            /* query the info function for argument information */
            info = g_u_scanf_infos[ handlerNum ].info;
            if (info != ufmt_count && u_feof(f)) {
                break;
            }
            else if(spec.fInfo.fSkipArg) {
                args.ptrValue = NULL;
            }
            else {
                switch(info) {
                case ufmt_count:
                    /* set the spec's width to the # of items converted */
                    spec.fInfo.fWidth = cpConsumed;
                    U_FALLTHROUGH;
                case ufmt_char:
                case ufmt_uchar:
                case ufmt_int:
                case ufmt_string:
                case ufmt_ustring:
                case ufmt_pointer:
                case ufmt_float:
                case ufmt_double:
                    args.ptrValue = va_arg(ap, void*);
                    break;

                default:
                    /* else args is ignored */
                    args.ptrValue = NULL;
                    break;
                }
            }

            /* call the handler function */
            handler = g_u_scanf_infos[ handlerNum ].handler;
            if(handler != 0) {

                /* reset count to 1 so that += for alias works. */
                count = 1;

                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);

                /* if the handler encountered an error condition, break */
                if(argConsumed < 0) {
                    converted = -1;
                    break;
                }

                /* add to the # of items converted */
                converted += argConsumed;

                /* update the pointer in pattern */
                alias += count-1;
            }
            /* else do nothing */
        }
        /* else do nothing */

        /* just ignore unknown tags */
    }