示例#1
0
文件: read_mps.c 项目: jonls/qsopt-ex
/*
 * Warn about leftover text on the current input line.
 *
 * mps_skip_comment (state) is called first; when it returns zero and
 * END_LINE (state->p) is false, the warning "Extra fields on line." is
 * issued through EGLPNUM_TYPENAME_ILLmps_warn.  Otherwise nothing is
 * reported.
 */
void EGLPNUM_TYPENAME_ILLmps_check_end_of_line (
	EGLPNUM_TYPENAME_ILLread_mps_state * state)
{
	/* Non-zero result from the comment skipper: nothing left to check. */
	if (mps_skip_comment (state))
	{
		return;
	}

	/* Already at the end of the line: no extra fields. */
	if (END_LINE (state->p))
	{
		return;
	}

	EGLPNUM_TYPENAME_ILLmps_warn (state, "Extra fields on line.");
}
示例#2
0
/*
 * Tokenize the bytes self->data[datapos .. datalen) with a character-driven
 * state machine (self->state) in which runs of whitespace end a field.
 *
 * Parameters:
 *   self       - parser state; its data buffer is read and its state,
 *                error_msg and (through the helper macros) token stream are
 *                updated.
 *   line_limit - not referenced directly in this body; presumably consumed
 *                by the END_LINE* macros to stop early through the
 *                `linelimit` label (TODO confirm against the macro
 *                definitions).
 *
 * Returns:
 *   0  when the whole buffer was consumed, or on the `linelimit` exit.
 *   -1 when stream space could not be reserved or on the `parsingerror`
 *      exit; self->error_msg is set on the paths visible here.
 *
 * NOTE(review): `stream`, `slen`, `i` and `start_lines` are written here but
 * never read in the visible code.  PUSH_CHAR / END_FIELD / END_LINE* /
 * _TOKEN_CLEANUP are macros defined elsewhere and presumably refer to these
 * locals (and to the two labels) by name, so they must keep their names.
 */
int tokenize_whitespace(parser_t *self, size_t line_limit)
{
    int i, slen, start_lines;
    char c;
    char *stream;
    char *buf = self->data + self->datapos;

    start_lines = self->lines;

    if (make_stream_space(self, self->datalen - self->datapos) < 0) {
        self->error_msg = "out of memory";
        return -1;
    }

    stream = self->stream + self->stream_len;
    slen = self->stream_len;

    TRACE(("%s\n", buf));

    for (i = self->datapos; i < self->datalen; ++i)
    {
        // Next character in file
        c = *buf++;

        TRACE(("Iter: %d Char: %c Line %d field_count %d, state %d\n",
               i, c, self->file_lines + 1, self->line_fields[self->lines],
               self->state));

        switch(self->state) {

        case EAT_WHITESPACE:
            if (!IS_WHITESPACE(c)) {
                // END_FIELD();
                self->state = START_FIELD;
                // Fall through to subsequent state
            } else {
                // if whitespace char, keep slurping
                break;
            }
            /* fallthru: a non-whitespace char is re-examined below */

        case START_RECORD:
            // start of record
            if (c == '\n') {
                // \n\r possible?
                END_LINE();
                break;
            } else if (c == '\r') {
                self->state = EAT_CRNL;
                break;
            } else if (IS_WHITESPACE(c)) {
                END_FIELD();
                self->state = EAT_WHITESPACE;
                break;
            } else {
                /* normal character - handle as START_FIELD */
                self->state = START_FIELD;
            }
            /* fallthru */
        case START_FIELD:
            /* expecting field */
            if (c == '\n') {
                END_FIELD();
                END_LINE();
                /* self->state = START_RECORD; */
            } else if (c == '\r') {
                END_FIELD();
                self->state = EAT_CRNL;
            }
            else if (c == self->quotechar &&
                     self->quoting != QUOTE_NONE) {
                /* start quoted field */
                self->state = IN_QUOTED_FIELD;
            }
            else if (c == self->escapechar) {
                /* possible escaped character */
                self->state = ESCAPED_CHAR;
            }
            /* else if (c == ' ' && self->skipinitialspace) */
            /*     /\* ignore space at start of field *\/ */
            /*     ; */
            else if (IS_WHITESPACE(c)) {
                self->state = EAT_WHITESPACE;
            }
            else if (c == self->commentchar) {
                END_FIELD();
                self->state = EAT_COMMENT;
            }
            else {
                /* begin new unquoted field */
                if (self->quoting == QUOTE_NONNUMERIC)
                    self->numeric_field = 1;

                // TRACE(("pushing %c", c));
                PUSH_CHAR(c);
                self->state = IN_FIELD;
            }
            break;

        case ESCAPED_CHAR:
            /* if (c == '\0') */
            /*  c = '\n'; */

            /* The character after the escape is stored verbatim. */
            PUSH_CHAR(c);
            self->state = IN_FIELD;
            break;

        case IN_FIELD:
            /* in unquoted field */
            if (c == '\n') {
                END_FIELD();
                END_LINE();
                /* self->state = START_RECORD; */
            } else if (c == '\r') {
                END_FIELD();
                self->state = EAT_CRNL;
            }
            else if (c == self->escapechar) {
                /* possible escaped character */
                self->state = ESCAPED_CHAR;
            }
            else if (IS_WHITESPACE(c)) {
                // End of field. End of line not reached yet
                END_FIELD();
                self->state = EAT_WHITESPACE;
            }
            else if (c == self->commentchar) {
                END_FIELD();
                self->state = EAT_COMMENT;
            }
            else {
                /* normal character - save in field */
                PUSH_CHAR(c);
            }
            break;

        case IN_QUOTED_FIELD:
            /* in quoted field */
            if (c == self->escapechar) {
                /* Possible escape character */
                self->state = ESCAPE_IN_QUOTED_FIELD;
            }
            else if (c == self->quotechar &&
                     self->quoting != QUOTE_NONE) {
                if (self->doublequote) {
                    /* doublequote; " represented by "" */
                    self->state = QUOTE_IN_QUOTED_FIELD;
                }
                else {
                    /* end of quote part of field */
                    self->state = IN_FIELD;
                }
            }
            else {
                /* normal character - save in field */
                PUSH_CHAR(c);
            }
            break;

        case ESCAPE_IN_QUOTED_FIELD:
            /* if (c == '\0') */
            /*  c = '\n'; */

            /* The character after the escape is stored verbatim. */
            PUSH_CHAR(c);
            self->state = IN_QUOTED_FIELD;
            break;

        case QUOTE_IN_QUOTED_FIELD:
            /* doublequote - seen a quote in an quoted field */
            if (self->quoting != QUOTE_NONE && c == self->quotechar) {
                /* save "" as " */

                PUSH_CHAR(c);
                self->state = IN_QUOTED_FIELD;
            }
            else if (IS_WHITESPACE(c)) {
                // End of field. End of line not reached yet

                END_FIELD();
                self->state = EAT_WHITESPACE;
            }
            else if (c == '\n') {
                END_FIELD();
                END_LINE();
                /* self->state = START_RECORD; */
            }
            else if (c == '\r') {
                END_FIELD();
                self->state = EAT_CRNL;
            }
            else if (!self->strict) {
                /* lenient mode: keep the stray character in the field */
                PUSH_CHAR(c);
                self->state = IN_FIELD;
            }
            else {
                /*
                 * Strict mode: report the malformed quote.  The allocation
                 * is checked before being written to; if it fails we fall
                 * back to the same static message used by the
                 * make_stream_space failure path above.
                 */
                const size_t msg_size = 50;
                char *msg = (char*) malloc(msg_size);

                if (msg == NULL) {
                    self->error_msg = "out of memory";
                } else {
                    snprintf(msg, msg_size, "'%c' expected after '%c'",
                             self->delimiter, self->quotechar);
                    self->error_msg = msg;
                }
                goto parsingerror;
            }
            break;

        case EAT_CRNL:
            /* previous character was '\r' */
            if (c == '\n') {
                END_LINE();
                /* self->state = START_RECORD; */
            } else if (IS_WHITESPACE(c)){
                // Handle \r-delimited files
                END_LINE_AND_FIELD_STATE(EAT_WHITESPACE);
            } else {
                PUSH_CHAR(c);
                END_LINE_STATE(IN_FIELD);
            }
            break;

        case EAT_COMMENT:
            /* discard everything up to the end of the line */
            if (c == '\n') {
                END_LINE();
            } else if (c == '\r') {
                self->state = EAT_CRNL;
            }
            break;

        default:
            break;


        }

    }

    _TOKEN_CLEANUP();

    /* NOTE(review): no ';' here in the original; TRACE presumably expands
       to a complete statement or to nothing - confirm before adding one. */
    TRACE(("Finished tokenizing input\n"))

    return 0;

parsingerror:
    i++;
    _TOKEN_CLEANUP();

    return -1;

    /* Not targeted by any goto visible in this function; presumably reached
       from inside the END_LINE* macros (TODO confirm). */
linelimit:
    i++;
    _TOKEN_CLEANUP();

    return 0;
}
示例#3
0
文件: read_mps.c 项目: jonls/qsopt-ex
/*
 * Read the next bound value at state->p into *coef.
 *
 * When mps_skip_comment (state) returns zero, the text at state->p is first
 * matched (case-insensitively) against an optional '+' / '-' followed by
 * "INFINITY" or "INF".  The keyword is accepted only if, after stepping over
 * it, either mps_skip_comment moved state->p or END_LINE (state->p) holds;
 * then *coef receives EGLPNUM_TYPENAME_ILL_MAXDOUBLE (no sign or '+') or
 * EGLPNUM_TYPENAME_ILL_MINDOUBLE ('-'), state->field_num is incremented and
 * the result is 0.  If the keyword turns out to be the prefix of a longer
 * token, state->p is restored and 1 is returned.  In every other case the
 * value is parsed by get_double (state, 0, coef).
 *
 * Result (via return / ILL_RESULT / ILL_RETURN): 0 when a coefficient was
 * stored, 1 when none was found.
 */
int EGLPNUM_TYPENAME_ILLmps_next_bound (
	EGLPNUM_TYPENAME_ILLread_mps_state * state,
	EGLPNUM_TYPE * coef)
{
	int consumed = 0;
	int negative = 0;
	char lead;
	char *after_keyword;

	if (!mps_skip_comment (state))
	{
		/* optional sign */
		lead = *state->p;
		if (lead == '-' || lead == '+')
		{
			consumed = 1;
			negative = (lead == '-');
		}

		/* try the long spelling first, then the short one */
		if (strncasecmp (state->p + consumed, "INFINITY", (size_t) 8) == 0)
		{
			consumed += 8;
		}
		else if (strncasecmp (state->p + consumed, "INF", (size_t) 3) == 0)
		{
			consumed += 3;
		}

		/* more than a lone sign was matched, i.e. a keyword is present */
		if (consumed > 1)
		{
			state->p += consumed;
			after_keyword = state->p;
			mps_skip_comment (state);
			if (END_LINE (state->p) || after_keyword != state->p)
			{
				if (negative)
				{
					EGLPNUM_TYPENAME_EGlpNumCopy (*coef, EGLPNUM_TYPENAME_ILL_MINDOUBLE);
				}
				else
				{
					EGLPNUM_TYPENAME_EGlpNumCopy (*coef, EGLPNUM_TYPENAME_ILL_MAXDOUBLE);
				}
				state->field_num++;
				ILL_RESULT (0, "EGLPNUM_TYPENAME_ILLmps_next_bound");
			}
			else
			{
				/* nothing was skipped and the line goes on, so the keyword
				 * is only the beginning of some other token: undo */
				state->p -= consumed;
				return 1;								/* no coef found */
			}
		}

		/* ordinary numeric bound */
		if (!get_double (state, 0, coef))
		{
			ILL_RESULT (1, "EGLPNUM_TYPENAME_ILLmps_next_bound");	/* no coef found */
		}
		else
		{
			ILL_RESULT (0, "EGLPNUM_TYPENAME_ILLmps_next_bound");
		}
	}
	ILL_RETURN (1, "EGLPNUM_TYPENAME_ILLmps_next_bound");
}
示例#4
0
/*
 * Split self->source into per-column output buffers by running a
 * character-at-a-time state machine (self->state) over it.
 *
 * Parameters:
 *   self     - tokenizer whose source buffer is read starting at
 *              self->source_pos; its output_cols / col_ptrs / output_len
 *              arrays are allocated here after delete_data(self).
 *   end      - when 0 the function exits with NO_ERROR right after the
 *              allocations, without reading input.  Not referenced otherwise
 *              in this body (presumably used by the END_LINE macro - TODO
 *              confirm).
 *   header   - when nonzero, a single output column is used and comment text
 *              is not passed to push_comment / end_comment.
 *   num_cols - number of output columns when header is zero.
 *
 * The function exits through the RETURN macro (defined elsewhere) with
 * NO_ERROR, TOO_MANY_COLS, or 0 once the input has been consumed.
 *
 * BEGIN_FIELD / END_FIELD / END_LINE / PUSH / RETURN are macros defined
 * outside this block; they presumably read and update locals such as `col`,
 * `header` and `end` by name (TODO confirm), so the exact statement order and
 * the case fall-throughs below matter.
 */
int tokenize(tokenizer_t *self, int end, int header, int num_cols)
{
    char c; // input character
    int col = 0; // current column ignoring possibly excluded columns
    // NOTE(review): old_state is never referenced in the visible code.
    tokenizer_state old_state = START_LINE; // last state the tokenizer was in before CR mode
    // NOTE(review): parse_newline is only ever cleared in the visible code;
    // presumably one of the macros sets it - confirm.
    int parse_newline = 0; // explicit flag to treat current char as a newline
    int i = 0;
    // NOTE(review): whitespace is set to 1 only here and cleared in FIELD; no
    // visible statement resets it for later lines - confirm whether a macro does.
    int whitespace = 1;
    delete_data(self); // clear old reading data
    self->num_rows = 0;
    self->comment_lines_len = INITIAL_COMMENT_LEN;

    if (header)
        self->num_cols = 1; // store header output in one column
    else
        self->num_cols = num_cols;

    // Allocate memory for structures used during tokenization
    // NOTE(review): none of the malloc/calloc results below are checked for
    // NULL before being dereferenced.
    self->output_cols = (char **) malloc(self->num_cols * sizeof(char *));
    self->col_ptrs = (char **) malloc(self->num_cols * sizeof(char *));
    self->output_len = (size_t *) malloc(self->num_cols * sizeof(size_t));

    for (i = 0; i < self->num_cols; ++i)
    {
        self->output_cols[i] = (char *) calloc(1, INITIAL_COL_SIZE *
                                               sizeof(char));
        // Make each col_ptrs pointer point to the beginning of the
        // column string
        self->col_ptrs[i] = self->output_cols[i];
        self->output_len[i] = INITIAL_COL_SIZE;
    }

    if (end == 0)
        RETURN(NO_ERROR); // don't read if end == 0

    self->state = START_LINE;

    // Loop until all of self->source has been read
    // (the "+ 1" gives one extra iteration at source_pos == source_len, on
    // which a synthetic '\n' is fed to the state machine)
    while (self->source_pos < self->source_len + 1)
    {
        if (self->source_pos == self->source_len || parse_newline)
            c = '\n';
        else
            c = self->source[self->source_pos];

        // Carriage returns are handled exactly like newlines
        if (c == '\r')
            c = '\n';

        parse_newline = 0;

        switch (self->state)
        {
        case START_LINE:
            if (c == '\n')
                break;
            else if ((c == ' ' || c == '\t') && self->strip_whitespace_lines)
                break;
            else if (self->comment != 0 && c == self->comment)
            {
                // comment line; ignore
                self->state = COMMENT;
                break;
            }
            // initialize variables for the beginning of line parsing
            col = 0;
            BEGIN_FIELD();
            // parse in mode START_FIELD
            // (intentional fall through)

        case START_FIELD:
            // strip whitespace before field begins
            if ((c == ' ' || c == '\t') && self->strip_whitespace_fields)
                break;
            else if (!self->strip_whitespace_lines && self->comment != 0 &&
                     c == self->comment)
            {
                // comment line, not caught earlier because of no stripping
                self->state = COMMENT;
                break;
            }
            else if (c == self->delimiter) // field ends before it begins
            {
                if (col >= self->num_cols)
                    RETURN(TOO_MANY_COLS);
                END_FIELD();
                BEGIN_FIELD();
                break;
            }
            else if (c == '\n')
            {
                if (self->strip_whitespace_lines)
                {
                    // Move on if the delimiter is whitespace, e.g.
                    // '1 2 3   '->['1','2','3']
                    if (self->delimiter == ' ' || self->delimiter == '\t')
                        ;
                    // Register an empty field if non-whitespace delimiter,
                    // e.g. '1,2, '->['1','2','']
                    else
                    {
                        if (col >= self->num_cols)
                            RETURN(TOO_MANY_COLS);
                        END_FIELD();
                    }
                }

                else if (!self->strip_whitespace_lines)
                {
                    // In this case we don't want to left-strip the field,
                    // so we backtrack
                    size_t tmp = self->source_pos;
                    --self->source_pos;

                    // NOTE(review): tmp is size_t; if self->source_pos is
                    // also an unsigned type, "source_pos >= 0" is always true
                    // and the "== -1" test below relies on wrap-around -
                    // confirm the field's declared type.
                    while (self->source_pos >= 0 &&
                           self->source[self->source_pos] != self->delimiter
                           && self->source[self->source_pos] != '\n'
                           && self->source[self->source_pos] != '\r')
                    {
                        --self->source_pos;
                    }

                    // backtracked to line beginning
                    if (self->source_pos == -1
                        || self->source[self->source_pos] == '\n'
                        || self->source[self->source_pos] == '\r')
                    {
                        self->source_pos = tmp;
                    }
                    else
                    {
                        // step forward to the first character after the
                        // delimiter that stopped the backward scan
                        ++self->source_pos;

                        if (self->source_pos == tmp)
                            // no whitespace, just an empty field
                            ;

                        else
                            while (self->source_pos < tmp)
                            {
                                // append whitespace characters
                                PUSH(self->source[self->source_pos]);
                                ++self->source_pos;
                            }

                        if (col >= self->num_cols)
                            RETURN(TOO_MANY_COLS);
                        END_FIELD(); // whitespace counts as a field
                    }
                }

                END_LINE();
                self->state = START_LINE;
                break;
            }
            else if (c == self->quotechar) // start parsing quoted field
            {
                self->state = START_QUOTED_FIELD;
                break;
            }
            else
            {
                if (col >= self->num_cols)
                    RETURN(TOO_MANY_COLS);
                // Valid field character, parse again in FIELD mode
                self->state = FIELD;
            }
            // (intentional fall through)

        case FIELD:
            if (self->comment != 0 && c == self->comment && whitespace && col == 0)
            {
                // No whitespace stripping, but the comment char is found
                // before any data, e.g. '  # a b c'
                self->state = COMMENT;
            }
            else if (c == self->delimiter)
            {
                // End of field, look for new field
                END_FIELD();
                BEGIN_FIELD();
            }
            else if (c == '\n')
            {
                // Line ending, stop parsing both field and line
                END_FIELD();
                END_LINE();
                self->state = START_LINE;
            }
            else
            {
                if (c != ' ' && c != '\t')
                    whitespace = 0; // field is not all whitespace
                PUSH(c);
            }
            break;

        case START_QUOTED_FIELD:
            if ((c == ' ' || c == '\t') && self->strip_whitespace_fields)
            {
                // ignore initial whitespace
                break;
            }
            else if (c == self->quotechar)
            {
                // Lookahead check for double quote inside quoted field,
                // e.g. """cd" => "cd
                if (self->source_pos < self->source_len - 1)
                {
                    if (self->source[self->source_pos + 1] == self->quotechar)
                    {
                        self->state = QUOTED_FIELD_DOUBLE_QUOTE;
                        PUSH(c);
                        break;
                    }
                }
                // Parse rest of field normally, e.g. ""c
                self->state = FIELD;
            }
            else
            {
                // Valid field character, parse again in QUOTED_FIELD mode
                self->state = QUOTED_FIELD;
            }
            // (intentional fall through; when the state was just set to
            // FIELD, execution still continues through the two quoted-field
            // cases below on this same character)

        case QUOTED_FIELD_NEWLINE:
            if (self->state == QUOTED_FIELD)
                ; // fall through
            // Ignore initial whitespace if strip_whitespace_lines and
            // newlines regardless
            else if (((c == ' ' || c == '\t') && self->strip_whitespace_lines)
                     || c == '\n')
                break;
            else if (c == self->quotechar)
            {
                self->state = FIELD;
                break;
            }
            else
            {
                // Once data begins, parse it as a normal quoted field
                self->state = QUOTED_FIELD;
            }
            // (intentional fall through)

        case QUOTED_FIELD:
            if (c == self->quotechar)
            {
                // Lookahead check for double quote inside quoted field,
                // e.g. "ab""cd" => ab"cd
                if (self->source_pos < self->source_len - 1)
                {
                    if (self->source[self->source_pos + 1] == self->quotechar)
                    {
                        self->state = QUOTED_FIELD_DOUBLE_QUOTE;
                        PUSH(c);
                        break;
                    }
                }
                // Parse rest of field normally, e.g. "ab"c
                self->state = FIELD;
            }
            else if (c == '\n')
                self->state = QUOTED_FIELD_NEWLINE;
            else
            {
                PUSH(c);
            }
            break;

        case QUOTED_FIELD_DOUBLE_QUOTE:
            // Ignore the second double quote from "ab""cd" and parse rest of
            // field normally as quoted field.
            self->state = QUOTED_FIELD;
            break;

        case COMMENT:
            // Comment text is handed to push_comment / end_comment only when
            // header is zero.
            if (c == '\n')
            {
                self->state = START_LINE;
                if (!header)
                    end_comment(self);
            }
            else if (!header)
                push_comment(self, c);
            break; // keep looping until we find a newline

        }

        ++self->source_pos;
    }

    RETURN(0);
}