Beispiel #1
0
/*
 * Produce the next record from the reader.
 *
 * Lines are pulled from self->input_iter and pushed through
 * parse_process_char() one code point at a time (followed by a 0 to mark
 * the end of each line) until the parser state is back at START_RECORD.
 *
 * Returns the field list taken from self->fields (ownership moves to the
 * caller and self->fields is cleared), or NULL when the input is exhausted
 * or an error occurred.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *record = NULL;

    if (parse_reset(self) < 0)
        return NULL;

    do {
        PyObject *lineobj = PyIter_Next(self->input_iter);
        unsigned int kind;
        void *data;
        Py_ssize_t pos, linelen;

        if (lineobj == NULL) {
            /* Iterator exhausted, or it raised.  Running out of input
             * while field data is still buffered is reported as an error. */
            if (!PyErr_Occurred() && self->field_len != 0)
                PyErr_Format(error_obj,
                             "newline inside string");
            return NULL;
        }

        if (!PyUnicode_Check(lineobj)) {
            /* Read the type name before dropping our reference. */
            PyErr_Format(error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
                         lineobj->ob_type->tp_name);
            Py_DECREF(lineobj);
            return NULL;
        }

        ++self->line_num;
        kind = PyUnicode_KIND(lineobj);
        data = PyUnicode_DATA(lineobj);
        linelen = PyUnicode_GET_LENGTH(lineobj);

        for (pos = 0; pos < linelen; pos++) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, pos);

            if (ch == '\0') {
                Py_DECREF(lineobj);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                goto done;
            }
            if (parse_process_char(self, ch) < 0) {
                Py_DECREF(lineobj);
                goto done;
            }
        }
        Py_DECREF(lineobj);

        /* A 0 tells the parser that the line has ended. */
        if (parse_process_char(self, 0) < 0)
            goto done;
    } while (self->state != START_RECORD);

    /* Hand the completed field list over to the caller. */
    record = self->fields;
    self->fields = NULL;

done:
    return record;
}
Beispiel #2
0
/*
 * Fill a dictionary entry from one raw line.
 *
 * Expected line format:
 *   keyword TAB candidate1 TAB candidate2 TAB candidate3... TAB
 *
 * lbuf   - source line; at most `length` bytes are copied from it
 * length - number of bytes to take from lbuf; values <= 0 are rejected
 * entry  - destination; on success entry->lbuf owns a private copy of the
 *          line, entry->keyword points at its first field and
 *          entry->candidate holds the result of parse_arg() on the rest
 *
 * Returns true on success.  On failure returns false, releases the copy
 * and leaves entry->lbuf (and entry->keyword, if it had been pointed at
 * the copy) set to NULL so no dangling pointer remains in the entry.
 */
bool add_entry(char *lbuf, int length, struct dict_entry_t *entry)
{
	char *cp;
	bool keyword_set = false;

	/* a negative length must never reach the allocator */
	if (length <= 0)
		return false;

	/* allocate line buffer; the extra zeroed byte keeps the copy
	 * NUL-terminated even when strncpy() fills all `length` bytes */
	entry->lbuf = (char *) ecalloc(1, length + 1);
	if (entry->lbuf == NULL)
		return false;
	strncpy(entry->lbuf, lbuf, length);

	/* the first TAB separates the keyword from the candidates */
	if ((cp = strchr(entry->lbuf, '\t')) == NULL)
		goto error;

	*cp = '\0';
	entry->keyword = entry->lbuf;
	keyword_set = true;

	parse_reset(&entry->candidate);
	parse_arg(cp + 1, &entry->candidate, '\t', not_tab);

	/* illegal entry */
	if (entry->candidate.argc <= 0)
		goto error;

	return true;
error:
	/* keyword aliases the buffer being released */
	if (keyword_set)
		entry->keyword = NULL;
	free(entry->lbuf);
	entry->lbuf = NULL;
	return false;
}
Beispiel #3
0
static PyObject *
CSVParser_fread(CSVParser *self, size_t size)
{
    char *buf = PyMem_Malloc(sizeof(char)*size);
    if(buf==NULL){
        PyErr_NoMemory();
        return NULL;
    }
    char c;
    size_t readsize;
    PyObject *datas = PyList_New(0);
    if(self->fields==NULL){
        parse_reset(self);
    }
    while(size){
        PyFile_IncUseCount((PyFileObject*)self->pyfile);
        Py_BEGIN_ALLOW_THREADS
        readsize = fread(buf, 1, size, self->file);
        Py_END_ALLOW_THREADS
        PyFile_DecUseCount((PyFileObject*)self->pyfile);
        if(readsize==0)
            break;

        size -= readsize;
        while(readsize--) {
            logq_readc(buf)
        }
    }
err:
    return datas;
}
Beispiel #4
0
static PyObject *
CSVParser_iternext(CSVParser *self)
{
    if(!self->is_file)
        return CSVParser_iternext_filelike(self);
    char buf[MAXBUFSIZE];
    char c;
    char *cp;
    PyObject *fields = NULL;
    long i;
    long linelen;
    Logq_Engine_reset(self->engine);
    while(!self->engine->is_success){
        if (parse_reset(self) < 0)
            return NULL;
        do {
            PyFile_IncUseCount((PyFileObject*)self->pyfile);
            Py_BEGIN_ALLOW_THREADS
            cp = fgets(buf, MAXBUFSIZE, self->file);
            Py_END_ALLOW_THREADS
            PyFile_DecUseCount((PyFileObject*)self->pyfile);
            if (cp == NULL) {
                /* End of input OR exception */
                if (!PyErr_Occurred() && (self->field_len != 0 ||
                                          self->state == IN_QUOTED_FIELD)) {
                    if (parse_save_field(self) >= 0 )
                        break;
                }
                return NULL;
            }
            ++self->line_num;
            linelen = strlen(buf);
            if (buf == NULL || linelen < 0) {
                return NULL;
            }
            for(i=0; i<linelen; ++i){
                c = buf[i];
                if (c == '\0') {
                    PyErr_Format(csv_error_obj,
                                 "line contains NULL byte");
                    goto err;
                }
                if (parse_process_char(self, c) < 0) {
                    goto err;
                }
                //query fail. go next line
                if (self->state == QUERY_FAIL){
                    if(buf[linelen-1]=='\n'){
                        self->state = START_RECORD;
                    }
                    break;
                }
            }
        } while (self->state != START_RECORD);
    }
    fields = self->fields;
    self->fields = NULL;
err:
    return fields;
}
Beispiel #5
0
static PyObject *
CSVParser_iternext_filelike(CSVParser *self)
{
    char *buf;
    char c;
    PyObject *fields = NULL;
    PyObject *lineobj = NULL;
    long i;
    long linelen;
    Logq_Engine_reset(self->engine);
    while(!self->engine->is_success){
        if (parse_reset(self) < 0)
            return NULL;
        do {
            lineobj = PyIter_Next(self->pyfile);
            if (lineobj == NULL) {
                /* End of input OR exception */
                if (!PyErr_Occurred() && (self->field_len != 0 ||
                                          self->state == IN_QUOTED_FIELD)) {
                    if (parse_save_field(self) >= 0 )
                        break;
                }
                return NULL;
            }
            ++self->line_num;

            buf = PyString_AsString(lineobj);
            linelen = PyString_Size(lineobj);

            if (buf == NULL || linelen < 0) {
                return NULL;
            }
            for(i=0; i<linelen; ++i){
                c = buf[i];
                if (c == '\0') {
                    Py_DECREF(lineobj);
                    PyErr_Format(csv_error_obj,
                                 "line contains NULL byte");
                    goto err;
                }
                if (parse_process_char(self, c) < 0) {
                    Py_DECREF(lineobj);
                    goto err;
                }
                //query fail. go next line
                if (self->state == QUERY_FAIL){
                    if(buf[linelen-1]=='\n'){
                        self->state = START_RECORD;
                    }
                    break;
                }
            }
            Py_DECREF(lineobj);
        } while (self->state != START_RECORD);
    }
    fields = self->fields;
    self->fields = NULL;
err:
    return fields;
}
Beispiel #6
0
/*
 * Produce the next record from the reader.
 *
 * Lines are pulled from self->input_iter and pushed through
 * parse_process_char() one Py_UNICODE at a time (followed by a 0 to mark
 * the end of each line) until the parser state is back at START_RECORD.
 *
 * Returns the field list taken from self->fields (self->fields is cleared),
 * or NULL when the input is exhausted or an error occurred.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *record = NULL;

    if (parse_reset(self) < 0)
        return NULL;

    do {
        PyObject *lineobj = PyIter_Next(self->input_iter);
        Py_UNICODE *chars;
        Py_ssize_t nchars;
        Py_ssize_t idx;

        if (lineobj == NULL) {
            /* Iterator exhausted, or it raised.  Running out of input
             * while field data is still buffered is reported as an error. */
            if (!PyErr_Occurred() && self->field_len != 0)
                PyErr_Format(error_obj,
                             "newline inside string");
            return NULL;
        }

        if (!PyUnicode_Check(lineobj)) {
            /* Read the type name before dropping our reference. */
            PyErr_Format(error_obj,
                         "iterator should return strings, "
                         "not %.200s "
                         "(did you open the file in text mode?)",
                         lineobj->ob_type->tp_name);
            Py_DECREF(lineobj);
            return NULL;
        }

        ++self->line_num;
        chars = PyUnicode_AsUnicode(lineobj);
        nchars = PyUnicode_GetSize(lineobj);
        if (chars == NULL || nchars < 0) {
            Py_DECREF(lineobj);
            return NULL;
        }

        for (idx = 0; idx < nchars; idx++) {
            Py_UNICODE ch = chars[idx];

            if (ch == '\0') {
                Py_DECREF(lineobj);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                goto done;
            }
            if (parse_process_char(self, ch) < 0) {
                Py_DECREF(lineobj);
                goto done;
            }
        }
        Py_DECREF(lineobj);

        /* A 0 tells the parser that the line has ended. */
        if (parse_process_char(self, 0) < 0)
            goto done;
    } while (self->state != START_RECORD);

    /* Hand the completed field list over to the caller. */
    record = self->fields;
    self->fields = NULL;

done:
    return record;
}
Beispiel #7
0
/*
 * Produce the next record from the reader.
 *
 * Lines are pulled from self->input_iter and pushed through
 * parse_process_char() one byte at a time (followed by a 0 to mark the end
 * of each line) until the parser state is back at START_RECORD.
 *
 * When the input ends in the middle of a field, a strict dialect raises
 * "unexpected end of data"; otherwise the pending field is saved and the
 * partial record is returned.
 *
 * Returns the field list taken from self->fields (self->fields is cleared),
 * or NULL when the input is exhausted or an error occurred.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
    PyObject *lineobj;
    PyObject *fields = NULL;
    char *line, c;
    /* PyString_Size() returns Py_ssize_t; an int would truncate the length
     * of very long lines on 64-bit builds. */
    Py_ssize_t linelen;

    if (parse_reset(self) < 0)
        return NULL;
    do {
        lineobj = PyIter_Next(self->input_iter);
        if (lineobj == NULL) {
            /* End of input OR exception */
            if (!PyErr_Occurred() && (self->field_len != 0 ||
                                      self->state == IN_QUOTED_FIELD)) {
                if (self->dialect->strict)
                    PyErr_SetString(error_obj, "unexpected end of data");
                else if (parse_save_field(self) >= 0 )
                    break;
            }
            return NULL;
        }
        ++self->line_num;

        line = PyString_AsString(lineobj);
        linelen = PyString_Size(lineobj);

        if (line == NULL || linelen < 0) {
            /* The line object was not usable as a string. */
            Py_DECREF(lineobj);
            return NULL;
        }
        while (linelen--) {
            c = *line++;
            if (c == '\0') {
                Py_DECREF(lineobj);
                PyErr_Format(error_obj,
                             "line contains NULL byte");
                goto err;
            }
            if (parse_process_char(self, c) < 0) {
                Py_DECREF(lineobj);
                goto err;
            }
        }
        Py_DECREF(lineobj);
        /* A 0 tells the parser that the line has ended. */
        if (parse_process_char(self, 0) < 0)
            goto err;
    } while (self->state != START_RECORD);

    /* Hand the completed field list over to the caller. */
    fields = self->fields;
    self->fields = NULL;
err:
    return fields;
}
Beispiel #8
0
// Parse `str` into this window part.
//
// The index is cleared, parse_element() is run over the input, and the
// parse counts as successful only if the cursor ends on the terminating
// NUL.  On failure the part is put back via parse_reset().  split() is
// called in both cases.  Returns whether the whole string was consumed.
bool
window_part::parse(const char* str)
{
    const char* cursor = str;

    m_index.clear();
    parse_element(cursor);

    const bool consumed_all = (*cursor == '\0');
    if (!consumed_all)
        parse_reset();

    split();
    return consumed_all;
}
Beispiel #9
0
/*
 * Produce the next record from the reader.
 *
 * Lines are pulled from self->input_iter and pushed through
 * parse_process_char() one byte at a time (followed by a 0 to mark the end
 * of each line) until the parser state is back at START_RECORD.
 *
 * Returns the field list taken from self->fields (self->fields is cleared),
 * or NULL when the input is exhausted or an error occurred.
 */
static PyObject *
Reader_iternext(ReaderObj *self)
{
        PyObject *lineobj;
        PyObject *fields = NULL;
        char *line, c;
        /* PyString_Size() returns Py_ssize_t; an int would truncate the
         * length of very long lines on 64-bit builds. */
        Py_ssize_t linelen;

        if (parse_reset(self) < 0)
                return NULL;
        do {
                lineobj = PyIter_Next(self->input_iter);
                if (lineobj == NULL) {
                        /* End of input OR exception.  Running out of input
                         * while field data is still buffered is an error. */
                        if (!PyErr_Occurred() && self->field_len != 0)
                                PyErr_Format(error_obj,
                                             "newline inside string");
                        return NULL;
                }
                ++self->line_num;

                line = PyString_AsString(lineobj);
                linelen = PyString_Size(lineobj);

                if (line == NULL || linelen < 0) {
                        /* The line object was not usable as a string. */
                        Py_DECREF(lineobj);
                        return NULL;
                }
                while (linelen--) {
                        c = *line++;
                        if (c == '\0') {
                                Py_DECREF(lineobj);
                                PyErr_Format(error_obj,
                                             "line contains NULL byte");
                                goto err;
                        }
                        if (parse_process_char(self, c) < 0) {
                                Py_DECREF(lineobj);
                                goto err;
                        }
                }
                Py_DECREF(lineobj);
                /* A 0 tells the parser that the line has ended. */
                if (parse_process_char(self, 0) < 0)
                        goto err;
        } while (self->state != START_RECORD);

        /* Hand the completed field list over to the caller. */
        fields = self->fields;
        self->fields = NULL;
err:
        return fields;
}
Beispiel #10
0
/*
 * Module-level factory: build a reader object.
 *
 * args         - 1 or 2 positional arguments: the iterable to read from
 *                and an optional dialect
 * keyword_args - forwarded to _call_dialect() together with the dialect
 *
 * Returns a new GC-tracked ReaderObj, or NULL with an exception set.
 */
static PyObject *
csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
{
    PyObject *source;
    PyObject *dialect = NULL;
    ReaderObj *self;

    self = PyObject_GC_New(ReaderObj, &Reader_Type);
    if (self == NULL)
        return NULL;

    /* Put every member into a known state before anything can fail, so
     * the object can be released from any point below. */
    self->dialect = NULL;
    self->fields = NULL;
    self->input_iter = NULL;
    self->field = NULL;
    self->field_size = 0;
    self->line_num = 0;

    if (parse_reset(self) < 0)
        goto fail;

    if (!PyArg_UnpackTuple(args, "", 1, 2, &source, &dialect))
        goto fail;

    self->input_iter = PyObject_GetIter(source);
    if (self->input_iter == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "argument 1 must be an iterator");
        goto fail;
    }

    self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
    if (self->dialect == NULL)
        goto fail;

    PyObject_GC_Track(self);
    return (PyObject *)self;

fail:
    Py_DECREF(self);
    return NULL;
}
Beispiel #11
0
// Default constructor: put the part into its reset parse state, then run
// split() on it.
window_part::window_part()
{
    this->parse_reset();
    this->split();
}
Beispiel #12
0
int assemble(struct membuf *source, struct membuf *dest)
{
    struct vec guesses_history[1];
    struct map guesses_storage[1];
    int dest_pos;
    int result;

    dump_sym_table(LOG_DEBUG, s->initial_symbols);

    vec_init(guesses_history, sizeof(struct map));
    s->guesses = NULL;
    dest_pos = membuf_memlen(dest);
    for(;;)
    {

        map_put_all(s->sym_table, s->initial_symbols);
        named_buffer_copy(s->named_buffer, s->initial_named_buffer);
        map_init(guesses_storage);

        if(s->guesses != NULL)
        {
            /* copy updated guesses from latest pass */
            map_put_all(guesses_storage, s->guesses);
        }
        s->guesses = guesses_storage;

        result = assembleSinglePass(source, dest);
        if(result != 0)
        {
            /* the assemble pass failed */
            break;
        }

        /* check if any guessed symbols was wrong and update them
         * to their actual value */
        if(wasFinalPass())
        {
            /* The assemble pass succeeded without any wrong guesses,
             * we're done */
            break;
        }
        if(loopDetect(guesses_history))
        {
            /* More passes would only get us into a loop */
            LOG(LOG_VERBOSE, ("Aborting due to loop.\n"));
            result = -1;
            break;
        }

        LOG(LOG_VERBOSE, ("Trying another pass.\n"));

        /* allocate storage for the guesses in the history vector */
        s->guesses = vec_push(guesses_history, s->guesses);

        parse_reset();
        membuf_truncate(dest, dest_pos);
    }
    map_free(guesses_storage);
    vec_free(guesses_history, (cb_free*)map_free);
    s->guesses = NULL;
    return result;
}
Beispiel #13
0
static PyObject *
CSVParser_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    Engine *engine;
    PyObject *pyfile  = NULL;
    PyObject *map = NULL;
    ColMap *colmap = NULL;
    char delimiter = ',';
    char quotechar = '"';
    CSVParser * self = PyObject_GC_New(CSVParser, &CSVParser_Type);
    if (self == NULL){
        return NULL;
    }
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "O!OO|cc", parser_kws,
                                     &Logq_Engine_Type, &engine,
                                     &pyfile,
                                     &map,
                                     &delimiter, &quotechar)){
        return NULL;
    }
    Py_INCREF(pyfile);
    Py_INCREF(engine);
    self->engine = engine;
    if(PyFile_Check(pyfile)){
        self->file   = PyFile_AsFile(pyfile);
        self->is_file = 1;
        self->pyfile = pyfile;
    }else{
        self->file    = NULL;
        self->is_file = 0;
        self->pyfile  = PyObject_GetIter(pyfile);
        if (self->pyfile == NULL) {
            PyErr_SetString(PyExc_TypeError,
                            "argument 1 must be an iterator");
            return NULL;
        }
    }
    if(!PySequence_Check(map)){
        PyErr_SetString(PyExc_TypeError,
                        "argument 2 must be a list");
        return NULL;
    }
    colmap = ColMap_new(map);
    if (colmap == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "argument 2 must be a list");
        return NULL;
    }
    self->colmap = colmap;
    self->fields = NULL;
    self->field  = NULL;
    self->field_size = 0;
    self->line_num   = 0;
    self->delimiter = delimiter;
    self->quotechar = quotechar;
    if (parse_reset(self) < 0) {
        Py_DECREF(pyfile);
        Py_DECREF(engine);
        Py_DECREF(self);
        return NULL;
    }
    PyObject_GC_Track(self);
    return (PyObject *)self;
}