/**
 * \brief Strip the whitespace at the beginning and end of sexpr, in place.
 *
 * A string that is empty, or that consists of whitespace only, ends up empty.
 *
 * \param sexpr The string to trim.
 */
void clearwhitespace(string& sexpr)
{
    if (sexpr.empty()) {
        return;
    }

    const int total = sexpr.size();

    // Index of the left-most character that is not whitespace.
    int first = 0;
    while (first < total && iswhitespace(sexpr[first])) {
        ++first;
    }

    // Only whitespace was found: nothing survives the trim.
    if (first == total) {
        sexpr = "";
        return;
    }

    // Index of the right-most character that is not whitespace. The scan
    // cannot run off the front because sexpr[first] stops it.
    int last = total - 1;
    while (iswhitespace(sexpr[last])) {
        --last;
    }

    sexpr = sexpr.substr(first, last - first + 1);
}
/*
 * Replace a leading "$NAME" in every command line entry by the value of the
 * environment variable NAME.
 *
 * An entry is expanded only when it is at least two characters long, starts
 * with '$' and the character after the '$' is not whitespace. NAME runs up
 * to the first whitespace character, '/' or the end of the entry; whatever
 * follows NAME is kept and re-appended after the value. An unset variable
 * expands to the empty string.
 *
 * NOTE(review): neither the copy into the local buffer nor the copy back
 * into the entry is length-checked; this presumably relies on every entry
 * being a buffer of COMMAND_LINE_SIZE bytes and on the expanded text
 * fitting into it -- confirm against the declaration of the entry table.
 */
void CommandLineParser_ExpandCommandLine()
{
    int entry;

    for (entry = 0; entry < CommandLineParser_iNoOfCommandLineEntries; ++entry)
    {
        char* text = CommandLineParser_szCommandLineEntries[entry];
        char tail[COMMAND_LINE_SIZE];
        char* value;
        int nameEnd;

        /* Too short to hold "$x". */
        if (strlen(text) <= 1)
            continue;

        /* Not a variable reference. */
        if (text[0] != '$' || iswhitespace(text[1]))
            continue;

        /* Find the end of the variable name. */
        nameEnd = 0;
        while (text[nameEnd] != '\0' && !iswhitespace(text[nameEnd]) && text[nameEnd] != '/')
            ++nameEnd;

        /* Save the remainder, then cut the entry down to "$NAME". */
        strcpy(tail, text + nameEnd);
        text[nameEnd] = '\0';

        /* Look up NAME (skipping the '$') and rebuild the entry. */
        value = getenv(text + 1);
        strcpy(text, (value == NULL) ? "" : value);
        strcat(text, tail);
    }
}
// Classify one input line.
//
// Returns:
//   -1  x is NULL, or the whitespace scan at the end hit the terminator or
//       position 255
//    1  the line starts with ";<Slice>"
//    2  the line starts with "<Delay>" or ";<Delay>"
//    0  anything else
int process(char *x)
{
    int pos;

    if (x == NULL) {
        return -1;
    }

    //HACK FOR NOW
    if (strncmp(x, "<Delay>", 7) == 0) {
        return 2;
    }

    // Everything recognised below starts with a semicolon.
    if (x[0] != ';') {
        return 0;
    }

    if (strncmp(x, ";<Slice>", 8) == 0) {
        return 1;
    }
    if (strncmp(x, ";<Delay>", 8) == 0) {
        return 2;
    }

    // NOTE(review): x[0] is ';' at this point, so this scan only runs if
    // iswhitespace() reports 1 for ';' -- it looks like dead code; confirm.
    for (pos = 0; iswhitespace(x[pos]) == 1; ++pos) {
        if (x[pos] == '\0' || pos >= 255) {
            return -1;
        }
    }

    return 0;
}
std::shared_ptr<Object> LispReader::read(std::istream &in, bool eof_is_error, std::shared_ptr<Object> eof_value, bool is_recursive) { //return read(in, eof_is_error, eof_value, is_recursive); for (; ;) { int c; while (iswhitespace(c = in.get())) {} if (in.eof()) { return eof_value; } else { if (c == '+' || c == '-') { int c2 = in.peek(); if (std::isdigit(c2)) { in.unget(); return read_number(in); } } if (std::isdigit(c)) { in.unget(); return read_number(in); } macro_fn fn = getmacro(c); if (fn != 0) { return fn(in); } in.unget(); return read_token(in); throw "not yet supported"; } } return eof_value; }
//// // case_whitespace // // Returns true if any error is produced and false otherwise. Handles // the case where the current character is a whitespace. // A following null plug ends parsing. // A following non-whitepsace begins a token preceded by 0, 1, or 3. // A following whitespace is ignored. // bool case_whitespace (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *iscmd, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // a following null plug is ignored // a following meta-char is ignored // a following whitespace is ignored // a following non-whitespace begins a token (including a backslash) if (!iswhitespace (line[i+1]) && line[i+1] != '\0' && !ismetachar (line[i+1])) { if (*isarg) { buffer[j] = '3'; // a redirection argument token }else if (*iscmd) { buffer[j] = '0'; // a command token *iscmd = 0; // no longer looking for a command *isarg = 0; }else { buffer[j] = '1'; // a command argument token } tokens[k] = &buffer[j]; ++(*jj); // increment buffer index ++(*kk); // increment tokens index } return FALSE; }
/*
 * Read characters from standard input into the file-level variable `look`
 * until it holds one for which iswhitespace() is false.
 */
void getChar()
{
    for (;;) {
        look = getchar();
        if (!iswhitespace(look)) {
            break;
        }
    }
}
/**
 * Move one symbol from the front of substring to the end of sexpr.
 *
 * If substring starts with a double quote, a string literal is read up to
 * and including the closing quote; a missing closing quote prints
 * "error: illegal string" and terminates the program. Otherwise characters
 * are read up to (not including) the next whitespace, '(' or '"'.
 * The consumed characters are removed from substring.
 *
 * An empty substring is left alone (previously a NUL was appended to sexpr
 * and substr() threw std::out_of_range).
 *
 * \param substring Input text; the consumed prefix is removed.
 * \param sexpr     Output; the symbol is appended.
 */
void readsinglesymbol(string& substring, string& sexpr)
{
    // Nothing to read.
    if (substring.empty()) {
        return;
    }

    const int length = substring.length();
    int i = 0;
    char currentchar = substring[i];

    if (currentchar == '\"') {
        // read a string literal: copy everything through the closing quote
        sexpr += currentchar;
        do {
            ++i;
            currentchar = substring[i];
            sexpr += currentchar;
        } while (currentchar != '\"' && i < length - 1);

        if ('\"' != currentchar) {
            cout << "error: illegal string" << endl;
            exit(1);
        }
    } else {
        // read a numeric literal or operator
        do {
            sexpr += currentchar;
            ++i;
            if (i >= length) {
                break;
            }
            currentchar = substring[i];
        } while ((!iswhitespace(currentchar)) && (currentchar != '(') && currentchar != '\"');

        // i stopped on the delimiter (or one past the end); step back onto
        // the last consumed character so both branches agree below.
        --i;
    }

    // Drop everything up to and including index i.
    substring = substring.substr(i + 1, length - i - 1);
}
//// // case_word // // Returns true if any error is produced and false otherwise. Handles // the case where the current character belongs to a word. // A word character is added to the current token. // A following null plug ends the current token and parsing. // A following meta-character ends the current token. // A following whitespace ends the current token. // A following backslash is ignored. // A following word character is ignored. // bool case_word (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *iscmd, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // token is a command if (*iscmd && !(*isarg)) { buffer[j] = '0'; tokens[k] = &buffer[j]; ++(*jj); j = (*jj); ++(*kk); k = (*kk); *iscmd = 0; *isarg = 0; } // place the char buffer[j] = line[i]; ++(*jj); j = (*jj); // a following null plug will end the token // a following meta-char will end the token // a following whitespace will end the token // a following backslash is ignored // a following non-whitespace is ignored if (line[i+1] == '\0' || ismetachar (line[i+1]) || iswhitespace (line[i+1])) { buffer[j] = '\0'; ++(*jj); j = (*jj); tokens[k] = &buffer[j]; *isarg = 0; } return FALSE; }
std::list<std::shared_ptr<Object>> read_delimited_list(int delim, std::istream &in) { std::list<std::shared_ptr<Object> > list; for (; ;) { int c; while (iswhitespace(c = in.get())) {} if (in.eof()) { throw "EOF while reading"; } if (c == delim) { break; } macro_fn fn = getmacro(c); if (fn != 0) { auto m = fn(in); if (m != NOOP) { list.push_back(m); } } else { in.unget(); auto o = read(in, true, Object::nil, true); if (o != NOOP) { list.push_back(o); } } } return list; }
/*
 * Return a pointer to the first character of str for which iswhitespace()
 * is false. The string itself is not modified.
 */
char * skipwhitespace(char * str)
{
    char *cursor;

    for (cursor = str; iswhitespace(*cursor); ++cursor)
        ;

    return cursor;
}
/**
 * Remove leading and trailing whitespace from str, in place.
 * A string made up of whitespace only becomes empty.
 */
static void strip(std::string & str)
{
    const size_t len = str.length();

    // [first, last) is the part that is kept.
    size_t first = 0;
    for (; first < len; ++first) {
        if (!iswhitespace(str[first])) {
            break;
        }
    }

    size_t last = len;
    for (; last > first; --last) {
        if (!iswhitespace(str[last - 1])) {
            break;
        }
    }

    // Cut the tail before the head so that `last` is still a valid index.
    str.erase(last);
    str.erase(0, first);
}
int next_token(char **s) { while (iswhitespace(**s)) (*s)++; str_clear(&curtok); while (!iswhitespace(**s) && **s != '\0') { str_append(&curtok, **s); (*s)++; } if (strcmp("{%", curtok.s) == 0) token = EXP_START; else if (strcmp("%}", curtok.s) == 0) { token = EXP_END; /* swallow whitespace to not affect output */ while (isnewline(**s)) (*s)++; } else if (strcmp("{$", curtok.s) == 0) token = SH_START; else if (strcmp("$}", curtok.s) == 0) token = SH_END; else if (strcmp("{{", curtok.s) == 0) token = VAR_START; else if (strcmp("}}", curtok.s) == 0) token = VAR_END; else if (strcmp("for", curtok.s) == 0) token = FOR; else if (strcmp("in", curtok.s) == 0) token = IN; else if (strcmp("do", curtok.s) == 0) token = DO; else if (strcmp("done", curtok.s) == 0) token = DONE; else if (strcmp("include", curtok.s) == 0) token = INCLUDE; else { token = IDENT; } return token; }
struct tree_node * write_for(FILE *out, struct tree_node *t, struct lacy_env *env) { DIR *d; struct tree_node *var, *list; struct dirent *de; /* Pop var IDENT */ t = t->next; var = t; /* Pop var list IDENT */ t = t->next; list = t; t = t->next; if (list->token == SH_BLOCK) { char c; struct ut_str file_path; str_init(&file_path); FILE *cmd = popen(list->buffer.s, "r"); while ((c = fgetc(cmd)) != EOF) { if (!iswhitespace(c)) { str_append(&file_path, c); } else { if (!str_is_empty(&file_path)) { env_set(env, var->buffer.s, file_path.s); do_write_tree(out, env, t); str_clear(&file_path); } } } fclose(cmd); str_free(&file_path); } else if (file_exists(list->buffer.s)) { /* Read directory */ if (NULL != (d = opendir(list->buffer.s))) { while ((de = readdir(d)) != NULL) { if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; env_set(env, var->buffer.s, de->d_name); do_write_tree(out, env, t); } closedir(d); } } while (t->scope != var->scope) t = t->next; return t; }
/********************************************************************************* * The contents of this file are subject to the Common Public Attribution * License Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.openemm.org/cpal1.html. The License is based on the Mozilla * Public License Version 1.1 but Sections 14 and 15 have been added to cover * use of software over a computer network and provide for limited attribution * for the Original Developer. In addition, Exhibit A has been modified to be * consistent with Exhibit B. * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for * the specific language governing rights and limitations under the License. * * The Original Code is OpenEMM. * The Original Developer is the Initial Developer. * The Initial Developer of the Original Code is AGNITAS AG. All portions of * the code written by AGNITAS AG are Copyright (c) 2007 AGNITAS AG. All Rights * Reserved. * * Contributor(s): AGNITAS AG. ********************************************************************************/ # include <stdlib.h> # include <unistd.h> # include <fcntl.h> # include <string.h> # include <dirent.h> # include <errno.h> # include <sys/types.h> # include <sys/stat.h> # include "qctrl.h" entry_t * entry_alloc (const char *fname, int match) /*{{{*/ { entry_t *e; if (e = (entry_t *) malloc (sizeof (entry_t))) { e -> fname = NULL; e -> match = match; e -> next = NULL; if (fname && (! 
(e -> fname = strdup (fname)))) e = entry_free (e); } return e; }/*}}}*/ entry_t * entry_free (entry_t *e) /*{{{*/ { if (e) { if (e -> fname) free (e -> fname); free (e); } return NULL; }/*}}}*/ entry_t * entry_free_all (entry_t *e) /*{{{*/ { entry_t *tmp; while (tmp = e) { e = e -> next; entry_free (tmp); } return NULL; }/*}}}*/ static bool_t iswhitespace (const char ch) /*{{{*/ { return ((ch == ' ') || (ch == '\t')) ? true : false; }/*}}}*/ qf_t * qf_alloc (const buffer_t *src) /*{{{*/ { qf_t *q; char *temp; int size; char *ptr; q = NULL; if (temp = malloc (src -> length + 1)) { memcpy (temp, src -> buffer, src -> length); temp[src -> length] = '\0'; size = 0; if (q = (qf_t *) malloc (sizeof (qf_t))) { q -> content = NULL; q -> count = 0; q -> idx = 0; for (ptr = temp; ptr; ) { if (q -> count >= size) { size += (size ? size : 32); if (! (q -> content = (char **) realloc (q -> content, (size + 1) * sizeof (char *)))) break; } if (*ptr) { q -> content[q -> count++] = ptr; while (ptr = strchr (ptr, '\n')) if ((! *(ptr + 1)) || (! iswhitespace (*(ptr + 1)))) break; else ++ptr; } if (ptr) if (*ptr) *ptr++ = '\0'; else ptr = NULL; } if (ptr) { if (q -> content) free (q -> content); free (q); q = NULL; } else { q -> content[q -> count] = NULL; if (q -> count) temp = NULL; } } if (temp) free (temp); } return q; }/*}}}*/
std::shared_ptr<Object> read_character(std::istream &in) { std::stringstream buf; int c = in.get(); if (in.eof()) { throw "EOF while reading character"; } buf.put(c); for (; ;) { c = in.get(); if (in.eof() || iswhitespace(c) || isterminator(c)) { in.unget(); break; } buf.put(c); } std::string token = buf.str(); if (token.size() == 1) { return std::make_shared<Character>( token[0] ); } else if (token == "newline") { return std::make_shared<Character>( '\n' ); } else if (token == "space") { return std::make_shared<Character>( ' ' ); } else if (token == "tab") { return std::make_shared<Character>( '\t' ); } else if (token == "backspace") { return std::make_shared<Character>( '\b' ); } else if (token == "formfeed") { return std::make_shared<Character>( '\f' ); } else if (token == "return") { return std::make_shared<Character>( '\r' ); } else if (token[0] == 'u') { long uc = read_unicode_char(token, 1, 4, 16); if (c >= 0xD800 && c <= 0xDFFF) { // TODO: java clojure actually prints u + the hex value of uc // is this any different than token? throw "Invalid character constant: \\" + token; } return std::make_shared<Character>( uc ); } else if (token[0] == 'o') { int len = token.size() - 1; if (len > 3) { throw "Invalid octal escape sequence length: " + std::to_string(len); } long uc = read_unicode_char(token, 1, len, 8); if (uc > 0377) { throw "Octal escape sequence mst be in range [0, 377]."; } return std::make_shared<Character>( uc ); } throw "Unsupported character: \\" + token; }
/*
 * Convert a hexadecimal string to an int.
 *
 * Leading whitespace and an optional "0x"/"0X" prefix are skipped, then
 * digits are accumulated for as long as getdigit() does not return -1.
 * Returns 0 when no digit is found.
 *
 * NOTE(review): no overflow check is made on the accumulated value.
 */
int htoi(char *str)
{
	int pos = 0;
	int value = 0;
	int digit;

	/* skip leading whitespace */
	while (iswhitespace(str[pos]))
		pos++;

	/* skip an optional hex prefix */
	if (str[pos] == '0' && (str[pos + 1] == 'x' || str[pos + 1] == 'X'))
		pos += 2;

	while ((digit = getdigit(str[pos++])) != -1)
		value = value * 16 + digit;

	return value;
}
int searchAction(struct actionParameters *ap, union additionalActionParameters *aap) { uint16_t port = 0; pid_t pid; int sockfd = 0; (void) aap; if (iswhitespace((char *)ap->comline.buf)) return 1; switch (pid = fork()) { case -1: logmsg(ap->semid, ap->logfd, LOGLEVEL_FATAL, "(searchAction) Problem forking\n"); return -3; case 0: sockfd = createPassiveSocket(&port); if (sockfd<=0) return -3; if ( -1 == getTokenFromBuffer(&ap->comline, &ap->comword, "\n","\r\n",NULL)) return -3; logmsg(ap->semid, ap->logfd, LOGLEVEL_VERBOSE, "(searchAction) called upon to search for %s\n", ap->comword.buf); logmsg(ap->semid, ap->logfd, LOGLEVEL_VERBOSE, "created port %d to send search results\n", port); char *msg = stringBuilder("RESULT SOCKET %d\n", port); if ( -1 == reply(ap->comfd, ap->logfd, ap->semid, REP_COMMAND, msg)){ free(msg); return -3; } free(msg); setFdBlocking(sockfd); socklen_t addrlen = sizeof(ap->comip); if ((ap->comfd = accept(sockfd, &ap->comip, &addrlen)) == -1 ) { logmsg(ap->semid, ap->logfd, LOGLEVEL_FATAL, "(searchAction) Problem accepting connection\n"); return -3; } int ret = (sendResult(ap->comfd, ap, aap->sap)); close (ap->comfd); return (ret == -1)? -3: -2; default: return 1; } }
std::shared_ptr<Object> read_token(std::istream &in) { std::stringstream buf; std::string ns = ""; for (; ;) { int c = in.get(); if (in.eof() || iswhitespace(c) || isterminator(c)) { in.unget(); break; } buf.put(c); if (c == '/') { ns = buf.str(); } } std::string s = buf.str(); if (s.back() == '/') { // TODO: this is the only case i can spot where we have an invalid token throw "Invalid token: " + s; } if (s == "nil") { return Object::nil; } if (s == "true") { return Object::T; } if (s == "false") { return Object::F; } // TODO: / = slash, clojure.core// = slash if ((ns != "" && ns.substr(ns.size()-3) == ":/") || s.back() == ':' || s.find("::", 1) != std::string::npos) { return nullptr; } if (s[0] == ':' && s[1] == ':') { auto ks = Symbol::create(s.substr(2)); // TODO: handle namespace qualified Keywords return nullptr; } bool iskey = s[0] == ':'; if (iskey) { return Keyword::create(s.substr(1)); } return std::make_shared<Symbol>(s); }
// Read a list of patterns from the file patname into pat[0], pat[1], ...
//
// Blank lines are skipped. Lines starting with '#' are comments; a line
// starting with "##" is additionally remembered as the current section
// header (without its line ending). Every other line is parsed with
// getpat() into the next free LifeList.
//
// Returns the number of patterns read; 0 if the file cannot be opened.
//
// NOTE(review): lines are read into the file-level buffer `s`, presumably
// at least 8191 bytes large, and nothing limits the number of patterns to
// the capacity of pat -- confirm both with the caller.
int readPatList(LifeList *pat, char *patname) {

    int i = 0;
    FILE *patfile;
    char header[256];
    int patcount = 0;

    header[0] = '\0';

    if ( (patfile = fopen(patname, "r")) != NULL) {

        while (fgets(s, 8191, patfile)) {

            if (iswhitespace(s)) continue;

            if (s[0] == '#') {
                if (s[1] == '#') {
                    // a new section starts: reset the per-section counter
                    if (patcount && header[0]) {
                        //fprintf(stderr, "%s x %d\n", header, patcount);
                        patcount = 0;
                    }
                    // A line can be far longer than header; copy at most
                    // what fits and terminate explicitly (an unbounded
                    // strcpy overflowed the 256 byte buffer here).
                    strncpy(header, s, sizeof(header) - 1);
                    header[sizeof(header) - 1] = '\0';
                    header[strcspn(header, "\r\n")] = '\0';
                }
            } else {
                initLifeList(pat + i);
                getpat(s, pat + i);
                i++;
                patcount++;
            }
        }

        //if (patcount && header[0])
        //    fprintf(stderr, "%s x %d\n", header, patcount);
        //fprintf(stderr, "Total catalysts %d\n", i);

        fclose(patfile);
    }

    return i;
}
char * parse_var(char *s, struct page *p, struct lacy_env *env) { struct ut_str var; str_init(&var); while (*s != '\0') { if (iswhitespace(*s) == 1) { ++s; continue; } if ('.' == *s) { tree_push(IDENT, var.s); tree_push(MEMBER, NULL); str_clear(&var); } else if (slook_ahead(s, "}}", 2)) { s += 2; if (0 == strcmp(var.s, "content")) { if (env_has_next(env)) { env_inc(env); build_tree(env); env_dec(env); } } else { tree_push(IDENT, var.s); } break; } else { str_append(&var, *s); } ++s; } str_free(&var); return s; }
//// // case_pipe // // Returns true if any error is produced and false otherwise. Handles // the case where the current character is a pipe meta-character. // A pipe meta-character is a token by itself preceded by a '4'. // A following null plug will cause a parsing error. // A following pipe meta-character will cause a parsing error. // A following redirection meta-character is ignored. // A following backslash will begin a new command token. // A following non-whitespace begins a new command token. // A following whitespace is ignored. // bool case_pipe (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *iscmd, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // if we are looking for a redirection arg or command // then throw a parsing error if ((*isarg) || (*iscmd)) return TRUE; // '4' represents a pipe meta-char buffer[j] = '4'; // a pipe meta-char *iscmd = 1; // next token should be a command tokens[k] = &buffer[j]; // begin a new token // place symbol into buffer buffer[j+1] = line[i]; // meta-char placement buffer[j+2] = '\0'; // null plug ++(*jj); ++(*jj); ++(*jj); j = (*jj); // increment buffer index ++(*kk); k = (*kk); // increment tokens index // a following null plug will cause a parsing error // a following pipe will cause a parsing error // a following redirection symbol is ignored // a following whitespace is ignored // a following non-whitespace begins a token (including a backslash) if (line[i+1] == '\0' || line[i+1] == '|') { return TRUE; // parsing error }else if (!iswhitespace (line[i+1]) && !ismetachar (line[i+1])) { // non-whitespace begins a command buffer[j] = '0'; *iscmd = 0; // no longer looking for command tokens[k] = &buffer[j]; ++(*jj); ++(*kk); } return FALSE; }
//// // case_backslash // // Returns true if any error is produced and false otherwise. Handles // the case where the current character is a backslash. // A following null plug will cause a parsing error. // Anything else following is added to the current token. // Examine the next following char // A null plug ends the current token. // A meta-character ends the current token. // A whitespace ends the current token. // Anything else is ignored. // bool case_backslash (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *iscmd, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // a following null plug will cause a parsing error // any following char is taken as a literal (including whitespace) if (line[i+1] == '\0' || line[i+1] == '\n' || line[i+1] == '\r') return TRUE; // parsing error if (*iscmd && !(*isarg)) { // token is a command buffer[j] = '0'; tokens[k] = &buffer[j]; ++(*jj); j = (*jj); ++(*kk); k = (*kk); *iscmd = 0; *isarg = 0; } // place char that follows the backslash buffer[j] = line[i+1]; ++(*ii); ++(*jj); // check if this char ends the token if (ismetachar (line[i+2]) || iswhitespace (line[i+2]) || line[i+2] == '\0') { // place a null plug buffer[j+1] = '\0'; ++(*jj); j = (*jj); tokens[k] = &buffer[j]; *isarg = 0; } return FALSE; }
//// // case_redirection // // Returns true if any error is produced and false otherwise. Handles // the case where the current character is a redirection meta-character. // A redirection meta-character is a token by itself preceded by a '2'. // A following null plug will case a parsing error. // A following meta-character will cause a parsing error. // A following backslash begins a token preceded by a '3'. // A following non-whitespace begins a token preceded by a '3'. // A following whitespace is ignored. // bool case_redirection (char *line, char *buffer, char **tokens, int *ii, int *jj, int *kk, int *isarg) { // make the passed indices more accessible int i = (*ii); int j = (*jj); int k = (*kk); // if we are looking for a redirection arg then throw a parsing error if (*isarg) return TRUE; // '2' represents a meta-char buffer[j] = '2'; // a meta-char *isarg = 1; // next token should be a redirection arg tokens[k] = &buffer[j]; // begin a new token // place redirection symbol into buffer buffer[j+1] = line[i]; // meta-char placement buffer[j+2] = '\0'; // null plug ++(*jj); ++(*jj); ++(*jj); j = (*jj); // increment buffer index ++(*kk); k = (*kk); // increment tokens index // a following null plug will cause a parsing error // a following meta-char will cause a parsing error // a following non-whitespace begins a token (including a backslash) // a following whitespace is ignored if (line[i+1] == '\0' || ismetachar (line[i+1]) || isnewline (line[i+1])) { return TRUE; // parsing error }else if (!iswhitespace (line[i+1])) { // non-whitespace begins a redirection argument buffer[j] = '3'; tokens[k] = &buffer[j]; ++(*jj); ++(*kk); } return FALSE; }
/*
 * Count the words on standard input that consist of alphabetic characters
 * only and print the result.
 *
 * A word is a maximal run of non-whitespace characters. Runs of whitespace
 * and leading whitespace do not produce words, and a word that ends at end
 * of input (without trailing whitespace) is counted too.
 */
int main() {
    int c;
    int wc = 0;
    int in_word = 0;        /* currently inside a run of non-whitespace */
    int valid_word = 1;     /* the current word is alphabetic so far */

    while ((c = getchar()) != EOF) {
        if (iswhitespace(c)) {
            /* whitespace closes a word only if one was actually open */
            if (in_word && valid_word) {
                wc++;
            }
            in_word = 0;
            valid_word = 1;
        }
        else {
            in_word = 1;
            if (!isalpha(c)) {
                valid_word = 0;
            }
        }
    }

    /* the input may end in the middle of a word */
    if (in_word && valid_word) {
        wc++;
    }

    printf("Word Count --> %d\n", wc);
    return 0;
}
/**
 * Load one chunk per attribute from the CSV input.
 *
 * For every cell of the chunk one record is parsed, one field per
 * attribute, and written through that attribute's chunk iterator. The
 * empty-tag attribute does not consume a field; it is always written as
 * true. Loading stops when the first attribute's iterator reaches the end
 * of the chunk or the parser reports the end of the file.
 *
 * Errors raised while handling a field are passed to the array's
 * handleError(); the field is still counted and the iterator advanced.
 * A record with more fields than attributes posts one warning per loader
 * (guarded by _tooManyWarning).
 *
 * @param query       query on whose behalf the load runs
 * @param chunkIndex  index passed to getLookaheadChunk() for each attribute
 * @return true if at least one field was parsed successfully; false if
 *         nothing was parsed, including the case where the input is
 *         already exhausted on entry
 */
bool CsvChunkLoader::loadChunk(boost::shared_ptr<Query>& query, size_t chunkIndex)
{
    // Must do EOF check *before* nextImplicitChunkPosition() call, or
    // we risk stepping out of bounds.
    if (_csvParser.empty()) {
        // Peek one character: read it, and put it back if it exists.
        int ch = ::getc(fp());
        if (ch == EOF) {
            return false;
        }
        ::ungetc(ch, fp());
    }

    // Reposition and make sure all is cool.
    nextImplicitChunkPosition(MY_CHUNK);
    enforceChunkOrder("csv loader");

    // Initialize a chunk and chunk iterator for each attribute.
    Attributes const& attrs = schema().getAttributes();
    size_t nAttrs = attrs.size();
    vector< boost::shared_ptr<ChunkIterator> > chunkIterators(nAttrs);
    for (size_t i = 0; i < nAttrs; i++) {
        Address addr(i, _chunkPos);
        MemChunk& chunk = getLookaheadChunk(i, chunkIndex);
        chunk.initialize(array(), &schema(), addr, attrs[i].getDefaultCompressionMethod());
        chunkIterators[i] = chunk.getIterator(query,
                                              ChunkIterator::NO_EMPTY_CHECK |
                                              ConstChunkIterator::SEQUENTIAL_WRITE);
    }

    char const *field = 0;
    int rc = 0;
    bool sawData = false;
    bool sawEof = false;

    // One iteration per record; all iterators advance in lock step, so the
    // first one decides when the chunk is full.
    while (!chunkIterators[0]->end()) {

        _column = 0;
        array()->countCell();

        // Parse and write out a line's worth of fields.  NB if you
        // have to 'continue;' after a writeItem() call, make sure the
        // iterator (and possibly the _column) gets incremented.
        //
        for (size_t i = 0; i < nAttrs; ++i) {
            try {
                // Handle empty tag...
                if (i == emptyTagAttrId()) {
                    attrVal(i).setBool(true);
                    chunkIterators[i]->writeItem(attrVal(i));
                    ++(*chunkIterators[i]); // ...but don't increment _column.
                    continue;
                }

                // Parse out next input field.
                rc = _csvParser.getField(field);
                if (rc == CsvParser::END_OF_FILE) {
                    // Leaves the attribute loop; the record loop is left
                    // right after it via sawEof.
                    sawEof = true;
                    break;
                }
                if (rc == CsvParser::END_OF_RECORD) {
                    // Got record terminator, but we have more attributes!
                    throw USER_EXCEPTION(SCIDB_SE_IMPORT_ERROR, SCIDB_LE_OP_INPUT_TOO_FEW_FIELDS)
                        << _csvParser.getFileOffset() << _csvParser.getRecordNumber() << _column;
                }
                if (rc > 0) {
                    // So long as we never call _csvParser.setStrict(true), we should never see this.
                    throw USER_EXCEPTION(SCIDB_SE_IMPORT_ERROR, SCIDB_LE_CSV_PARSE_ERROR)
                        << _csvParser.getFileOffset() << _csvParser.getRecordNumber() << _column << csv_strerror(rc);
                }
                SCIDB_ASSERT(rc == CsvParser::OK);
                SCIDB_ASSERT(field);
                sawData = true;

                // Process input field.
                if (mightBeNull(field) && attrs[i].isNullable()) {
                    // A negative result means the field is not a null
                    // after all and is converted like any other value.
                    int8_t missingReason = parseNullField(field);
                    if (missingReason >= 0) {
                        attrVal(i).setNull(missingReason);
                        chunkIterators[i]->writeItem(attrVal(i));
                        ++(*chunkIterators[i]);
                        _column += 1;
                        continue;
                    }
                }
                if (converter(i)) {
                    // A converter exists for this attribute: hand it the
                    // field as a string value.
                    Value v;
                    v.setString(field);
                    const Value* vp = &v;
                    (*converter(i))(&vp, &attrVal(i), NULL);
                    chunkIterators[i]->writeItem(attrVal(i));
                }
                else {
                    TypeId const &tid = typeIdOfAttr(i);
                    if (attrs[i].isNullable() &&
                        (*field == '\0' || (iswhitespace(field) && IS_NUMERIC(tid)))) {
                        // [csv2scidb compat] With csv2scidb, empty strings (or for numeric
                        // fields, whitespace) became nulls if the target attribute was
                        // nullable.  We keep the same behavior.  (We should *not* do this for
                        // TSV, that format requires explicit nulls!)
                        attrVal(i).setNull();
                    } else {
                        StringToValue(tid, field, attrVal(i));
                    }
                    chunkIterators[i]->writeItem(attrVal(i));
                }
            }
            catch (Exception& ex) {
                // Remember what failed and where, then let the array decide
                // how to deal with the error.
                _badField = field;
                _fileOffset = _csvParser.getFileOffset();
                array()->handleError(ex, chunkIterators[i], i);
            }

            // Common tail for every field that did not 'continue' above.
            _column += 1;
            ++(*chunkIterators[i]);
        }

        if (sawEof) {
            break;
        }

        // We should be at EOL now, otherwise there are too many fields on this line.  Post a
        // warning: it seems useful not to complain too loudly about this or to abort the load, but
        // we do want to mention it.
        //
        rc = _csvParser.getField(field);
        if (!_tooManyWarning && (rc != CsvParser::END_OF_RECORD)) {
            _tooManyWarning = true;
            query->postWarning(SCIDB_WARNING(SCIDB_LE_OP_INPUT_TOO_MANY_FIELDS)
                               << _csvParser.getFileOffset() << _csvParser.getRecordNumber() << _column);
        }

        array()->completeShadowArrayRow(); // done with cell/record
    }

    // Flush whatever was written, for every attribute.
    for (size_t i = 0; i < nAttrs; i++) {
        if (chunkIterators[i]) {
            chunkIterators[i]->flush();
        }
    }

    return sawData;
}
std::shared_ptr<Object> read_number(std::istream &in) { std::stringstream buf; std::string start = ""; bool error = false; number_type type = number_type::none; int base = 10; // lambda to tell if we've reached the end of the number std::function<bool (int)> isend = [&in] (int i) -> bool { return iswhitespace(i) || isterminator(i) || in.eof(); }; int c; // process the first few chars while (!isend(c = in.get())) { if (c == '0') { // if the first character is 0, there's only a few posible options c = in.get(); if (isend(c)) { // check if it's 0 type = number_type::integer; buf.put('0'); break; } else if (c == 'x' || c == 'X') { // check if it's a hex value type = number_type::integer; base = 16; start += "0"; start += (char)c; break; } else if (c >= '0' && c <= '7') { // check if it's an oct value type = number_type::integer; buf.put(c); base = 8; start = "0"; break; } else if (c == 'e' || c == 'E') { type = number_type::scientific; buf.put('0'); buf.put(c); break; } else { error = true; buf << "O" << (char)c; break; } } else if (c == '-') { // add the sign to the buf if a - buf.put(c); } else if (c == '+') { // ignore the sign if it's a + } else { buf.put(c); break; } } while (1) { c = in.get(); if (isend(c)) { // TODO: process buf and get the value in.unget(); // TODO: make sure we want to unget here if (error) { buf.str("Invalid Number: " + start + buf.str()); throw buf.str(); } else { if (type == number_type::integer || type == number_type::none) { return std::make_shared<Integer>(buf.str(), base); } else if (type == number_type::irrational || type == number_type::scientific) { return std::make_shared<Irrational>(buf.str()); } else if (type == number_type::ratio) { std::string r = buf.str(); size_t s = r.find('/'); return std::make_shared<Ratio>(r.substr(0, s), r.substr(s+1)); } } return Object::nil; // should never get here } if (error) { buf.put(c); } else if (c == 'r') { if (type != number_type::none) { // we already have a base, so this is an invalid number 
error = true; buf.put(c); } else { std::string radix = buf.str(); buf.str(""); if (radix.size() > 2) { error = true; buf.put(c); } else { base = stoi(radix); if (base > 36) { // TODO: NumberFormatException throw "Radix out of range"; } type = number_type::integer; } } } else if (c == '/') { buf.put(c); if (type != number_type::none) { // we already have a base, so this is an invalid number error = true; } else { type = number_type::ratio; c = in.get(); if (isend(c)) { error = true; in.unget(); } else { buf.put(c); } } } else if (c == '.' && type == number_type::none) { type = number_type::irrational; buf.put(c); } else if ((c == 'e' || c == 'E') && (type == number_type::none || type == number_type::irrational)) { type = number_type::scientific; buf.put(c); c = in.get(); if (isend(c)) { error = true; } else { buf.put(c); if (!(c == '-' || c == '+' || (c >= '0' && c <= '9'))) { error = true; } } } else if ((c == 'N' || c == 'M') && base == 10) { // M and N endings only possible with base 10 c = in.get(); if (!isend(c)) { error = true; buf.put(c); } else { in.unget(); } } else if (c >= '0' && ((base < 11 && (c < '0' + base)) || (base > 10 && (c <= '9' || (c >= 'a' && c < 'a' + (base-10)) || (c >= 'A' && c < 'A' + (base-10)) )))) { buf.put(c); } else { buf.put(c); error = true; } } return Object::nil; }
/*
 * Walk a newline-separated symbol list held in memory and fill in
 * `symbols` starting at slot `idx`.
 *
 * Each accepted line has the shape
 *
 *     name [ ':' indirect ] [ '-' option ]
 *
 * Empty lines, lines starting with '#', lines with no symbol characters and
 * symbols starting with '.' are skipped. Only text up to the last '\n' is
 * considered; a final line without a newline is not processed.
 *
 * The buffer is edited in place: each newline and the character following
 * each stored name / indirect name is overwritten with NUL, and the stored
 * pointers refer into `file`.
 *
 * A malformed line, an over-long option or running out of slots prints a
 * message to stderr and calls exit(1).
 *
 * Returns the sum of the stored name and indirect lengths, each counted
 * with its terminating NUL.
 */
static uint32_t
store_symbols(char * file, vm_size_t file_size, struct symbol * symbols, uint32_t idx, uint32_t max_symbols)
{
	uint32_t table_bytes = 0;
	char *   cursor;
	char *   line_start;
	char *   line_end;
	char *   following;

	for (cursor = line_start = file; ; cursor = line_start = following) {
		char *       sym_begin;
		char *       sym_end;
		unsigned int sym_len;
		char *       alias_begin = NULL;
		char *       alias_end = NULL;
		unsigned int alias_len = 0;
		boolean_t    is_obsolete = 0;

		line_end = memchr(cursor, '\n', file_size - (cursor - file));
		if (line_end == NULL) {
			break;
		}
		following = line_end + 1;

		/* Nothing on this line at all. */
		if (line_end == cursor) {
			continue;
		}

		/* Terminate the line so it can be handled as a C string. */
		*line_end = '\0';

		/* Comment line. */
		if (*cursor == '#') {
			continue;
		}

		/* Step over whatever precedes the symbol. */
		for (; cursor < line_end && !issymchar(*cursor); cursor++) {
		}

		/* No symbol found, or a symbol starting with '.': ignore the line. */
		if (cursor == line_end || *cursor == '.') {
			continue;
		}

		/* The symbol runs to the first non-symbol character. */
		sym_begin = cursor;
		for (; *cursor != '\0' && issymchar(*cursor); cursor++) {
		}
		sym_end = cursor;

		/* The stored length counts the terminating NUL. */
		sym_len = sym_end - sym_begin + 1;

		/* Optional ": indirect" part. */
		for (; *cursor != '\0' && iswhitespace(*cursor); cursor++) {
		}
		if (*cursor == ':') {
			cursor++;
			for (; *cursor != '\0' && iswhitespace(*cursor); cursor++) {
			}
			if (issymchar(*cursor)) {
				alias_begin = cursor;
				for (; *cursor != '\0' && issymchar(*cursor); cursor++) {
				}
				alias_end = cursor;

				/* The stored length counts the terminating NUL. */
				alias_len = alias_end - alias_begin + 1;
			} else if (*cursor == '\0') {
				fprintf(stderr, "bad format in symbol line: %s\n", line_start);
				exit(1);
			}
		} else if (*cursor != '\0' && *cursor != '-') {
			fprintf(stderr, "bad format in symbol line: %s\n", line_start);
			exit(1);
		}

		/* Optional "-option" part. */
		for (; *cursor != '\0' && iswhitespace(*cursor); cursor++) {
		}
		if (*cursor == '-') {
			cursor++;
			if (isalpha(*cursor)) {
				char         optionstr[256];
				char *       opt_begin = cursor;
				unsigned int opt_len;

				for (; *cursor != '\0' && isalpha(*cursor); cursor++) {
				}
				opt_len = cursor - opt_begin;

				if (opt_len >= sizeof(optionstr)) {
					fprintf(stderr, "option too long in symbol line: %s\n", line_start);
					exit(1);
				}
				memcpy(optionstr, opt_begin, opt_len);
				optionstr[opt_len] = '\0';

				/* Compared over the option's own length only. */
				if (strncmp(optionstr, "obsolete", opt_len) == 0) {
					is_obsolete = TRUE;
				}
			} else if (*cursor == '\0') {
				fprintf(stderr, "bad format in symbol line: %s\n", line_start);
				exit(1);
			}
		}

		if (idx >= max_symbols) {
			fprintf(stderr, "symbol[%d/%d] overflow: %s\n", idx, max_symbols, line_start);
			exit(1);
		}

		/* Cut the names out of the line now that parsing is finished. */
		*sym_end = '\0';
		if (alias_end) {
			*alias_end = '\0';
		}

		symbols[idx].name = sym_begin;
		symbols[idx].name_len = sym_len;
		symbols[idx].indirect = alias_begin;
		symbols[idx].indirect_len = alias_len;
		symbols[idx].flags = (is_obsolete) ? kObsolete : 0;

		table_bytes += symbols[idx].name_len + symbols[idx].indirect_len;
		idx++;
	}

	return table_bytes;
}
/**
 * \brief Separately parse the sexpr and build the tree.
 *
 * The first s-expression in instr (a single symbol, or a parenthesised
 * list) is split off and parsed as the car. Whatever follows it is wrapped
 * in "(" ... ")" and parsed as the cdr.
 *
 * \param instr The string which consists of s-expressions. It is modified:
 *              surrounding whitespace is removed and the first s-expression
 *              is consumed, so on return it holds the trimmed remainder.
 *              If the leading list is never closed, instr is left empty.
 * \return A pointer to the conspair cell at the root of the parse tree, or
 *         nil when instr is empty or its leading list is never closed.
 */
Cell* separate_parse(string& instr)
{
  clearwhitespace(instr);
  if (instr.empty()) {
    return nil;
  }

  string sexp;

  // A leading symbol: hand it to the symbol reader, the rest is the cdr.
  if ('(' != instr[0]) {
    readsinglesymbol(instr, sexp);
    clearwhitespace(instr);

    inparsecar = true;
    Cell* car = parse(sexp);
    inparsecar = false;

    Cell* cdr = parse("(" + instr + ")");
    return cons(car, cdr);
  }

  // A leading list: copy characters until its parentheses balance. The
  // input is walked by index and trimmed once at the end, instead of
  // being re-copied for every character consumed.
  int inumleftparenthesis = 0;
  const string::size_type total = instr.size();
  for (string::size_type pos = 0; pos < total; ++pos) {
    const char currentchar = instr[pos];

    if (iswhitespace(currentchar)) {
      // every kind of whitespace is normalised to one blank
      sexp += ' ';
      continue;
    }

    sexp += currentchar;
    if ('(' == currentchar) {
      ++inumleftparenthesis;
    } else if (')' == currentchar && 0 == --inumleftparenthesis) {
      // current s-expression ends: drop it from the input
      instr.erase(0, pos + 1);
      clearwhitespace(instr);

      inparsecar = true;
      Cell* car = parse(sexp);
      inparsecar = false;

      Cell* cdr = instr.empty() ? nil : parse("(" + instr + ")");
      return cons(car, cdr);
    }
  }

  // The list was never closed; the whole input has been consumed.
  instr.clear();
  return nil;
}
/*-----------------------------------------------------------------------------------*/
/*
 * Feed up to dlen bytes of data to the tokenizer, acting on the current
 * value of s.minorstate.
 *
 * Exactly one state is handled per call: the switch runs once, and the
 * loop for that state stops either when all bytes have been looked at or
 * at the first byte that changes s.minorstate.
 *
 * Returns the number of bytes processed: len when the loop ran to the end
 * of the data, otherwise i + 1, i.e. everything up to and including the
 * byte that stopped the loop. Unprocessed bytes are not remembered here.
 * NOTE(review): presumably the caller calls again with the remainder -
 * confirm against the call site.
 *
 * s, do_word(), add_char(), endtagfound(), parse_tag() and lowercase() are
 * defined outside this function.
 */
static u16_t
parse_word(char *data, u8_t dlen)
{
  /* i and len have static storage; both are assigned before use on every call. */
  static u8_t i;
  static u8_t len;
  unsigned char c;

  len = dlen;

  switch(s.minorstate) {
  case MINORSTATE_TEXT:
    /* Plain text: whitespace calls do_word() and scanning continues;
       '<' and '&' switch state and stop the scan. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(iswhitespace(c)) {
        do_word();
      } else if(c == ISO_lt) {
        s.minorstate = MINORSTATE_TAG;
        s.tagptr = 0;
        /* do_word();*/
        break;
      } else if(c == ISO_ampersand) {
        s.minorstate = MINORSTATE_EXTCHAR;
        break;
      } else {
        add_char(c);
      }
    }
    break;
  case MINORSTATE_EXTCHAR:
    /* After '&': every byte is dropped until ';' (a single space is added)
       or whitespace (an '&' and a space are added). */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_semicolon) {
        s.minorstate = MINORSTATE_TEXT;
        add_char(' ');
        break;
      } else if(iswhitespace(c)) {
        s.minorstate = MINORSTATE_TEXT;
        add_char('&');
        add_char(' ');
        break;
      }
    }
    break;
  case MINORSTATE_TAG:
    /* We are currently parsing within the name of a tag. We check
       for the end of a tag (the '>' character) or whitespace (which
       indicates that we should parse a tag attr argument
       instead). */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found. We continue parsing regular text. */
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = s.tagattrparamptr = 0;
        endtagfound();
        parse_tag();
        break;
      } else if(iswhitespace(c)) {
        /* The name of the tag found. We continue parsing the tag
           attr.*/
        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        /* Keep track of the name of the tag, but convert it to
           lower case. */
        s.tag[s.tagptr] = lowercase(c);
        ++s.tagptr;
        /* Check if the ->tag field is full. If so, we just eat up
           any data left in the tag. */
        if(s.tagptr == sizeof(s.tag)) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }

      /* Check for HTML comment, indicated by <!-- */
      /* This test is only reached after a name character was stored,
         since every other branch above breaks out of the loop. */
      if(s.tagptr == 3 &&
         s.tag[0] == ISO_bang &&
         s.tag[1] == ISO_dash &&
         s.tag[2] == ISO_dash) {
        PRINTF(("Starting comment...\n"));
        s.minorstate = MINORSTATE_HTMLCOMMENT;
        s.tagptr = 0;
        endtagfound();
        break;
      }
    }
    break;
  case MINORSTATE_TAGATTR:
    /* We parse the "tag attr", i.e., the "href" in <a
       href="...">. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found. */
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrparamptr = 0;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        s.tagptr = 0;
        endtagfound();
        break;
      } else if(iswhitespace(c)) {
        if(s.tagattrptr == 0) {
          /* Discard leading spaces. */
        } else {
          /* A non-leading space is the end of the attribute. */
          s.tagattrparamptr = 0;
          endtagfound();
          parse_tag();
          s.minorstate = MINORSTATE_TAGATTRSPACE;
          break;
          /* s.tagattrptr = 0;
             endtagfound();*/
        }
      } else if(c == ISO_eq) {
        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
        s.tagattrparamptr = 0;
        endtagfound();
        break;
      } else {
        s.tagattr[s.tagattrptr] = lowercase(c);
        ++s.tagattrptr;
        /* Check if the "tagattr" field is full. If so, we just eat
           up any data left in the tag. */
        if(s.tagattrptr == sizeof(s.tagattr)) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_TAGATTRSPACE:
    /* Whitespace seen after an attribute name: either '=' follows, or the
       next non-space byte becomes the first character of a new attribute
       name. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(iswhitespace(c)) {
        /* Discard spaces. */
      } else if(c == ISO_eq) {
        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
        s.tagattrparamptr = 0;
        endtagfound();
        parse_tag();
        break;
      } else {
        s.tagattr[0] = lowercase(c);
        s.tagattrptr = 1;
        s.minorstate = MINORSTATE_TAGATTR;
        break;
      }
    }
    break;
  case MINORSTATE_TAGATTRPARAMNQ:
    /* We are parsing the "tag attr parameter", i.e., the link part
       in <a href="link">. */
    /* In this state no opening quote has been seen yet; a quote as the
       first parameter character switches to MINORSTATE_TAGATTRPARAM. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found. */
        endtagfound();
        parse_tag();
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        s.tagptr = 0;
        endtagfound();
        break;
      } else if(iswhitespace(c) &&
                s.tagattrparamptr == 0) {
        /* Discard leading spaces. */
      } else if((c == ISO_citation ||
                 c == ISO_citation2) &&
                s.tagattrparamptr == 0) {
        /* Remember which quote character opened the parameter. */
        s.minorstate = MINORSTATE_TAGATTRPARAM;
        s.quotechar = c;
        PRINTF(("tag attr param q found\n"));
        break;
      } else if(iswhitespace(c)) {
        PRINTF(("Non-leading space found at %d\n",
                s.tagattrparamptr));
        /* Stop parsing if a non-leading space was found */
        endtagfound();
        parse_tag();

        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        s.tagattrparam[s.tagattrparamptr] = c;
        ++s.tagattrparamptr;
        /* Check if the "tagattr" field is full. If so, we just eat
           up any data left in the tag. */
        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_TAGATTRPARAM:
    /* We are parsing the "tag attr parameter", i.e., the link
       part in <a href="link">. */
    /* Quoted form: bytes are collected until the quote character stored
       in s.quotechar is seen again. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == s.quotechar) {
        /* Found end of tag attr parameter. */
        endtagfound();
        parse_tag();

        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        /* Any whitespace byte is stored as ISO_space. */
        if(iswhitespace(c)) {
          s.tagattrparam[s.tagattrparamptr] = ISO_space;
        } else {
          s.tagattrparam[s.tagattrparamptr] = c;
        }

        ++s.tagattrparamptr;
        /* Check if the "tagattr" field is full. If so, we just eat
           up any data left in the tag. */
        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_HTMLCOMMENT:
    /* Inside a comment s.tagptr counts consecutive '-' bytes; a '>'
       directly after at least one '-' ends the comment, any other byte
       resets the count. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_dash) {
        ++s.tagptr;
      } else if(c == ISO_gt && s.tagptr > 0) {
        PRINTF(("Comment done.\n"));
        s.minorstate = MINORSTATE_TEXT;
        break;
      } else {
        s.tagptr = 0;
      }
    }
    break;
  case MINORSTATE_TAGEND:
    /* Discard characters until a '>' is seen. */
    for(i = 0; i < len; ++i) {
      if(data[i] == ISO_gt) {
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        break;
      }
    }
    break;
  default:
    /* Unknown state: with i == 0 the code below reports one byte
       processed (or zero when len is 0). */
    i = 0;
    break;
  }
  /* The loop ran off the end of the data: everything was processed. */
  if(i >= len) {
    return len;
  }
  /* Otherwise count the byte that stopped the loop as processed too. */
  return i + 1;
}
// Windows entry point: turns the raw command line into a C-style
// argc/argv pair and forwards it to main().
//
// argv[0] is the module file name reported by GetModuleFileName. Every
// following entry is either a run of non-whitespace characters or the text
// between a pair of double quotes (the quotes are dropped; an unterminated
// quote runs to the end of the command line). argv[argc] is always a null
// pointer.
//
// The argument storage is never freed. Returns main()'s result, or 1 when
// memory for the argument vector cannot be allocated.
int WINAPI WinMain(HINSTANCE inst, HINSTANCE previnst, LPSTR cmdline, int cmdshow)
{
	int argc = 1;
	char** argv;
	char* p;

	// so everything else can have it
	MainInstance = inst;

	// room for the executable name plus the terminating null pointer
	argv = (char**)malloc(2 * sizeof(char*));
	if (argv == NULL)
		return 1;

	// copy executable name into first entry
	argv[0] = (char*)malloc(sizeof(char) * 520);
	if (argv[0] == NULL)
		return 1;
	argv[0][0] = 0;                 // stays a valid string if the call fails
	GetModuleFileName(inst, argv[0], 512);
	argv[0][512] = 0;               // and if the path was truncated
	argv[1] = NULL;

	// convert cmdline into arguments
	p = cmdline;
	while (p != NULL && *p)
	{
		// skip whitespace
		while (iswhitespace(*p))
			p++;

		if (*p)
		{
			const char* first;
			char* arg;
			char** grown;
			int size;
			int k;

			if (*p == '\"')     // go until next " or end of string
			{
				p++;
				first = p;
				while (*p && *p != '\"')
					p++;
				size = (int)(p - first);
				if (*p)         // if we hit a ", skip it
					p++;
			}
			else                // go until the next whitespace
			{
				first = p;
				while (*p && !iswhitespace(*p))
					p++;
				size = (int)(p - first);
			}

			// the length is known now: one allocation per argument
			arg = (char*)malloc((size + 1) * sizeof(char));
			if (arg == NULL)
				return 1;
			for (k = 0; k < size; k++)
				arg[k] = first[k];
			arg[size] = 0;

			// grow the vector, keeping a slot for the null terminator
			grown = (char**)realloc(argv, (argc + 2) * sizeof(char*));
			if (grown == NULL)
				return 1;
			argv = grown;
			argv[argc] = arg;
			argc++;
			argv[argc] = NULL;
		}
	}

	// NOTE(review): this looks like a leftover debugging popup - confirm
	// whether it is still wanted before every start-up.
	MessageBox(NULL, "Test.", "Your car is on fire!", MB_OK);

	return main(argc, argv);
}