/*===========================================
 * ask_for_int -- Ask user to provide integer
 *  ttl:   [IN]  prompt title
 *  prtn:  [OUT] receives the integer that was entered
 * Returns FALSE as soon as ask_for_string reports failure.
 * Otherwise keeps prompting until the reply is a well-formed
 * integer (optional '-', decimal digits, optional whitespace
 * around them), stores it in *prtn and returns TRUE.
 *=========================================*/
BOOLEAN
ask_for_int (STRING ttl, INT * prtn)
{
	char buffer[MAXPATHLEN];

	for (;;) {
		STRING cur = buffer;
		INT sign = 1;
		INT value;

		if (!ask_for_string(ttl, _(qSaskint), buffer, sizeof(buffer)))
			return FALSE;

		/* skip leading whitespace */
		while (iswhite(*cur))
			cur++;

		/* optional minus sign, which may itself be followed by whitespace */
		if (*cur == '-') {
			sign = -1;
			cur++;
			while (iswhite(*cur))
				cur++;
		}

		/* no digit here: the reply is malformed, so prompt again */
		if (chartype(*cur) != DIGIT)
			continue;

		/* accumulate the run of decimal digits */
		value = 0;
		do {
			INT digit = *cur++;
			value = value*10 + digit - '0';
		} while (chartype(*cur) == DIGIT);

		/* only whitespace may follow the number */
		while (iswhite(*cur))
			cur++;
		if (*cur == 0) {
			*prtn = value*sign;
			return TRUE;
		}
		/* trailing junk: fall through and prompt again */
	}
}
/*================================================
 * shorten_date -- Return short form of date value
 *  date: [IN]  date string to scan
 * Returns the first run of digits in date that is exactly
 * 3 or 4 digits long, or NULL if date is NULL, shorter than
 * 3 characters, or contains no such run.
 * The result lives in one of three static buffers that are
 * used in rotation, so it is overwritten by later calls.
 *==============================================*/
STRING
shorten_date (STRING date)
{
	static char buffer[3][MAXLINELEN+1];
	static int dex = 0;
	STRING src = date;
	STRING out;
	INT ch, ndigits;

	/* Allow 3 or 4 digit years. The previous test for strlen(date) < 4
	 * prevented dates consisting of only 3 digit years from being
	 * returned. - pbm 12 oct 99 */
	if (!date || (INT) strlen(date) < 3) return NULL;

	/* advance to the next buffer in the rotation */
	dex = (dex + 1) % 3;
	out = buffer[dex];

	for (;;) {
		/* find the first digit of the next run */
		do {
			ch = (uchar)*src++;
		} while (ch && chartype(ch) != DIGIT);
		if (ch == 0) return NULL;

		/* copy the run, keeping at most 6 digits of it */
		ndigits = 0;
		do {
			if (ndigits < 6)
				out[ndigits++] = ch;
			ch = (uchar)*src++;
		} while (ch && chartype(ch) == DIGIT);
		out[ndigits] = 0;

		if (ndigits == 3 || ndigits == 4) return out;
		/* the run ended at the end of the string: nothing left to try */
		if (ch == 0) return NULL;
	}
}
/*=========================================
 * key_possible_to_record -- Returns record with key
 *  str:  [IN]  string that may be a key
 *  let:  [IN]  if string starts with a letter, it must be this
 *              (eg, 'I' for indi)
 * str may be either <let><digits> or just <digits>; in both
 * cases the key looked up is <let><digits>.
 * This returns NULL upon failure (NULL/empty str, wrong leading
 * letter, non-digit characters, a key too long for the key
 * buffer, or no such record).
 *=======================================*/
RECORD
key_possible_to_record (STRING str, /* string that may be a key */
                        INT let)    /* if string starts with letter it must be this */
{
	char kbuf[MAXGEDNAMELEN];
	INT i = 0, c;

	if (!str || *str == 0) return NULL;
	c = *str++;
	if (c != let && chartype(c) != DIGIT) return NULL;
	/* the key always starts with the type letter, whether or not str had it */
	kbuf[i++] = let;
	if (c != let) kbuf[i++] = c;
	while ((c = *str++) && chartype(c) == DIGIT) {
		/* keep one byte free for the terminator; anything longer
		 * cannot be a valid key and must not overrun kbuf */
		if (i >= (INT)sizeof(kbuf) - 1) return NULL;
		kbuf[i++] = c;
	}
	/* stopped on something other than end of string: not a key */
	if (c != 0) return NULL;
	kbuf[i] = 0;
	if (!isrecord(BTR, str2rkey(kbuf))) return NULL;
	switch (let) {
	case 'I': return qkey_to_irecord(kbuf);
	case 'F': return qkey_to_frecord(kbuf);
	case 'S': return qkey_to_srecord(kbuf);
	case 'E': return qkey_to_erecord(kbuf);
	case 'X': return qkey_to_orecord(kbuf);
	default: FATAL();
	}
	FATAL();
	return NULL;
}
/*===========================
 * lowyylex -- Lexer function
 *  pactx: [IN]  parse context handed to inchar/unreadchar/lextok
 *  lvalp: [OUT] token value, filled in by lextok
 * Skips whitespace and slash-star comments, then lets lextok
 * read the token that starts at the first remaining character.
 * Returns '/' for a slash that does not open a comment, 0 if
 * the input ends inside a comment, else whatever lextok returns.
 *=========================*/
static int
lowyylex (PACTX pactx, YYSTYPE * lvalp)
{
	INT ch = 0, kind = 0;

	for (;;) {
		/* consume whitespace; ch/kind end up describing the first
		 * non-white character */
		do {
			ch = inchar(pactx);
			kind = chartype(ch);
		} while (kind == WHITE);

		/* anything but a slash starts a real token */
		if (ch != '/')
			break;

		/* a slash not followed by '*' is the division operator */
		ch = inchar(pactx);
		if (ch != '*') {
			unreadchar(pactx, ch);
			return '/';
		}

		/* inside a comment -- advance til end */
		for (;;) {
			/* run forward to the next star */
			do {
				ch = inchar(pactx);
			} while (ch != '*' && ch != EOF);
			if (ch == EOF)
				return 0;
			/* swallow any further stars; a slash right after them
			 * closes the comment */
			do {
				ch = inchar(pactx);
			} while (ch == '*');
			if (ch == '/')
				break;
			if (ch == EOF)
				return 0;
		}
	}

	/* now read token */
	return lextok(pactx, lvalp, ch, kind);
}
// Parse the next token from the input stream. void getsym(void) { // dispatch to handler for this type of char (*tokenhandlers[chartype(inchar)])(); #ifdef PARSER_TRACE if (trace) { sp(" sym="); printInteger(sym); sp(" v="); printInteger(symval); spb(' '); } #endif }
/*=============================================
 * value_to_list -- Convert string to word list
 *  str:  [IN]  input string to split up
 *  plen: [OUT] #entries in list (may be NULL if not wanted)
 *  dlm:  [IN]  delimiter characters upon which to split str
 * Returns a newly created list holding a private copy of each
 * field of str, with leading and trailing whitespace removed.
 * A NULL or empty str gives an empty list and a count of 0.
 * Uses a static scratch buffer that is kept, and grown when
 * needed, across calls.
 *===========================================*/
LIST
value_to_list (STRING str, INT *plen, STRING dlm)
{
	static STRING buf = NULL;
	static INT len0 = 0;
	STRING p, q, n;
	INT len, c, i, j;
	LIST list = create_list2(LISTDOFREE);

	if (!str || *str == 0) {
		/* always report a count, so callers never read an unset value */
		if (plen) *plen = 0;
		return list;
	}

	/* make sure the scratch buffer can hold str plus two terminators */
	if ((len = strlen(str)) > len0 - 2) {
		if (buf) stdfree(buf);
		buf = (STRING) stdalloc(len0 = len + 80);
	}
	strcpy(buf, str);
	buf[len + 1] = 0;

	/* cut the copy into fields by overwriting each delimiter with NUL */
	p = buf;
	j = 1;
	while ((c = *p++)) {
		if (in_string(c, dlm)) {
			*(p - 1) = 0;
			j++;
		}
	}

	/* trim each field and store a copy of it in the list */
	p = buf;
	for (i = 1; i <= j; i++) {
		/* remember where the next field starts before trimming this one */
		n = p + strlen(p) + 1;
		while (chartype(c = *p++) == WHITE)
			;
		p--;
		/* walk back from the end of the field; q never moves below p,
		 * so an empty field is left untouched */
		q = p + strlen(p);
		while (q > p && chartype(*(q - 1)) == WHITE)
			*--q = 0;
		set_list_element(list, i, strsave(p), NULL);
		p = n;
	}
	if (plen) *plen = j;
	return list;
}
/*================================================================
 * buffer_to_line -- Get GEDCOM line from buffer with <= 1 newline
 *
 *  p:      [in]  buffer (modified in place: NULs are written after
 *                the xref and the tag)
 *  plev:   [out] level number
 *  pxref:  [out] xref, including its '@' delimiters (0 if none)
 *  ptag:   [out] tag
 *  pval:   [out] value (0 if the line ends after the tag)
 *  pmsg:   [out] error msg (in static buffer)
 * Returns OKAY when the line was split successfully, ERROR (with
 * *pmsg set) otherwise. The out pointers refer into p.
 * NOTE(review): declared BOOLEAN but returns OKAY/ERROR -- confirm
 * callers compare against those constants.
 *==============================================================*/
static BOOLEAN
buffer_to_line (STRING p, INT *plev, STRING *pxref
	, STRING *ptag, STRING *pval, STRING *pmsg)
{
	INT lev;
	static char scratch[MAXLINELEN+40];

	*pmsg = *pxref = *pval = 0;
	if (!p || *p == 0) {
		sprintf(scratch, _(qSreremp), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	striptrail(p);
	if (strlen(p) > MAXLINELEN) {
		sprintf(scratch, _(qSrerlng), flineno);
		*pmsg = scratch;
		return ERROR;
	}

	/* Get level number */
	skipws(&p);
	if (chartype((uchar)*p) != DIGIT) {
		sprintf(scratch, _(qSrernlv), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	lev = (uchar)*p++ - (uchar)'0';
	while (chartype((uchar)*p) == DIGIT)
		lev = lev*10 + (uchar)*p++ - (uchar)'0';
	*plev = lev;

	/* Get cross reference, if there */
	skipws(&p);
	if (*p == 0) {
		sprintf(scratch, _(qSrerinc), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	if (*p != '@') goto gettag;
	*pxref = p++;
	if (*p == '@') {
		sprintf(scratch, _(qSrerbln), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	/* look for the closing '@', but stop at the end of the line as
	 * well: an unterminated xref must not run the scan off the buffer */
	while (*p != '@' && *p != 0) p++;
	if (*p == 0) {
		sprintf(scratch, _(qSrerinc), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	p++;
	if (*p == 0) {
		sprintf(scratch, _(qSrerinc), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	if (!iswhite((uchar)*p)) {
		sprintf(scratch, _(qSrernwt), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	/* terminate the xref where the separating whitespace was */
	*p++ = 0;

	/* Get tag field */
gettag:
	skipws(&p);
	if (*p == 0) {
		sprintf(scratch, _(qSrerinc), flineno);
		*pmsg = scratch;
		return ERROR;
	}
	*ptag = p++;
	while (!iswhite((uchar)*p) && *p != 0) p++;
	/* the line ends with the tag: there is no value */
	if (*p == 0) return OKAY;
	*p++ = 0;

	/* Get the value field */
	skipws(&p);
	*pval = p;
	return OKAY;
}
// Skip to next nonblank and return the symbol therefrom void skpwhite(void) { while (chartype(inchar) == 0) fetchc(); getsym(); }
// Returns 1 when chartype() places c in character class 2 (the class this
// predicate is named for), otherwise 0.
byte isalpha(byte c) {
	if (chartype(c) == 2) return 1;
	return 0;
}
// Returns 1 when chartype() places c in character class 1 (the class this
// predicate is named for), otherwise 0.
byte isdigit(byte c) {
	if (chartype(c) == 1) return 1;
	return 0;
}
/*===========================================
 * lextok_string_overflow -- report that a string constant
 *  does not fit in lextok's token buffer
 *  pactx:      [IN]  parse context (supplies the file path)
 *  start_line: [IN]  pactx->lineno where the string began
 *                    (reported plus one)
 *  buflen:     [IN]  capacity of the token buffer
 * Formats the message and hands it to FATAL2.
 *=========================================*/
static void
lextok_string_overflow (PACTX pactx, INT start_line, int buflen)
{
	char msg[512];
	/* the arguments are cast so that they match %d and %ld exactly */
	snprintf(msg, sizeof(msg)/sizeof(msg[0])
		, _("String constant overflowing internal buffer tokbuf len=%d, file: %s, start line: %ld")
		, buflen
		, pactx->fullpath
		, (long)(start_line + 1)
		);
	FATAL2(msg);
}
/*===========================
 * lextok -- lex the next token
 *  pactx: [IN]  parse context to read characters from
 *  lvalp: [OUT] token value (identifier text or string node;
 *               NULL for numeric constants)
 *  c:     [IN]  first character of the token
 *  t:     [IN]  chartype of c
 * Returns a reserved-word code or IDEN for identifiers, ICONS
 * or FCONS for numbers (value left in Yival / Yfval), SCONS for
 * string constants, 0 at EOF, and otherwise the character itself.
 *=========================*/
static int
lextok (PACTX pactx, YYSTYPE * lvalp, INT c, INT t)
{
	INT retval, mul;
	extern INT Yival;
	extern FLOAT Yfval;
	static char tokbuf[512]; /* token buffer */
	STRING p = tokbuf;

	/* identifier or reserved word */
	if (t == LETTER) {
		p = tokbuf;
		while (is_iden_char(c, t)) {
			if (p-tokbuf < (int)sizeof(tokbuf) - 3) {
				*p++ = c;
			} else {
				/* token overlong -- ignore end of it */
				/* TODO: How can we force a parse error from here ? */
			}
			t = chartype(c = inchar(pactx));
		}
		*p = 0;
		unreadchar(pactx, c);
		if (reserved(tokbuf, &retval)) return retval;
		/* IDEN values have to be passed from yacc.y to free_iden */
		*lvalp = (PNODE) strsave(tokbuf);
		return IDEN;
	}

	/* number, or a lone '-' or '.' */
	if (t == '-' || t == DIGIT || t == '.') {
		BOOLEAN whole = FALSE;
		BOOLEAN frac = FALSE;
		FLOAT fdiv;
		mul = 1;
		if (t == '-') {
			t = chartype(c = inchar(pactx));
			if (t != '.' && t != DIGIT) {
				unreadchar(pactx, c);
				return '-';
			}
			mul = -1;
		}
		/* integer part */
		Yival = 0;
		while (t == DIGIT) {
			whole = TRUE;
			Yival = Yival*10 + c - '0';
			t = chartype(c = inchar(pactx));
		}
		if (t != '.') {
			unreadchar(pactx, c);
			Yival *= mul;
			*lvalp = NULL;
			return ICONS;
		}
		/* fractional part */
		t = chartype(c = inchar(pactx));
		Yfval = 0.0;
		fdiv = 1.0;
		while (t == DIGIT) {
			frac = TRUE;
			Yfval = Yfval*10 + c - '0';
			fdiv *= 10.;
			t = chartype(c = inchar(pactx));
		}
		unreadchar(pactx, c);
		if (!whole && !frac) {
			/* a '.' with no digits on either side is not a number */
			/* NOTE(review): c is pushed back a second time here --
			 * kept as is; confirm against unreadchar's semantics */
			unreadchar(pactx, c);
			if (mul == -1) {
				unreadchar(pactx, '.');
				return '-';
			}
			else
				return '.';
		}
		Yfval = mul*(Yival + Yfval/fdiv);
		*lvalp = NULL;
		return FCONS;
	}

	/* string constant */
	if (c == '"') {
		INT start_line = pactx->lineno;
		p = tokbuf;
		while (TRUE) {
			/* ordinary characters, up to a quote, backslash or EOF */
			while ((c = inchar(pactx)) != EOF && c != '"' && c != '\\') {
				if (p - tokbuf > (int)sizeof(tokbuf) - 3) {
					/* Overflowing tokbuf buffer */
					/* TODO: (Perry, 2006-06-30) I don't know how to fail gracefully from here inside parser */
					lextok_string_overflow(pactx, start_line, (int)sizeof(tokbuf));
					/* only reached if FATAL2 returns: give back what fits */
					*p = 0;
					*lvalp = make_internal_string_node(pactx, tokbuf);
					return SCONS;
				}
				*p++ = c;
			}
			if (c == 0 || c == '"') {
				*p = 0;
				*lvalp = make_internal_string_node(pactx, tokbuf);
				return SCONS;
			}
			/* escape sequence (or EOF, in which case the next read
			 * is expected to give EOF again) */
			c = inchar(pactx);
			if (c == EOF) {
				*p = 0;
				*lvalp = make_internal_string_node(pactx, tokbuf);
				return SCONS;
			}
			switch (c) {
			case 'n': c = '\n'; break;
			case 't': c = '\t'; break;
			case 'v': c = '\v'; break;
			case 'r': c = '\r'; break;
			case 'b': c = '\b'; break;
			case 'f': c = '\f'; break;
			default: break; /* '"', '\\' and anything else stand for themselves */
			}
			/* escapes used to be stored unchecked, so a run of them
			 * could write past tokbuf; require room for this
			 * character plus the terminator */
			if (p - tokbuf >= (int)sizeof(tokbuf) - 1) {
				lextok_string_overflow(pactx, start_line, (int)sizeof(tokbuf));
				/* only reached if FATAL2 returns: give back what fits */
				*p = 0;
				*lvalp = make_internal_string_node(pactx, tokbuf);
				return SCONS;
			}
			*p++ = c;
		}
	}
	if (c == EOF) return 0;
	return c;
}
// "y = f(45,124)/3" -> [ "y", "f", "(", "45", ",", "124", ")", "/", "3"] std::vector<Node> tokenize(std::string inp, Metadata metadata, bool lispMode) { int curtype = SPACE; unsigned pos = 0; int lastNewline = 0; metadata.ch = 0; std::string cur; std::vector<Node> out; inp += " "; while (pos < inp.length()) { int headtype = chartype(inp[pos]); if (lispMode) { if (inp[pos] == '\'') headtype = ALPHANUM; } // Are we inside a quote? if (curtype == SQUOTE || curtype == DQUOTE) { // Close quote if (headtype == curtype) { cur += inp[pos]; out.push_back(token(cur, metadata)); cur = ""; metadata.ch = pos - lastNewline; curtype = SPACE; pos += 1; } // eg. \xc3 else if (inp.length() >= pos + 4 && inp.substr(pos, 2) == "\\x") { cur += (std::string("0123456789abcdef").find(inp[pos+2]) * 16 + std::string("0123456789abcdef").find(inp[pos+3])); pos += 4; } // Newline else if (inp.substr(pos, 2) == "\\n") { cur += '\n'; pos += 2; } // Backslash escape else if (inp.length() >= pos + 2 && inp[pos] == '\\') { cur += inp[pos + 1]; pos += 2; } // Normal character else { cur += inp[pos]; pos += 1; } } else { // Handle atoms ( '//', '#', brackets ) for (int i = 0; i < numAtoms; i++) { int split = cur.length() - atoms[i].length(); if (split >= 0 && cur.substr(split) == atoms[i]) { if (split > 0) { out.push_back(token(cur.substr(0, split), metadata)); } metadata.ch += split; out.push_back(token(cur.substr(split), metadata)); metadata.ch = pos - lastNewline; cur = ""; curtype = SPACE; } } // Special case the minus sign if (cur.length() > 1 && (cur.substr(cur.length() - 1) == "-" || cur.substr(cur.length() - 1) == "!")) { out.push_back(token(cur.substr(0, cur.length() - 1), metadata)); out.push_back(token(cur.substr(cur.length() - 1), metadata)); cur = ""; } // Boundary between different char types if (headtype != curtype) { if (curtype != SPACE && cur != "") { out.push_back(token(cur, metadata)); } metadata.ch = pos - lastNewline; cur = ""; } cur += inp[pos]; curtype = headtype; pos += 1; } 
if (inp[pos] == '\n') { lastNewline = pos; metadata.ch = 0; metadata.ln += 1; } } return out; }
// Program entry point: calls chartype() once and exits with status 0.
int main(int argc, char const *argv[]) {
    // the command line is not consulted
    (void)argc;
    (void)argv;

    chartype();
    return 0;
}