template <typename R> bool extract_control(R& result,tokenizer& tokenizer) { result.clear(); //controls dictionary<string,id<string>> controls= { "\r",symbols()._cr, "\n",symbols()._lf }; //buffer array<ascii> buffer; //any if(!tokenizer.any(buffer,controls.keys())) return false; //commit result=buffer; update(controls[buffer.join("")],buffer); return true; }
template <typename R> bool extract_number(R& result,tokenizer& tokenizer) { result.clear(); //buffer array<ascii> buffer; array<ascii> current; //sign if(tokenizer.any(current,pack("+","-"))) buffer.append(current); //integer part if(tokenizer.digit(current)) buffer.append(current); //real part if(tokenizer.delimit(current,".")) { buffer.append(current); if(tokenizer.digit(current)) buffer.append(current); } //parse with c api if(!buffer.join("").is_real()) return false; //no dot after if(tokenizer.starts_with(".")) return false; //delimited if(!tokenizer.is_delimited()) return false; //commit result=buffer; update(symbols()._number,buffer); return true; }
template <typename R> bool extract_escape(R& result,tokenizer& tokenizer) { result.clear(); //controls dictionary<string,ascii> controls= { "\\r",'\r', "\\n",'\n', "\\t",'\t' }; //buffer R content; array<ascii> buffer; array<ascii> current; //escape if(tokenizer.delimit(current,"\\x")) { //hexa buffer.append(current); //hexa if(!tokenizer.xdigit(current,2)) return false; buffer.append(current); //parse with c api int value=0; if(!current.join("").parse_integer(value,16)) return false; //byte if(!between(value,0,256)) return false; //push content.push(value); } else if(tokenizer.any(current,controls.keys())) { //control buffer.append(current); content.push(controls[current]); } else if(tokenizer.delimit(current,"\\")) { //backslash buffer.append(current); //next if(!tokenizer.shift(current,1)) return false; content.append(current); buffer.append(current); } else return false; //commit result=content; update(symbols()._escape,buffer); return true; }
template <typename R> bool extract_delimiter(R& result,tokenizer& tokenizer) { result.clear(); //delimiters array<string> delimiters= { //comment "//", "/*", "*/", //literal "\"", "'", //block "(", ")", "[", "]", "{", "}", //separator "/", "//", ",", ";", ".", "..", "...", ":", "::", //operator "->", "~", "%", "^", "=", "==", "===", ">", ">>", ">>>", ">=", "<", "<<", "<<<", "<=", "+", "++", "-", "--", "!", "&", "&&", "|", "||" }; //longest first delimiters.sort_descending(); //buffer array<ascii> buffer; //any if(!tokenizer.any(buffer,delimiters)) return false; //commit result=buffer; update(symbols()._delimiter,buffer); return true; }