/*JSON{
  "type" : "staticmethod",
  "class" : "url",
  "name" : "parse",
  "generate" : "jswrap_url_parse",
  "params" : [
    ["urlStr","JsVar","A URL to be parsed"],
    ["parseQuery","bool","Whether to parse the query string into an object not (default = false)"]
  ],
  "return" : ["JsVar","An object containing options for ```http.request``` or ```http.get```. Contains `method`, `host`, `path`, `pathname`, `search`, `port` and `query`"]
}
A utility function to split a URL into parts

This is useful in web servers for instance when handling a request.

For instance `url.parse("/a?b=c&d=e",true)` returns `{"method":"GET","host":"","path":"/a?b=c&d=e","pathname":"/a","search":"?b=c&d=e","port":null,"query":{"b":"c","d":"e"}}`
 */
/* Implementation notes:
 *
 * Returns 0 if `url` is not a string or the result object could not be
 * allocated; otherwise returns a new object with these children:
 *   - "protocol" : only set when a "scheme://" prefix was found
 *   - "method"   : always the string "GET"
 *   - "host"     : text between the authority start and the port/path
 *   - "path"     : everything from the path start ("/" if that is empty)
 *   - "pathname" : the path without the query ("/" if that is empty)
 *   - "search"   : the query including its leading '?', or a null value
 *   - "port"     : integer in 1..65535, or a JSV_NULL value otherwise
 *   - "query"    : the raw query string (without '?'), a null value when
 *                  there is none, or - when parseQuery is set and a query
 *                  exists - an object mapping decoded keys to decoded values
 *
 * In the query, "%XX" is decoded only when XX are two hex digits; any other
 * '%' sequence is copied through unchanged rather than decoded as garbage.
 */
JsVar *jswrap_url_parse(JsVar *url, bool parseQuery) {
  if (!jsvIsString(url)) return 0;
  JsVar *obj = jsvNewWithFlags(JSV_OBJECT);
  if (!obj) return 0; // out of memory

  // scan string to try and pick stuff out
  JsvStringIterator it;
  jsvStringIteratorNew(&it, url, 0);
  int slashes = 0;
  int colons = 0;
  int addrStart = -1;
  int portStart = -1;
  int pathStart = -1;
  int searchStart = -1;
  int charIdx = 0;
  int portNumber = 0;
  while (jsvStringIteratorHasChar(&it)) {
    char ch = jsvStringIteratorGetChar(&it);
    if (ch == '/') {
      slashes++;
      if (pathStart<0) pathStart = charIdx;
      if (colons==1 && slashes==2 && addrStart<0) {
        // second slash of "scheme://" - the authority starts after this
        addrStart = charIdx;
        pathStart = -1;
        searchStart = -1;
      }
    }
    if (ch == ':') {
      colons++;
      if (addrStart>=0 && pathStart<0) {
        portStart = charIdx;
        // only digits after the *last* ':' of the authority are the port, so
        // forget anything gathered after an earlier ':' (userinfo, IPv6, ...)
        portNumber = 0;
      }
    }
    if (portStart>=0 && charIdx>portStart && pathStart<0 && ch >= '0' && ch <= '9') {
      // Saturate once out of range: the value is rejected below anyway, and
      // this keeps an over-long digit run from overflowing a signed int
      if (portNumber <= 65535)
        portNumber = portNumber*10 + (ch-'0');
    }
    if (ch == '?' && pathStart>=0 && searchStart<0) {
      // the query starts at the first '?'; later ones belong to the query
      searchStart = charIdx;
    }
    jsvStringIteratorNext(&it);
    charIdx++;
  }
  jsvStringIteratorFree(&it);

  // try and sort stuff out
  if (pathStart<0) pathStart = charIdx; // no path at all: empty path at end of string
  int addrEnd = (portStart>=0) ? portStart : pathStart;

  // pull out details
  if (addrStart>0)
    jsvObjectSetChildAndUnLock(obj, "protocol", jsvNewFromStringVar(url, 0, (size_t)addrStart-1));
  jsvObjectSetChildAndUnLock(obj, "method", jsvNewFromString("GET"));
  jsvObjectSetChildAndUnLock(obj, "host", jsvNewFromStringVar(url, (size_t)(addrStart+1), (size_t)(addrEnd-(addrStart+1))));

  JsVar *v;
  v = jsvNewFromStringVar(url, (size_t)pathStart, JSVAPPENDSTRINGVAR_MAXLENGTH);
  if (jsvGetStringLength(v)==0) jsvAppendString(v, "/");
  jsvObjectSetChildAndUnLock(obj, "path", v);

  v = jsvNewFromStringVar(url, (size_t)pathStart, (size_t)((searchStart>=0)?(searchStart-pathStart):JSVAPPENDSTRINGVAR_MAXLENGTH));
  if (jsvGetStringLength(v)==0) jsvAppendString(v, "/");
  jsvObjectSetChildAndUnLock(obj, "pathname", v);

  jsvObjectSetChildAndUnLock(obj, "search", (searchStart>=0)?jsvNewFromStringVar(url, (size_t)searchStart, JSVAPPENDSTRINGVAR_MAXLENGTH):jsvNewNull());

  jsvObjectSetChildAndUnLock(obj, "port", (portNumber<=0 || portNumber>65535) ? jsvNewWithFlags(JSV_NULL) : jsvNewFromInteger(portNumber));

  JsVar *query = (searchStart>=0)?jsvNewFromStringVar(url, (size_t)(searchStart+1), JSVAPPENDSTRINGVAR_MAXLENGTH):jsvNewNull();
  if (parseQuery && !jsvIsNull(query)) {
    JsVar *queryStr = query;
    jsvStringIteratorNew(&it, query, 0);
    query = jsvNewWithFlags(JSV_OBJECT);

    JsVar *key = jsvNewFromEmptyString();
    JsVar *val = jsvNewFromEmptyString();
    bool hadEquals = false;
    /* Percent-escape state: 0 = none pending, 1 = seen '%',
     * 2 = seen '%' plus one hex digit (kept in pctHiCh / pctHiVal) */
    int pctPending = 0;
    char pctHiCh = 0;
    int pctHiVal = 0;

    while (jsvStringIteratorHasChar(&it)) {
      char ch = jsvStringIteratorGetChar(&it);
      jsvStringIteratorNext(&it);

      if (pctPending) {
        int digit = chtod(ch);
        if (digit >= 0 && digit < 16) {
          if (pctPending == 1) {
            pctHiCh = ch;
            pctHiVal = digit;
            pctPending = 2;
          } else {
            // the decoded character is data, never a separator
            jsvAppendCharacter(hadEquals ? val : key, (char)((pctHiVal<<4) | digit));
            pctPending = 0;
          }
          continue;
        }
        // Not a valid escape: keep what we swallowed as literal text, then
        // handle the current character normally below
        jsvAppendCharacter(hadEquals ? val : key, '%');
        if (pctPending == 2) jsvAppendCharacter(hadEquals ? val : key, pctHiCh);
        pctPending = 0;
      }

      if (ch=='&') {
        if (jsvGetStringLength(key)>0 || jsvGetStringLength(val)>0) {
          key = jsvAsArrayIndexAndUnLock(key); // make sure "0" gets made into 0
          jsvMakeIntoVariableName(key, val);
          jsvAddName(query, key);
          jsvUnLock2(key, val);
          key = jsvNewFromEmptyString();
          val = jsvNewFromEmptyString();
          hadEquals = false;
        }
      } else if (!hadEquals && ch=='=') {
        hadEquals = true;
      } else if (ch=='%') {
        pctPending = 1; // decide once we've seen what follows
      } else {
        jsvAppendCharacter(hadEquals ? val : key, ch);
      }
    }
    jsvStringIteratorFree(&it);
    jsvUnLock(queryStr);

    // the string ended in the middle of an escape - keep it as literal text
    if (pctPending) {
      jsvAppendCharacter(hadEquals ? val : key, '%');
      if (pctPending == 2) jsvAppendCharacter(hadEquals ? val : key, pctHiCh);
    }

    if (jsvGetStringLength(key)>0 || jsvGetStringLength(val)>0) {
      key = jsvAsArrayIndexAndUnLock(key); // make sure "0" gets made into 0
      jsvMakeIntoVariableName(key, val);
      jsvAddName(query, key);
    }
    jsvUnLock2(key, val);
  }
  jsvObjectSetChildAndUnLock(obj, "query", query);

  return obj;
}
/* Advance the lexer to the next token.
 *
 * Skips whitespace and comments, then classifies the token starting at
 * lex->currCh and stores its type in lex->tk:
 *   - LEX_EOF is the initial value before classification
 *   - LEX_UNFINISHED_COMMENT is set (and the function returns early) when a
 *     block comment is still open at the end of the input
 *   - identifiers become LEX_ID, or the matching LEX_R_* reserved word
 *   - numbers become LEX_INT or LEX_FLOAT
 *   - strings become LEX_STR, with the unescaped contents in lex->tokenValue
 *   - operators become their LEX_* code, or whatever jslSingleChar() sets
 *
 * The previous lex->tokenValue is unlocked and cleared, lex->tokenl is reset
 * to 0, and the token start position is recorded in lex->tokenStart /
 * lex->tokenLastStart before the token is read.
 */
void jslGetNextToken(JsLex *lex) {
jslGetNextToken_start:
  // Skip whitespace
  while (isWhitespace(lex->currCh))
    jslGetNextCh(lex);
  // Search for comments
  if (lex->currCh=='/') {
    // newline comments
    if (jslNextCh(lex)=='/') {
      while (lex->currCh && lex->currCh!='\n') jslGetNextCh(lex);
      jslGetNextCh(lex);
      goto jslGetNextToken_start;
    }
    // block comments
    if (jslNextCh(lex)=='*') {
      // Step over the opening slash-star first: otherwise its '*' could pair
      // with a following '/' and a bare slash-star-slash would wrongly count
      // as a complete comment
      jslGetNextCh(lex);
      jslGetNextCh(lex);
      while (lex->currCh && !(lex->currCh=='*' && jslNextCh(lex)=='/'))
        jslGetNextCh(lex);
      if (!lex->currCh) {
        lex->tk = LEX_UNFINISHED_COMMENT;
        return; /* an unfinished multi-line comment. When in interactive console,
                   detect this and make sure we accept new lines */
      }
      // skip the closing star-slash
      jslGetNextCh(lex);
      jslGetNextCh(lex);
      goto jslGetNextToken_start;
    }
  }
  lex->tk = LEX_EOF;
  lex->tokenl = 0; // clear token string
  if (lex->tokenValue) {
    jsvUnLock(lex->tokenValue);
    lex->tokenValue = 0;
  }
  // record beginning of this token
  lex->tokenLastStart = jsvStringIteratorGetIndex(&lex->tokenStart.it) - 1;
  /* we don't lock here, because we know that the string itself will be locked
   * because of lex->sourceVar */
  lex->tokenStart.it = lex->it;
  lex->tokenStart.currCh = lex->currCh;
  // tokens
  if (((unsigned char)lex->currCh) < jslJumpTableStart ||
      ((unsigned char)lex->currCh) > jslJumpTableEnd) {
    // if unhandled by the jump table, just pass it through as a single character
    jslSingleChar(lex);
  } else {
    switch(jslJumpTable[((unsigned char)lex->currCh) - jslJumpTableStart]) {
      case JSLJT_ID: {
        while (isAlpha(lex->currCh) || isNumeric(lex->currCh) || lex->currCh=='$') {
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
        }
        lex->tk = LEX_ID;
        // We do fancy stuff here to reduce number of compares (hopefully GCC creates a jump table)
        switch (lex->token[0]) {
          case 'b': if (jslIsToken(lex,"break", 1)) lex->tk = LEX_R_BREAK;
                    break;
          case 'c': if (jslIsToken(lex,"case", 1)) lex->tk = LEX_R_CASE;
                    else if (jslIsToken(lex,"continue", 1)) lex->tk = LEX_R_CONTINUE;
                    break;
          case 'd': if (jslIsToken(lex,"default", 1)) lex->tk = LEX_R_DEFAULT;
                    else if (jslIsToken(lex,"do", 1)) lex->tk = LEX_R_DO;
                    break;
          case 'e': if (jslIsToken(lex,"else", 1)) lex->tk = LEX_R_ELSE;
                    break;
          case 'f': if (jslIsToken(lex,"false", 1)) lex->tk = LEX_R_FALSE;
                    else if (jslIsToken(lex,"for", 1)) lex->tk = LEX_R_FOR;
                    else if (jslIsToken(lex,"function", 1)) lex->tk = LEX_R_FUNCTION;
                    break;
          case 'i': if (jslIsToken(lex,"if", 1)) lex->tk = LEX_R_IF;
                    else if (jslIsToken(lex,"in", 1)) lex->tk = LEX_R_IN;
                    else if (jslIsToken(lex,"instanceof", 1)) lex->tk = LEX_R_INSTANCEOF;
                    break;
          case 'n': if (jslIsToken(lex,"new", 1)) lex->tk = LEX_R_NEW;
                    else if (jslIsToken(lex,"null", 1)) lex->tk = LEX_R_NULL;
                    break;
          case 'r': if (jslIsToken(lex,"return", 1)) lex->tk = LEX_R_RETURN;
                    break;
          case 's': if (jslIsToken(lex,"switch", 1)) lex->tk = LEX_R_SWITCH;
                    break;
          case 't': if (jslIsToken(lex,"this", 1)) lex->tk = LEX_R_THIS;
                    else if (jslIsToken(lex,"true", 1)) lex->tk = LEX_R_TRUE;
                    else if (jslIsToken(lex,"typeof", 1)) lex->tk = LEX_R_TYPEOF;
                    break;
          case 'u': if (jslIsToken(lex,"undefined", 1)) lex->tk = LEX_R_UNDEFINED;
                    break;
          case 'w': if (jslIsToken(lex,"while", 1)) lex->tk = LEX_R_WHILE;
                    break;
          case 'v': if (jslIsToken(lex,"var", 1)) lex->tk = LEX_R_VAR;
                    else if (jslIsToken(lex,"void", 1)) lex->tk = LEX_R_VOID;
                    break;
          default: break;
        }
      } break;
      case JSLJT_NUMBER: {
        // TODO: check numbers aren't the wrong format
        bool canBeFloating = true;
        if (lex->currCh=='0') {
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
        }
        // radix prefix character (x / b / o): from here on it can't be a float
        if ((lex->currCh=='x' || lex->currCh=='X') ||
            (lex->currCh=='b' || lex->currCh=='B') ||
            (lex->currCh=='o' || lex->currCh=='O')) {
          canBeFloating = false;
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
        }
        lex->tk = LEX_INT;
        while (isNumeric(lex->currCh) || (!canBeFloating && isHexadecimal(lex->currCh))) {
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
        }
        if (canBeFloating && lex->currCh=='.') {
          lex->tk = LEX_FLOAT;
          jslTokenAppendChar(lex, '.');
          jslGetNextCh(lex);
          while (isNumeric(lex->currCh)) {
            jslTokenAppendChar(lex, lex->currCh);
            jslGetNextCh(lex);
          }
        }
        // do fancy e-style floating point
        if (canBeFloating && (lex->currCh=='e'||lex->currCh=='E')) {
          lex->tk = LEX_FLOAT;
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
          if (lex->currCh=='-' || lex->currCh=='+') {
            jslTokenAppendChar(lex, lex->currCh);
            jslGetNextCh(lex);
          }
          while (isNumeric(lex->currCh)) {
            jslTokenAppendChar(lex, lex->currCh);
            jslGetNextCh(lex);
          }
        }
      } break;
      case JSLJT_STRING: {
        char delim = lex->currCh;
        lex->tokenValue = jsvNewFromEmptyString();
        // strings...
        jslGetNextCh(lex);
        while (lex->currCh && lex->currCh!=delim) {
          if (lex->currCh == '\\') {
            jslGetNextCh(lex);
            char ch = lex->currCh;
            switch (lex->currCh) {
              case 'n' : ch = '\n'; jslGetNextCh(lex); break;
              case 'a' : ch = '\a'; jslGetNextCh(lex); break;
              case 'b' : ch = '\b'; jslGetNextCh(lex); break;
              case 'f' : ch = '\f'; jslGetNextCh(lex); break;
              case 'r' : ch = '\r'; jslGetNextCh(lex); break;
              case 't' : ch = '\t'; jslGetNextCh(lex); break;
              case 'v' : ch = '\v'; jslGetNextCh(lex); break;
              case 'x' : { // hex digits
                char buf[5] = "0x??";
                jslGetNextCh(lex);
                buf[2] = lex->currCh;
                jslGetNextCh(lex);
                buf[3] = lex->currCh;
                jslGetNextCh(lex);
                ch = (char)stringToInt(buf);
              } break;
              default:
                if (lex->currCh>='0' && lex->currCh<='7') {
                  // octal digits (up to three), passed to stringToInt with a leading "0"
                  char buf[5] = "0";
                  buf[1] = lex->currCh;
                  int n=2;
                  jslGetNextCh(lex);
                  if (lex->currCh>='0' && lex->currCh<='7') {
                    buf[n++] = lex->currCh; jslGetNextCh(lex);
                    if (lex->currCh>='0' && lex->currCh<='7') {
                      buf[n++] = lex->currCh; jslGetNextCh(lex);
                    }
                  }
                  buf[n]=0;
                  ch = (char)stringToInt(buf);
                } else {
                  // for anything else, just push the character through
                  jslGetNextCh(lex);
                }
                break;
            }
            // tokenValue is 0 if the string could not be allocated
            if (lex->tokenValue) {
              jslTokenAppendChar(lex, ch);
              jsvAppendCharacter(lex->tokenValue, ch);
            }
          } else {
            if (lex->tokenValue) {
              jslTokenAppendChar(lex, lex->currCh);
              jsvAppendCharacter(lex->tokenValue, lex->currCh);
            }
            jslGetNextCh(lex);
          }
        }
        jslGetNextCh(lex); // step over the closing delimiter
        lex->tk = LEX_STR;
      } break;
      case JSLJT_EXCLAMATION: jslSingleChar(lex);
        if (lex->currCh=='=') { // !=
          lex->tk = LEX_NEQUAL;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // !==
            lex->tk = LEX_NTYPEEQUAL;
            jslGetNextCh(lex);
          }
        } break;
      case JSLJT_PLUS: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_PLUSEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='+') {
          lex->tk = LEX_PLUSPLUS;
          jslGetNextCh(lex);
        } break;
      case JSLJT_MINUS: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_MINUSEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='-') {
          lex->tk = LEX_MINUSMINUS;
          jslGetNextCh(lex);
        } break;
      case JSLJT_AND: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_ANDEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='&') {
          lex->tk = LEX_ANDAND;
          jslGetNextCh(lex);
        } break;
      case JSLJT_OR: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_OREQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='|') {
          lex->tk = LEX_OROR;
          jslGetNextCh(lex);
        } break;
      case JSLJT_TOPHAT: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_XOREQUAL;
          jslGetNextCh(lex);
        } break;
      case JSLJT_STAR: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_MULEQUAL;
          jslGetNextCh(lex);
        } break;
      case JSLJT_FORWARDSLASH: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_DIVEQUAL;
          jslGetNextCh(lex);
        } break;
      case JSLJT_PERCENT: jslSingleChar(lex);
        if (lex->currCh=='=') {
          lex->tk = LEX_MODEQUAL;
          jslGetNextCh(lex);
        } break;
      case JSLJT_EQUAL: jslSingleChar(lex);
        if (lex->currCh=='=') { // ==
          lex->tk = LEX_EQUAL;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // ===
            lex->tk = LEX_TYPEEQUAL;
            jslGetNextCh(lex);
          }
        } break;
      case JSLJT_LESSTHAN: jslSingleChar(lex);
        if (lex->currCh=='=') { // <=
          lex->tk = LEX_LEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='<') { // <<
          lex->tk = LEX_LSHIFT;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // <<=
            lex->tk = LEX_LSHIFTEQUAL;
            jslGetNextCh(lex);
          }
        } break;
      case JSLJT_GREATERTHAN: jslSingleChar(lex);
        if (lex->currCh=='=') { // >=
          lex->tk = LEX_GEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='>') { // >>
          lex->tk = LEX_RSHIFT;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // >>=
            lex->tk = LEX_RSHIFTEQUAL;
            jslGetNextCh(lex);
          } else if (lex->currCh=='>') { // >>>
            jslGetNextCh(lex);
            if (lex->currCh=='=') { // >>>=
              lex->tk = LEX_RSHIFTUNSIGNEDEQUAL;
              jslGetNextCh(lex);
            } else {
              lex->tk = LEX_RSHIFTUNSIGNED;
            }
          }
        } break;

      case JSLJT_SINGLECHAR: jslSingleChar(lex); break;
      default: assert(0);break;
    }
  }
}