/* Initialize a TestSchema: a Schema with a single highlightable
 * full-text field analyzed by a default Tokenizer. */
TestSchema*
TestSchema_init(TestSchema *self) {
    Tokenizer    *analyzer   = Tokenizer_new(NULL);
    FullTextType *field_type = FullTextType_new((Analyzer*)analyzer);

    Schema_init((Schema*)self);
    FullTextType_Set_Highlightable(field_type, true);
    Schema_Spec_Field(self, &content, (FieldType*)field_type);

    /* The schema now holds its own references; release ours. */
    DECREF(field_type);
    DECREF(analyzer);
    return self;
}
/* Verify that Tokenizer serialization round-trips (Dump => Load) and that
 * Equals()/Dump_Equals() distinguish tokenizers with different patterns. */
static void
test_Dump_Load_and_Equals(TestBatch *batch) {
    Tokenizer *wc_tokenizer = Tokenizer_new((CharBuf*)&word_char_pattern);
    Tokenizer *ws_tokenizer = Tokenizer_new((CharBuf*)&whitespace_pattern);
    Obj       *wc_dump      = Tokenizer_Dump(wc_tokenizer);
    Obj       *ws_dump      = Tokenizer_Dump(ws_tokenizer);
    /* Load is invoked on an arbitrary instance; the receiver's pattern
     * does not affect the deserialized result. */
    Tokenizer *wc_clone     = Tokenizer_Load(ws_tokenizer, wc_dump);
    Tokenizer *ws_clone     = Tokenizer_Load(ws_tokenizer, ws_dump);

    ASSERT_FALSE(batch,
                 Tokenizer_Equals(wc_tokenizer, (Obj*)ws_tokenizer),
                 "Equals() false with different pattern");
    ASSERT_TRUE(batch,
                Tokenizer_Dump_Equals(ws_tokenizer, (Obj*)ws_dump),
                "Dump_Equals()");
    ASSERT_TRUE(batch,
                Tokenizer_Dump_Equals(wc_tokenizer, (Obj*)wc_dump),
                "Dump_Equals()");
    ASSERT_FALSE(batch,
                 Tokenizer_Dump_Equals(wc_tokenizer, (Obj*)ws_dump),
                 "Dump_Equals() false with different pattern");
    ASSERT_FALSE(batch,
                 Tokenizer_Dump_Equals(ws_tokenizer, (Obj*)wc_dump),
                 "Dump_Equals() false with different pattern");
    ASSERT_TRUE(batch,
                Tokenizer_Equals(wc_tokenizer, (Obj*)wc_clone),
                "Dump => Load round trip");
    ASSERT_TRUE(batch,
                Tokenizer_Equals(ws_tokenizer, (Obj*)ws_clone),
                "Dump => Load round trip");

    DECREF(wc_tokenizer);
    DECREF(wc_dump);
    DECREF(wc_clone);
    DECREF(ws_tokenizer);
    DECREF(ws_dump);
    DECREF(ws_clone);
}
/**
 * Open and read an initialization file, putting the information
 * therein into a newly-allocated object of type Ini.
 *
 * The file is parsed line by line.  Three kinds of line are recognized:
 * "name = value" assignments, single-token commands, and (after a
 * "PopHist" command has been seen) two-column epoch lines "<t> <twoN>".
 * Comments are stripped and blank lines skipped before classification.
 *
 * @param[in] ifname Name of input file
 *
 * @returns Newly-allocated Ini object containing info from input file,
 * or NULL if the file could not be opened.
 */
Ini *Ini_new(const char *ifname) {
    FILE *ifp = fopen(ifname, "r");
    int inPopHist = 0;  /* nonzero once a "PopHist" command has been seen */
    if(ifp == NULL)
        return NULL;
    Ini *ini = malloc(sizeof(Ini));
    checkmem(ini, __FILE__, __LINE__);  /* presumably aborts on NULL -- TODO confirm */
    memset(ini, 0, sizeof(Ini));
    /* redundant after memset, but makes the empty lists explicit */
    ini->a = NULL;
    ini->epochList = NULL;
    Tokenizer *tkz = Tokenizer_new(100);
    char buff[1000];
    int lineno = 0, ntokens;
    while(fgets(buff, sizeof(buff), ifp) != NULL) {
        ++lineno;
        /* a line with no '\n' that isn't the last line didn't fit in buff */
        if(!strchr(buff, '\n') && !feof(ifp))
            eprintf("ERR@%s:%d: Buffer overflow. buff=\"%s\"\n",
                    __FILE__, __LINE__, buff);
        /* skip blank lines and comments */
        stripComment(buff);
        if(strempty(buff))
            continue;
        if(inPopHist) {
            /* PopHist section: each line is "<t> <twoN>" */
            Tokenizer_split(tkz, buff, " \t");        /* tokenize */
            ntokens = Tokenizer_strip(tkz, " \t\n");  /* strip extraneous */
            if(ntokens != 2)
                break;  /* anything but two columns ends the PopHist section */
            double t = strtod(Tokenizer_token(tkz, 0), NULL);
            double twoN = strtod(Tokenizer_token(tkz, 1), NULL);
            ini->epochList = EpochLink_new(ini->epochList, t, twoN);
            /* a non-finite t appears to mark the final epoch -- TODO confirm */
            if(!isfinite(t))
                break;
        } else if(strchr(buff, '=')) {
            /* assignment line: "name = value" */
            Tokenizer_split(tkz, buff, "=");          /* tokenize */
            ntokens = Tokenizer_strip(tkz, " \t\n");  /* strip extraneous */
            if(ntokens != 2)
                eprintf("ERR@:%s:%d:"
                        "Broken assignment @ line %u"
                        " of initialization file",
                        __FILE__, __LINE__, lineno);
            ini->a = Assignment_new(ini->a, Tokenizer_token(tkz, 0),
                                    Tokenizer_token(tkz, 1));
        } else {
            /* bare command line: must be exactly one token */
            Tokenizer_split(tkz, buff, " \t");        /* tokenize */
            ntokens = Tokenizer_strip(tkz, " \t\n");  /* strip extraneous */
            if(ntokens == 0)
                continue;
            if(ntokens != 1)
                eprintf("ERR@:%s:%d:"
                        "Broken command @ line %u"
                        " of initialization file."
                        " inPopHist=%d; ntokens=%d\n",
                        __FILE__, __LINE__, lineno, inPopHist, ntokens);
            if(!strcmp("PopHist", Tokenizer_token(tkz, 0)))
                inPopHist = 1;  /* switch to epoch-line mode */
            else
                /* a lone flag is recorded as an assignment to "1" */
                ini->a = Assignment_new(ini->a, Tokenizer_token(tkz, 0), "1");
        }
    }
    Tokenizer_free(tkz);
    fclose(ifp);
    return ini;
}
/** xml.eval(str) -- parse an XML string into a nested Lua table.
 *
 * Accepts either a light userdata pointing at a NUL-terminated buffer or a
 * Lua string (which is copied into a private buffer).  Builds a tree of
 * tables: index 0 of each table holds the tag name, string keys hold the
 * attributes, and positive integer indices hold child elements and text.
 * Each table's metatable takes __index and __tostring from the global
 * "xml" table.
 *
 * Returns the number of values left on the stack (the root table, or the
 * partially-built chain on malformed input).
 */
int Xml_eval(lua_State *L) {
    char *str = 0;
    size_t str_size = 0;
    if(lua_isuserdata(L, 1))
        str = (char*)lua_touserdata(L, 1);
    else {
        const char *sTmp = luaL_checklstring(L, 1, &str_size);
        str = (char*)malloc(str_size + 1);
        if(!str)                                 /* FIX: check allocation */
            return luaL_error(L, "out of memory");
        memcpy(str, sTmp, str_size);
        str[str_size] = 0;
    }
    Tokenizer *tok = Tokenizer_new(str, str_size ? str_size : strlen(str));
    lua_settop(L, 0);
    const char *token = 0;
    int firstStatement = 1;
    while((token = Tokenizer_next(tok)) != 0) {
        if(token[0] == OPN) { // new tag found
            if(lua_gettop(L)) {
                // append a fresh child table to the current element
                int newIndex = lua_objlen(L, -1) + 1;
                lua_pushnumber(L, newIndex);
                lua_newtable(L);
                lua_settable(L, -3);
                lua_pushnumber(L, newIndex);
                lua_gettable(L, -2);
            }
            else {
                if(firstStatement) {
                    lua_newtable(L);
                    firstStatement = 0;
                }
                else return lua_gettop(L); // second root element: stop
            }
            // set metatable:
            lua_newtable(L);
            lua_pushliteral(L, "__index");
            lua_getglobal(L, "xml");
            lua_settable(L, -3);
            lua_pushliteral(L, "__tostring"); // set __tostring metamethod
            lua_getglobal(L, "xml");
            lua_pushliteral(L, "str");
            lua_gettable(L, -2);
            lua_remove(L, -2);
            lua_settable(L, -3);
            lua_setmetatable(L, -2);
            // parse tag and content:
            lua_pushnumber(L, 0); // use index 0 for storing the tag
            lua_pushstring(L, Tokenizer_next(tok));
            lua_settable(L, -3);
            while(((token = Tokenizer_next(tok)) != 0)
                  && (token[0] != CLS) && (token[0] != ESC)) { // parse tag header
                size_t sepPos = find(token, "=", 0);
                if(token[sepPos]) { // regular attribute
                    const char *aVal = token + sepPos + 2; // skip ="
                    // FIX: drop the trailing quote; guard the empty value.
                    // Passing length 0 makes Xml_pushDecode strlen() the
                    // buffer and decode the closing quote instead of "".
                    size_t lenVal = strlen(aVal) - 1;
                    lua_pushlstring(L, token, sepPos);
                    if(!lenVal)
                        lua_pushliteral(L, "");
                    else
                        Xml_pushDecode(L, aVal, lenVal);
                    lua_settable(L, -3);
                }
            }
            if(!token || (token[0] == ESC)) {
                if(lua_gettop(L) > 1)
                    lua_settop(L, -2); // this tag has no content, only attributes
                else break;
            }
        }
        else if(token[0] == ESC) { // previous tag is over
            if(lua_gettop(L) > 1)
                lua_settop(L, -2); // pop current table
            else break;
        }
        else { // read elements
            lua_pushnumber(L, lua_objlen(L, -1) + 1);
            Xml_pushDecode(L, token, 0);
            lua_settable(L, -3);
        }
    }
    Tokenizer_delete(tok);
    // NOTE(review): str is freed even when it came from lua_touserdata,
    // which assumes the caller transfers ownership -- confirm.
    free(str);
    return lua_gettop(L);
}
/** Squirrel port of xml.eval: parse an XML string into nested tables.
 *
 * Accepts a userpointer to a NUL-terminated buffer or a string (copied).
 * Slot 0 of each table holds the tag name, string keys the attributes,
 * integer slots the children/text.
 *
 * NOTE(review): the original body still contained raw Lua API calls left
 * over from the port (lua_gettop, lua_settable, lua_getglobal,
 * lua_pushliteral, lua_gettable, lua_setmetatable); they are replaced
 * below with the Squirrel equivalents.  Squirrel has delegates rather
 * than metatables, so sq_setdelegate stands in for lua_setmetatable --
 * confirm against the host VM's "xml" binding.
 */
int Xml_eval(HSQUIRRELVM v) {
    SQ_FUNC_VARS_NO_TOP(v);
    SQChar *str = 0;
    size_t str_size = 0;
    if(sq_gettype(v, 2) == OT_USERPOINTER)
        sq_getuserpointer(v, 2, &str);
    else {
        SQ_GET_STRING(v, 2, sTmp);
        str = (SQChar*)sq_malloc(sTmp_size + (sizeof(SQChar)));
        memcpy(str, sTmp, sTmp_size);
        str[sTmp_size] = 0;
        str_size = sTmp_size;
    }
    Tokenizer *tok = Tokenizer_new(str, str_size ? str_size : scstrlen(str));
    sq_settop(v, 0);
    const SQChar *token = 0;
    int firstStatement = 1;
    while((token = Tokenizer_next(tok)) != 0) {
        if(token[0] == OPN) { // new tag found
            if(sq_gettop(v)) {
                // append a fresh child table to the current element
                int newIndex = sq_size(v, -1) + 1;
                sq_pushinteger(v, newIndex);
                sq_newtable(v);
                sq_set(v, -3);
                sq_pushinteger(v, newIndex);
                sq_get(v, -2);
            }
            else {
                if(firstStatement) {
                    sq_newtable(v);
                    firstStatement = 0;
                }
                else return sq_gettop(v); // FIX: was lua_gettop(L)
            }
            // set delegate (Squirrel's analogue of a Lua metatable):
            sq_newtable(v);
            sq_pushliteral(v, _SC("__index"));
            sq_getglobal(v, "xml");
            sq_set(v, -3);                        // FIX: was lua_settable(v,-3)
            sq_pushliteral(v, _SC("__tostring")); // set __tostring metamethod
            sq_getglobal(v, "xml");               // FIX: was lua_getglobal(L,"xml")
            sq_pushliteral(v, _SC("str"));        // FIX: was lua_pushliteral(L,"str")
            sq_get(v, -2);                        // FIX: was lua_gettable(v,-2)
            sq_remove(v, -2);
            sq_set(v, -3);
            sq_setdelegate(v, -2);                // FIX: was lua_setmetatable(L,-2)
            // parse tag and content:
            sq_pushinteger(v, 0); // use index 0 for storing the tag
            sq_pushstring(v, Tokenizer_next(tok), -1);
            sq_set(v, -3);
            while(((token = Tokenizer_next(tok)) != 0)
                  && (token[0] != CLS) && (token[0] != ESC)) { // parse tag header
                size_t sepPos = find(token, "=", 0);
                if(token[sepPos]) { // regular attribute
                    const SQChar *aVal = token + sepPos + 2; // skip ="
                    sq_pushstring(v, token, sepPos);
                    // drop the trailing quote; guard the empty value ""
                    size_t lenVal = scstrlen(aVal) - 1; // FIX: was strlen on SQChar*
                    if(!lenVal)
                        Xml_pushDecode(v, _SC(""), 0);
                    else
                        Xml_pushDecode(v, aVal, lenVal);
                    sq_set(v, -3);
                }
            }
            if(!token || (token[0] == ESC)) {
                if(sq_gettop(v) > 1)
                    sq_settop(v, -2); // this tag has no content, only attributes
                else break;
            }
        }
        else if(token[0] == ESC) { // previous tag is over
            if(sq_gettop(v) > 1)
                sq_settop(v, -2); // pop current table
            else break;
        }
        else { // read elements
            sq_pushinteger(v, sq_size(v, -1) + 1);
            Xml_pushDecode(v, token, 0);
            sq_rawset(v, -3); // raw set, matching the original port
        }
    }
    Tokenizer_delete(tok);
    // NOTE(review): str is freed even on the userpointer path -- confirm
    // ownership; sq_free's arity follows the project's wrapper.
    sq_free(str);
    return sq_gettop(v);
}
/* Unit tests for the Tokenizer module.  Pass -v for verbose output. */
int main(int argc, char **argv) {
    int maxtokens = 50;
    int ntokens, verbose = 0, i;
    Tokenizer *tkz;
    const char *sep;
    char str[100];

#ifdef NDEBUG
    eprintf("ERR@%s:%d:"
            "Unit tests must be compiled without -DNDEBUG flag\n",
            __FILE__, __LINE__);
#endif

    /* command line: only "-v" is accepted */
    if(argc == 2) {
        if(strncmp(argv[1], "-v", 2) != 0)
            eprintf("usage: xtokenizer [-v]\n");
        verbose = 1;
    } else if(argc != 1)
        eprintf("usage: xtokenizer [-v]\n");

    tkz = Tokenizer_new(maxtokens);
    assert(Tokenizer_ntokens(tkz) == 0);

    /* split on any of ";:,"; separators are consumed, whitespace kept */
    strcpy(str, "; now; \t: \t is ::the ,time \n,");
    sep = ";:,";
    if(verbose) {
        printf("sep=\"%s\"\n", sep);
        printf("str=\"%s\"\n", str);
    }
    ntokens = Tokenizer_split(tkz, str, sep);
    if(verbose) {
        printf("ntokens=%d\n", ntokens);
        fflush(stdout);
    }
    assert(ntokens == Tokenizer_ntokens(tkz));
    assert(ntokens == 5);
    assert(strcmp(Tokenizer_token(tkz, 0), " now") == 0);
    assert(strcmp(Tokenizer_token(tkz, 1), " \t") == 0);
    assert(strcmp(Tokenizer_token(tkz, 2), " \t is ") == 0);
    assert(strcmp(Tokenizer_token(tkz, 3), "the ") == 0);
    assert(strcmp(Tokenizer_token(tkz, 4), "time \n") == 0);
    if(verbose) {
        for(i = 0; i < ntokens; ++i)
            printf("%4d \"%s\"\n", i, Tokenizer_token(tkz, i));
    }

    /* stripping whitespace removes tokens that become empty */
    ntokens = Tokenizer_strip(tkz, " \t\n");
    assert(ntokens == 4);
    assert(strcmp(Tokenizer_token(tkz, 0), "now") == 0);
    assert(strcmp(Tokenizer_token(tkz, 1), "is") == 0);
    assert(strcmp(Tokenizer_token(tkz, 2), "the") == 0);
    assert(strcmp(Tokenizer_token(tkz, 3), "time") == 0);

    /* find returns the token's index, or ntokens when absent */
    assert(Tokenizer_find(tkz, "now") == 0);
    assert(Tokenizer_find(tkz, "is") == 1);
    assert(Tokenizer_find(tkz, "the") == 2);
    assert(Tokenizer_find(tkz, "time") == 3);
    assert(Tokenizer_find(tkz, "not there") == ntokens);
    if(verbose) {
        printf("after stripping extraneous chars, ntokens is %d\n", ntokens);
        for(i = 0; i < ntokens; ++i)
            printf("%4d \"%s\"\n", i, Tokenizer_token(tkz, i));
        printf("Tokenizer_print:\n");
        Tokenizer_print(tkz, stdout);
    }

    /* a string without separators is a single token */
    strcpy(str, "afasf");
    ntokens = Tokenizer_split(tkz, str, ":");
    assert(ntokens == 1);

    /* an empty string yields no tokens */
    strcpy(str, "");
    ntokens = Tokenizer_split(tkz, str, ":");
    assert(ntokens == 0);

    Tokenizer_free(tkz);
    unitTstResult("Tokenizer", "OK");
    fflush(stdout);
    return 0;
}