/**
 * Build a compact, '|'-separated summary of the status flags of a
 * wordgraph word, for display inside its graph node.
 *
 * @param sent The sentence whose string-set will own the result.
 * @param w The word whose status flags are summarized.
 * @return The summary (e.g. "UNK|RE"), or an empty string if none of
 * the known flags is set. The string is stored in the sentence string-set.
 */
const char *gword_status(Sentence sent, const Gword *w)
{
	/* Flag to label mapping, in display order. Each label carries a
	 * trailing separator; the last one is chopped off below. */
	static const struct
	{
		unsigned int flag;
		const char *label;
	} status_label[] =
	{
		{ WS_UNKNOWN, "UNK|" },
		{ WS_INDICT,  "IN|"  },
		{ WS_REGEX,   "RE|"  },
		{ WS_SPELL,   "SP|"  },
		{ WS_RUNON,   "RU|"  },
		{ WS_HASALT,  "HA|"  },
		{ WS_UNSPLIT, "UNS|" },
		{ WS_PL,      "PL|"  },
	};
	dyn_str *summary = dyn_str_new();
	const char *interned;
	size_t n;
	size_t summary_len;

	for (n = 0; n < sizeof(status_label)/sizeof(status_label[0]); n++)
	{
		if (w->status & status_label[n].flag)
			dyn_strcat(summary, status_label[n].label);
	}

	/* Drop the trailing '|', if anything was added at all. */
	summary_len = strlen(summary->str);
	if (0 != summary_len)
		summary->str[summary_len-1] = '\0';

	interned = string_set_add(summary->str, sent->string_set);
	dyn_str_delete(summary);

	return interned;
}
/**
 * Join all the definitions of an affix class into a single string.
 *
 * This lets the affix file spread the characters of a class over
 * several definitions instead of one long string, e.g. instead of:
 * ""«»《》【】『』`„": QUOTES+;
 * one can write (note the added spaces):
 * """ «» 《》 【】 『』 ` „: QUOTES+;
 * or even:
 * """: QUOTES+;
 * «» : QUOTES+;
 * etc.
 *
 * A class with no definitions, or with a single one, is left untouched.
 * Otherwise the joined string is stored in the first entry of the class.
 *
 * @param afdict The affix dictionary.
 * @param classno The given affix class.
 */
static void concat_class(Dictionary afdict, int classno)
{
	Afdict_class * const cls = AFCLASS(afdict, classno);
	dyn_str *joined;
	size_t n = 0;

	if (cls->length <= 1) return; /* Nothing to join. */

	joined = dyn_str_new();
	while (n < cls->length)
	{
		dyn_strcat(joined, cls->string[n]);
		n++;
	}

	cls->string[0] = string_set_add(joined->str, afdict->string_set);
	dyn_str_delete(joined);
}
/**
 * Look up a token (called the 'morpheme' in the database) in the
 * Morphemes table, and invoke the callback for every matching row.
 *
 * The token is embedded in the SQL text using the "%q" conversion of
 * sqlite3_mprintf(), which doubles every single-quote character. Hence
 * tokens that contain a quote (e.g. "don't") are looked up correctly and
 * cannot terminate the string literal and alter the statement.
 *
 * @param dict The dictionary; its db_handle is the sqlite3 connection.
 * @param s The token to look up.
 * @param cb An sqlite3_exec()-style callback, invoked once per result row.
 * @param bs The user data passed as the first argument of the callback.
 *
 * If the query text cannot be allocated, no lookup is done (so the
 * callback is never invoked). As before, the result code of
 * sqlite3_exec() is not checked.
 */
static void db_lookup_common(Dictionary dict, const char *s,
                             int (*cb)(void *, int, char **, char **),
                             cbdata* bs)
{
	sqlite3 *db = dict->db_handle;
	char *qry;

	qry = sqlite3_mprintf(
		"SELECT subscript, classname FROM Morphemes WHERE morpheme = '%q';",
		s);
	if (NULL == qry) return; /* Out of memory. */

	sqlite3_exec(db, qry, cb, bs, NULL);
	sqlite3_free(qry);
}
/**
 * Convert a list of utf8 chars to wide-chars. The reason for doing
 * this is kind-of dorky: it's so that we can easily find,
 * character-by-character, if a given character is a quotation mark
 * or a bullet. This works only because the quotation marks and
 * bullets are exactly one (wide) character in length. I would like
 * it better if we didn't do this wide-char conversion, since wide-chars
 * are badly-behaved in crazy locales, and on MS Windows.
 *
 * All the strings of the class are concatenated and converted. The
 * string array of the class is then replaced by the resulting
 * null-terminated wide-char string, and mem_elems is set to its size
 * in bytes. The class length is not changed.
 *
 * @param afdict The affix dictionary.
 * @param classno The affix class to convert.
 * @return true on success (including an empty class, which is left as is).
 * false on an invalid multibyte sequence or a memory allocation failure;
 * in these cases the class is not modified.
 */
static bool afdict_to_wide(Dictionary afdict, int classno)
{
	Afdict_class * ac;
	wchar_t * wqs;
	mbstate_t mbs;
	size_t i;
	size_t w;
	dyn_str * qs;
	const char *pqs;

	ac = AFCLASS(afdict, classno);
	if (0 == ac->length) return true;

	qs = dyn_str_new();
	for (i = 0; i < ac->length; i++)
		dyn_strcat(qs, ac->string[i]);

	/* First pass: only find the number of wide chars that are needed.
	 * mbsrtowcs() returns (size_t)-1 on an invalid multibyte sequence. */
	pqs = qs->str;
	memset(&mbs, 0, sizeof(mbs));
	w = mbsrtowcs(NULL, &pqs, 0, &mbs);
	if ((size_t)-1 == w)
	{
		prt_error("Error: Affix dictionary: %s: "
		          "Invalid utf8 character\n", afdict_classname[classno]);
		dyn_str_delete(qs); /* Was leaked on this path. */
		return false;
	}

	wqs = malloc(sizeof(*wqs) * (w+1));
	if (NULL == wqs)
	{
		prt_error("Error: Affix dictionary: %s: "
		          "Out of memory\n", afdict_classname[classno]);
		dyn_str_delete(qs);
		return false;
	}

	/* Second pass: do the actual conversion, from a fresh shift state. */
	pqs = qs->str;
	memset(&mbs, 0, sizeof(mbs));
	(void)mbsrtowcs(wqs, &pqs, w, &mbs);
	wqs[w] = L'\0';

	/* Store the wide char version at the AFCLASS entry. The old array of
	 * string pointers is not reachable after that, so release it here
	 * (its content has already been copied into qs). */
	free((void *)ac->string);
	ac->mem_elems = sizeof(*wqs) * (w+1); /* bytes here, but we don't care */
	ac->string = (void *)wqs;

	dyn_str_delete(qs);
	return true;
}
/**
 * Fetch the disjuncts (and their costs) of a word class from the
 * Disjuncts table, feeding every result row to exp_cb().
 *
 * The class name is embedded in the SQL text using the "%q" conversion
 * of sqlite3_mprintf(), which doubles every single-quote character, so a
 * name that contains a quote cannot break or alter the statement.
 *
 * @param dict The dictionary; its db_handle is the sqlite3 connection.
 * @param s The class name to look up.
 * @param bs The callback data passed to exp_cb(). bs->exp is printed
 * when the verbosity is greater than 4.
 *
 * If the query text cannot be allocated, the lookup is skipped. As
 * before, the result code of sqlite3_exec() is not checked.
 */
static void db_lookup_exp(Dictionary dict, const char *s, cbdata* bs)
{
	sqlite3 *db = dict->db_handle;
	char *qry;

	qry = sqlite3_mprintf(
		"SELECT disjunct, cost FROM Disjuncts WHERE classname = '%q';", s);
	if (NULL != qry)
	{
		sqlite3_exec(db, qry, exp_cb, bs, NULL);
		sqlite3_free(qry);
	}

	if (4 < verbosity)
	{
		printf("Found expression for class %s: ", s);
		print_expression(bs->exp);
	}
}
/**
 * Graph node name: Add "Sentence:" for the main node; Convert SUBSCRIPT_MARK
 * to a dot. Also escape " and \ with a \.
 *
 * @param sent The sentence; the result is stored in its string-set, and
 * its wordgraph member identifies the main node.
 * @param w The word to label. It must exist and have a subword.
 * @return The label, or "(nothing)" if the subword is an empty string.
 */
static const char *wlabel(Sentence sent, const Gword *w)
{
	const char *s;
	const char sentence_label[] = "Sentence:\\n";
	dyn_str *l;
	char c0[] = "\0\0";

	assert((NULL != w) && (NULL != w->subword), "Word must exist");
	if ('\0' == *w->subword)
		return string_set_add("(nothing)", sent->string_set);

	/* Allocate only after the early return above, so nothing is leaked. */
	l = dyn_str_new();

	if (w == sent->wordgraph) dyn_strcat(l, sentence_label);

	for (s = w->subword; *s; s++)
	{
		switch (*s)
		{
			case SUBSCRIPT_MARK:
				dyn_strcat(l, ".");
				break;
			case '\"':
				dyn_strcat(l, "\\\"");
				break;
			case '\\':
				/* Emit two backslashes, so the backslash is a literal one and
				 * doesn't start an escape sequence with the next character. */
				dyn_strcat(l, "\\\\");
				break;
			default:
				*c0 = *s;
				dyn_strcat(l, c0);
		}
	}

	s = string_set_add(l->str, sent->string_set);
	dyn_str_delete(l);
	return s;
}
/** * Print the chosen_disjuncts words. * This is used for debug, e.g. for tracking them in the Wordgraph display. */ static void print_chosen_disjuncts_words(const Linkage lkg, bool prt_optword) { size_t i; dyn_str *djwbuf = dyn_str_new(); err_msg(lg_Debug, "Linkage %p (%zu words): ", lkg, lkg->num_words); for (i = 0; i < lkg->num_words; i++) { Disjunct *cdj = lkg->chosen_disjuncts[i]; const char *djw; /* disjunct word - the chosen word */ if (NULL == cdj) djw = (prt_optword && lkg->sent->word[i].optional) ? "{}" : "[]"; else if ('\0' == cdj->word_string[0]) djw = "\\0"; /* null string - something is wrong */ else djw = cdj->word_string; dyn_strcat(djwbuf, djw); dyn_strcat(djwbuf, " "); } err_msg(lg_Debug, "%s\n", djwbuf->str); dyn_str_delete(djwbuf); }
/**
 * Prepare the affix table of a dictionary for use, after it has been read.
 *
 * The steps, in order:
 * - Reverse the word list of every affix class.
 * - Validate INFIXMARK. If it is usable and neither PRE nor SUF are
 *   defined, collect affixes from the main dictionary; if there is no
 *   INFIXMARK, add an empty dummy one.
 * - Optionally print all the non-empty classes (debug).
 * - Wrap the SANEMORPHISM regex, store it at regex_root and compile it.
 * - Sort UNITS (and MPRE if AFDICT_ORDER_NOT_PRESERVED is defined).
 * - Concatenate the QUOTES and the BULLETS definitions.
 * - Initialize anysplit.
 *
 * @param dict The main dictionary; its affix_table is what gets initialized.
 * @return false if the SANEMORPHISM regex fails to compile or if
 * anysplit_init() fails, else true.
 */
static bool afdict_init(Dictionary dict)
{
	Dictionary afdict = dict->affix_table;
	const size_t num_classes = ARRAY_SIZE(afdict_classname);
	Afdict_class * ac;
	size_t k;
	bool bad_infixmark;

	/* FIXME: read_entry() builds word lists in reverse order (can we
	 * just create the list top-down without breaking anything?). Unless
	 * it is fixed to preserve the order, reverse here the word list for
	 * each affix class. */
	for (k = 0; k < num_classes; k++)
	{
		Afdict_class * const cls = &afdict->afdict_class[k];
		int head = 0;
		int tail = cls->length - 1;

		while (head < tail)
		{
			const char * const swap = cls->string[head];

			cls->string[head] = cls->string[tail];
			cls->string[tail] = swap;
			head++;
			tail--;
		}
	}

	/* Create the affix lists */
	ac = AFCLASS(afdict, AFDICT_INFIXMARK);
	bad_infixmark = (ac->length > 1) ||
		((ac->length == 1) && (strlen(ac->string[0]) != 1));
	if (bad_infixmark)
	{
		prt_error("Error: afdict_init: Invalid value for class %s in file %s"
		          " (should have been one ASCII punctuation - ignored)\n",
		          afdict_classname[AFDICT_INFIXMARK], afdict->name);
		free((void *)ac->string);
		ac->string = NULL;
		ac->mem_elems = 0;
		ac->length = 0;
	}

	/* XXX For now there is a possibility to use predefined SUF and PRE lists.
	 * So if SUF or PRE are defined, don't extract any of them from the dict. */
	if (1 != ac->length)
	{
		/* No INFIX_MARK - create a dummy one that always mismatches */
		affix_list_add(afdict, &afdict->afdict_class[AFDICT_INFIXMARK], "");
	}
	else if ((0 == AFCLASS(afdict, AFDICT_PRE)->length) &&
	         (0 == AFCLASS(afdict, AFDICT_SUF)->length))
	{
		char last_entry[MAX_WORD+1] = "";

		get_dict_affixes(dict, dict->root, ac->string[0][0], last_entry);
	}

	if (debug_level(+D_AI))
	{
		for (k = 0; k < num_classes; k++)
		{
			size_t item;

			ac = &afdict->afdict_class[k];
			if (0 == ac->length) continue;

			lgdebug(+0, "Class %s, %zd items:", afdict_classname[k], ac->length);
			for (item = 0; item < ac->length; item++)
			{
				lgdebug(0, " '%s'", ac->string[item]);
			}
			lgdebug(0, "\n");
		}
	}
#undef D_AI

	/* Store the SANEMORPHISM regex in the unused (up to now)
	 * regex_root element of the affix dictionary, and precompile it */
	assert(NULL == afdict->regex_root, "SM regex is already assigned");
	ac = AFCLASS(afdict, AFDICT_SANEMORPHISM);
	if (0 != ac->length)
	{
		/* The regex used to be converted to: ^((original-regex)b)+$
		 * In the initial wordgraph version word boundaries are not supported,
		 * so instead it is converted to: ^(original-regex)+$ */
#ifdef WORD_BOUNDARIES
		const char * const sm_head = "^((";
		const char * const sm_tail = ")b)+$";
#else
		const char * const sm_head = "^(";
		const char * const sm_tail = ")+$";
#endif
		Regex_node *sm_re = malloc(sizeof(*sm_re));
		dyn_str *rebuf = dyn_str_new();
		int rc;

		dyn_strcat(rebuf, sm_head);
		dyn_strcat(rebuf, ac->string[0]);
		dyn_strcat(rebuf, sm_tail);

		sm_re->pattern = strdup(rebuf->str);
		dyn_str_delete(rebuf);

		afdict->regex_root = sm_re;
		sm_re->name = strdup(afdict_classname[AFDICT_SANEMORPHISM]);
		sm_re->re = NULL;
		sm_re->next = NULL;
		sm_re->neg = false;

		rc = compile_regexs(afdict->regex_root, afdict);
		if (0 != rc)
		{
			prt_error("Error: afdict_init: Failed to compile "
			          "regex '%s' in file %s, return code %d\n",
			          afdict_classname[AFDICT_SANEMORPHISM], afdict->name, rc);
			return false;
		}
		lgdebug(+5, "%s regex %s\n",
		        afdict_classname[AFDICT_SANEMORPHISM], sm_re->pattern);
	}

	/* sort the UNITS list */
	/* Longer unit names must get split off before shorter ones.
	 * This prevents single-letter splits from screwing things
	 * up. e.g. split 7gram before 7am before 7m */
	ac = AFCLASS(afdict, AFDICT_UNITS);
	if (ac->length > 0)
	{
		qsort(ac->string, ac->length, sizeof(char *), cmplen);
	}

#ifdef AFDICT_ORDER_NOT_PRESERVED
	/* pre-sort the MPRE list */
	ac = AFCLASS(afdict, AFDICT_MPRE);
	if (ac->length > 0)
	{
		/* Longer subwords have priority over shorter ones,
		 * reverse-sort by length.
		 * XXX mprefix_split() for Hebrew depends on that. */
		qsort(ac->string, ac->length, sizeof(char *), revcmplen);
	}
#endif /* AFDICT_ORDER_NOT_PRESERVED */

	concat_class(afdict, AFDICT_QUOTES);
	concat_class(afdict, AFDICT_BULLETS);

	if (anysplit_init(afdict)) return true;
	return false;
}
/* Was main() of the test program... */
/**
 * Convert a tokenizer regex to a PCRE pattern with callouts, run it on
 * a string, and print the results.
 *
 * Outline:
 * 1. Count the capture groups of the input pattern; at least one is needed.
 * 2. Compile the input pattern as is, as a syntax check.
 * 3. Build the modified pattern: each capture group is opened with "(?:"
 *    and closed with ")(?C)" (a callout). For a named group, the name is
 *    resolved as "DICTWORD", as an affix class name, or as a regex name
 *    (see get_regex_by_name()), and the per-group info is recorded in
 *    callout_data.cgnum[]. A "$" (if missing) and a final "(?C1)" callout
 *    are then appended.
 * 4. Compile and study the modified pattern, and run pcre_exec() anchored.
 *    According to the messages below, a "no match" result is the
 *    expected one.
 * 5. Print the output vector (and the callout stack if verbosity > 6),
 *    and free the resources.
 *
 * @param inpat The tokenizer regex.
 * @param flags Not referenced in this function.
 * @param str The string to run the modified pattern on.
 * @param dict The dictionary used for resolving the group names.
 * @return 0 if everything has been run; a non-zero code on an error in
 * the pattern (9, 2, 199, 25x, 103) or an internal error (99, 3).
 *
 * NOTE(review): The early "return" statements in the middle of the
 * function don't release what has been allocated up to that point
 * (pat, callout_data.cgnum and its elements, word_classname, re).
 */
static int regex_split(const char *inpat, int flags, const char *str, Dictionary dict)
{
	const char *p;
	dyn_str *pat;
	int plevel;  /* paren level */
	int cglevel; /* capture group level */
	int nplevel; /* paren level within named capture group */
	int icgnum;  /* capture group number*/
	int options;
	const char *errptr;
	int erroffset;
	pcre *pcre;
	const char * const prog = "regex_tokenizer_test";
	int rc;
	pcre_extra *extra = NULL;
#define OVCNT 15
	int ovector[OVCNT];
	callout_data_t callout_data;

#if 0
	const char **wordlist;
#endif
	bool word_compare_flag = true;
#ifdef notdef
	dyn_str *wordalts;
#endif
	const char *group_name = NULL;
	char *word_classname;
	char c0[2] = "\0\0";

	/* FIXME: validate we use PCRE version 2 at least. */

	/* Find the number of capturing groups in the input pattern. */
	icgnum = 0;
	for (p = inpat; '\0' != *p; p++)
	{
		/* Count as capture groups only (string) or (?<name>). Especially, avoid
		 * counting (?<=...) (positive look behind) and (?(condition)...) (the
		 * (condition) part).
		 * FIXME: support () inside [].
		 * FIXME: support \. */
		/* NOTE(review): (*p != '*') is always true when (*p == '('). The
		 * conversion loop below checks p[1] == '*' at the same place, so
		 * p[1] was possibly intended here too - confirm. */
		if ((*p == '(') && (*p != '*') &&
		    ((p[1] != '?') || ((p[2] == '<') && (p[3] != '='))) &&
		    ((p-inpat < 2) || (p[-2] != '(') || (p[-1] != '?')))
		{
			icgnum++;
		}
	}
	if (0 == icgnum)
	{
		printf("%s: pattern must include at least one () group (was: %s)\n", prog, inpat);
		return 9;
	}
#if 0
	if (p[-1] != '$')
	{
		/* FIXME: add $ if needed */
		printf("%s: pattern must end with $ (was: %s)\n", prog, inpat);
		return 9;
	}
#endif

	/* Regex syntax check of the pattern.
	 * FIXME: Add support for "(?J)" */
	/* NOTE(review): The compiled pattern returned here is never released;
	 * the "pcre" variable is overwritten by the second pcre_compile() call
	 * below. */
	options = PCRE_UTF8;
	pcre = pcre_compile(inpat, options, &errptr, &erroffset, NULL);
	if (NULL == pcre)
	{
		printf("%s: pcre_compile: Error in pattern '%s' at offset %d: %s\n",
		       prog, inpat, erroffset, errptr);
		return 2;
	}

	/* One (initially NULL) per-group info pointer for each capture group
	 * that has been counted above. */
	callout_data.wordlist = NULL;
	callout_data.cgnum = NULL;
	if (word_compare_flag)
	{
		int i;
#if 0
		callout_data.wordlist = malloc(sizeof(*callout_data.wordlist)*icgnum);
#endif
		callout_data.cgnum = malloc(sizeof(*callout_data.cgnum)*icgnum);
		//printf("ALLOCATED callout_data.cgnum %ld for %d groups\n",
		//sizeof(*callout_data.wordlist)*cgnum, icgnum);
		for (i = 0; i < icgnum; i++)
		{
#if 0
			callout_data.wordlist[i] = NULL;
#endif
			callout_data.cgnum[i] = NULL;
		}
	}

	/* Build the pattern that finds all possible matches. */
	pat = dyn_str_new();
	plevel = 0;
	cglevel = 0;
	icgnum = -1; /* First capture group (plevel==1) is icgnum==0. */

	/* Convert the input regex to the tokenizer regex.
	 * cglevel counts named capture groups
	 * plevel counts all groups
	 *
	 * FIXME: Add support for:
	 * (?x) - comment mode.
	 * (?i) - ignore case.
	 * \ - backslash for ()<>?* .
	 * [] - () inside it
	 * FIXME: Add "(?: ... )" over the result pattern.
	 */
	//dyn_strcat(pat, "(?J)");
	for (p = inpat; '\0' != *p; p++)
	{
		char *re = NULL; /* a regex from the 4.0.regex file */

		/* A "break" out of this switch copies the current character to the
		 * modified pattern (at the end of the loop body); a "continue"
		 * skips that. */
		switch (*p)
		{
			const char *c;

			case '(':
				if (cglevel > 0)
				{
					printf("Error at position %ld: Tokenizer capture groups cannot have nested groups\n", p-inpat);
				}
				plevel++;
				/* Not a capture group: copy the '(' as is. */
				if ((p[1] == '*') ||
				    ((p[1] == '?') && ((p[2] != '<') || (p[3] == '='))) ||
				    ((p-inpat > 1) && (p[-2] == '(') && (p[-1] == '?')))
				{
					break;
				}
				cglevel++;
				if (cglevel > 1)
				{
					printf("Error at position %ld: Tokenizer aregex cannot have capture group level > 1\n", p-inpat);
					free(callout_data.cgnum);
					return 199;
				}
				icgnum++;
				/* The group is made a non-capturing one. The "break" then
				 * appends the original '(' right after the "(?:". */
				dyn_strcat(pat, "(?:");
				group_name = NULL;
				break;
			case ')':
				plevel--;
				if (cglevel > 0)
				{
					cglevel--;
					/* Add the dict lookup and capturing callback. */
					dyn_strcat(pat, ")(?C)");
				}
				group_name = NULL;
				break;
			case '<':
				/* Remember it as a potential start of a named group. */
				if ((p-2 >= inpat) && (p[-2] == '(') && (p[-1] == '?') && (p[1] != '='))
				{
					group_name = p + 1;
				}
				else
					group_name = NULL;
				break;
			case '>':
				if (NULL != group_name)
				{
					/* Check if this is actually a group name */
					for (c = group_name; c < p; c++)
					{
						/* FIXME: 'a' and 'p' are part of a hack for lookup_mark.
						 * FIXME: 'r' is part of a hack for regex names that match affix
						 * class names. The fix is not to use matching names. */
						if ((*c > 'Z' || *c < 'A') && *c != 'a' && *c != 'p' && *c != 'r') break;
					}
					if (c == p)
					{
						/* Copy the name, which isn't null-terminated in the pattern. */
						word_classname = malloc(p-group_name+1);
						strncpy(word_classname, group_name, p-group_name);
						word_classname[p-group_name] = '\0';
					}
					else
					{
						printf("%s: Invalid class name in group name found at '%s'\n",
						       prog, group_name-4);
						word_classname = NULL;
					}
				}
				else
				{
					word_classname = NULL;
				}
				if (!word_classname)
				{
					group_name = NULL;
					break;
				}
				dyn_strcat(pat, ">");

				lgdebug(6, "Found word-class %s\n", word_classname);
#if 0
				wordlist = readwords(word_classname);
				if (NULL == wordlist)
				{
					printf("i%s: Invalid class name %s in group name\n", prog, word_classname);
					return 100;
				}

				if (!word_compare_flag)
				{
					printf("Invocation without -w is not supported\n");
					return 103;
				}
#endif

				if (word_compare_flag)
				{
					char *t;
					const char *lookup_mark = NULL;
#if 0
					callout_data.wordlist[icgnum] = wordlist;
					printf("WORDLIST %p at cgnum %d\n", wordlist, icgnum);
#endif
					/* Allocate per group info */
					callout_data.cgnum[icgnum] = malloc(sizeof(*(callout_data.cgnum)[0]));
					callout_data.cgnum[icgnum]->name = NULL;
					//printf("ALLOCATED cgnum[%d]=%p\n", icgnum,
					//callout_data.cgnum[icgnum]);

					/* A hack for testing: Handle WORDpX or WORDaX.
					 * The above a/p marks mean append/prepend X to word before making
					 * the lookup.
					 * FIXME: Find another way to specify that, maybe in the affix file
					 * or in a tokenizer definition file. */
					t = strpbrk(word_classname, "pa");
					if (NULL != t)
					{
						Afdict_class *ac;

						/* Save the mark ('p' or 'a') and cut the name at it; what
						 * follows the mark is an affix class name. */
						callout_data.cgnum[icgnum]->lookup_mark_pos = *t;
						*t = '\0';
						ac = afdict_find(dict->affix_table, t+1, /*notify_err*/false);
						if (NULL == ac)
						{
							printf("%s: Unknown afclass '%s'\n", prog, t+1);
							return 253;
						}

						/* Check if the requested affix class is defined and is not an
						 * empty string (like the default INFIXMARK). */
						if (0 == ac->length || '\0' == ac->string[0][0])
						{
							printf("%s: No value for afclass '%s'\n", prog, t+1);
							return 252;
						}
						lookup_mark = ac->string[0]; /* FIXME: support more than one value. */
					}

					callout_data.cgnum[icgnum]->lookup_mark = lookup_mark;
					callout_data.cgnum[icgnum]->name = word_classname;

					if (0 == strcmp(word_classname, "DICTWORD"))
					{
						/* Assign data for looking up a word in the main dict. */
						callout_data.cgnum[icgnum]->dict = dict;
						callout_data.cgnum[icgnum]->afclass = NULL;
					}
					else
					if (afdict_find(dict->affix_table, word_classname, /*notify_err*/false))
					{
						callout_data.cgnum[icgnum]->dict = dict->affix_table;
						callout_data.cgnum[icgnum]->afclass = word_classname;
					}
					else
					{
						/* Skip the leading 'r' of the regex name hack (see the
						 * FIXME at the group name check above). */
						if ('r' == word_classname[0]) word_classname++;
						re = get_regex_by_name(dict, word_classname);
						if (re)
						{
							lgdebug(6, "Regex %s with modified groups: '%s'\n", word_classname, re);
							callout_data.cgnum[icgnum]->dict = NULL;
							/* FIXME: No need to allocate callout_data.cgnum[icgnum] in this
							 * case. */
						}
						else
						{
							printf("%s: Unknown word classname '%s'\n", prog, word_classname);
							return 254;
						}
					}
					/* TODO: Assign flags, e.g. for emitting the words with stem/infix marks. */
				}
				else
				{
#if 0
					wordalts = make_wordalts(wordlist);
					dyn_strcat(pat, wordalts->str);
					dyn_str_delete(wordalts);
					free(wordlist);
#else
					printf("%s: Invocation without -w is not supported\n", prog);
					return 103;
#endif
				}

				/* Default match for dictionary lookup is ".*".
				 * Allow replacing it by something else.
				 * E.g: .{2,}|a */
				if (')' == p[1])
				{
					if (NULL == re)
					{
						dyn_strcat(pat, ".*");
					}
					else
					{
						dyn_strcat(pat, re);
						free(re);
						re = NULL;
					}
				}
				else
				{
					/* Copy the rest of the group, up to (not including) its
					 * closing paren. */
					/* NOTE(review): If "re" is not NULL at this point, it is
					 * neither used nor freed on this path. */
					nplevel = 1;
					/* FIXME: Add support for:
					 * (?x) - comment mode.
					 * \ - backslash for ()<>?* .
					 * [] - () inside it
					 */
					for (; p[1] != '\0' && nplevel > 0; p++)
					{
						switch (p[1])
						{
							case '(':
								if (('?' != p[2]) && ('*' != p[2]) &&
								    ((p[-1] != '(') || (p[0] != '?')))
								{
									printf("%s: Capture_group %d: Nested capture group is not supported\n",
									       prog, icgnum+1);
									return 250;
								}
								nplevel++;
								break;
							case ')':
								nplevel--;
								if (0 == nplevel) continue; /* we are done */
								break;
						}

						c0[0] = p[1];
						dyn_strcat(pat, c0);
					}
					/* Step back, so that after the p++ of the outer loop the
					 * closing paren of the group is handled by case ')'. */
					p--;
				}

				word_classname = NULL;
				group_name = NULL;
				continue;
		}

		c0[0] = *p;
		dyn_strcat(pat, c0);
	}

	/* Add '$' at the end if needed. */
	if ('$' != pat->str[pat->end-1]) dyn_strcat(pat, "$");
	/* Add the backtracking callback. */
	dyn_strcat(pat, "(?C1)");

	printf("Modified pattern: %s", pat->str);
	lgdebug(2, " (len %zu/%zu)", pat->end, pat->len);
	printf("\n");

	pcre_callout = callout;

	callout_data.function = 1;
	callout_data.subp_i = 0;
	callout_data.subp[0].s = 0;
	callout_data.subp[0].e = SUBP0END_DEBUG_SIGNATURE;
	callout_data.subp_ovfl = false;
	callout_data.capture_last = 0;
	callout_data.pattern = pat->str;
	callout_data.alt_counter = 0;

	options = PCRE_UTF8;
	pcre = pcre_compile(pat->str, options, &errptr, &erroffset, NULL);
	if (NULL == pcre)
	{
		printf("%s: Internal error: pcre_compile: Error in pattern '%s' at offset %d: %s\n",
		       prog, pat->str, erroffset, errptr);
		return 99;
	}

	/* TODO: Check if using JIT may optimize out some needed callouts. */
	options = 0; //PCRE_STUDY_JIT_COMPILE;
	extra = pcre_study(pcre, options, &errptr);
	if (NULL == extra)
	{
		if (NULL != errptr)
		{
			printf("%s: pcre_study: Error for pattern '%s': %s\n", prog, pat->str, errptr);
			return 3;
		}
		/* No study data and no error: allocate a zeroed block, which is
		 * needed for passing the callout data below. */
		extra = malloc(sizeof(*extra));
		memset(extra, 0, sizeof(*extra));
	}
	else
	{
		/* For some reason JIT is sometimes done even though it was not requested.
		 * But the callouts are still invoked as expected in such cases. */
		lgdebug(6, "%s: pcre_study: JIT %ld\n", prog, extra->flags & PCRE_STUDY_JIT_COMPILE);
	}

#if 0
	extra->match_limit = 10000;
	extra->match_limit_recursion = 10000;
	extra->flags |= PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION;
#endif

	extra->callout_data = (void *)&callout_data;
	extra->flags |= PCRE_EXTRA_CALLOUT_DATA;

#if 0
	printf("CGNUM %d\n", icgnum);
	if (NULL != callout_data.cgnum)
	{
		int i;

		for (i = 0; i <= icgnum; i++)
		{
			printf("callout_data.cgnum[%d] %p\n", i, callout_data.cgnum[i]);
		}
	} else
		printf("CGNUM %p\n", callout_data.cgnum);
#endif

	options = PCRE_ANCHORED; /* XXX Maybe PCRE_NO_START_OPTIMIZE is needed too */
	rc = pcre_exec(pcre, extra, str, strlen(str), 0, options, ovector, OVCNT);
	if (rc < 0)
	{
		if (PCRE_ERROR_NOMATCH == rc)
		{
			lgdebug(2, "No match (must always happen)\n");
		}
		else
		{
			printf("%s: pcre_exec: Error %d\n", prog, rc);
		}
	}
	else
	{
		printf("Internal error: Unexpected match, rc=%d\n", rc);
	}

	if (0 == rc)
	{
		rc = OVCNT/3;
		printf("ovector only has room for %d captured substrings\n", rc - 1);
	}

	/* NOTE(review): rc is negative here in the expected (no match) case;
	 * presumably printov() copes with that - confirm. */
	printov(str, (ov_t *)ovector, rc, NULL, /*is_pcreov*/true);

	if (verbosity > 6)
	{
		if (0 != callout_data.subp_i)
		{
			printf("Callout stack:\n");
			printov(str, callout_data.subp, callout_data.subp_i, &callout_data, /*is_pcreov*/false);
		}
	}

	/* Free everything. */
	dyn_str_delete(pat); /* note - callback_data uses parts of pat */
	pcre_free_study(extra); /* safe even if malloc'ed */
	/* NOTE(review): The PCRE API documents pcre_free() for releasing a
	 * compiled pattern; plain free() matches it only if pcre_free has not
	 * been redirected - confirm. */
	free(pcre);

	if (NULL != callout_data.cgnum)
	{
		int i;

		/* icgnum is now the index of the last capture group converted. */
		for (i = 0; i <= icgnum; i++)
		{
			if (callout_data.cgnum[i])
			{
				/* FIXME: Free also word_classname. */
				free(callout_data.cgnum[i]);
			}
		}
		free(callout_data.cgnum);
	}

#if 0
	if (NULL != callout_data.wordlist)
	{
		int i;

		for (i = 0; i < icgnum; i++)
		{
			free(callout_data.wordlist[i]);
		}
		free(callout_data.wordlist);
	}
#endif

	return 0;
}
/**
 * Get a regex (of 4.0.regex) by name.
 * Replace all capturing groups by non-capturing ones, since the invoking
 * function cannot currently handle them. Hence back references are not
 * supported. This can be fixed if needed.
 *
 * If a regex name appears multiple times, concatenate them using an alternation
 * bar. Remove anchors ^ and $ if exist (suppose they can only appear at the
 * start and end of the regex, as currently in 4.0.regex). A backslash-quoted
 * "$" at the end of a regex is a literal character and not an anchor, so it
 * is retained.
 *
 * @param dict The dictionary; its regex_root list is searched.
 * @param name The regex name to look for.
 * @return A newly allocated string that the caller should free(), or NULL
 * if no regex by this name is found (or if the result is empty).
 */
static char *get_regex_by_name(Dictionary const dict, const char * const name)
{
	dyn_str * const pat = dyn_str_new();
	char *result = NULL;
	Regex_node *re = dict->regex_root;
	const char *p;

	while (NULL != re)
	{
		if (0 == strcmp(re->name, name))
		{
			/* re analyze state */
			bool insqb = false; /* in square brackets */
			bool qn = false;    /* quote next character */

			p = re->pattern;
			if ('\0' != pat->str[0]) dyn_strcat(pat, "|");
			if ('^' == *p) p++;

			/* Change groups in POSIX regex to PCRE non-capturing groups.
			 * FIXME: Add support for PCRE syntax,
			 * especially, skip (?...) and (*...).
			 * The following code supports backslash and square brackets.
			 * It supposes the regex is valid. */
			for (; '\0' != *p; p++)
			{
				char c0[2] = { '\0', '\0' };
				const bool quoted = qn; /* this character follows a backslash */

				if (qn)
				{
					qn = false;
				}
				else
				{
					switch (*p)
					{
						case '\\':
							qn = true;
							break;
						case '[':
							insqb = true;
							break;
						case ']':
							/* A ']' just after the '[' is a literal one. */
							if (p > re->pattern && '[' == p[-1]) break;
							insqb = false;
							break;
						case '(':
							if (insqb) break;
							dyn_strcat(pat, "(?:");
							continue;
					}
				}

				/* Copy everything but an ending "$" anchor. Dropping a quoted
				 * "$" would leave a dangling backslash at the end of the
				 * result, so it is always copied. */
				if (quoted || '$' != *p || '\0' != p[1])
				{
					c0[0] = *p;
					dyn_strcat(pat, c0);
				}
			}
		}
		re = re->next;
	}

	if ('\0' != pat->str[0]) result = strdup(pat->str);
	dyn_str_delete(pat);
	return result;
}