int dupl(int n) { /* duplicate the subtree whose root is n, return ptr to it */ int i; i = name[n]; if(i < NCH) return(mn0(i)); switch(i){ case RNULLS: return(mn0(i)); case RCCL: case RNCCL: return(mnp(i,ptr[n])); case FINAL: case S1FINAL: case S2FINAL: return(mn1(i,left[n])); case STAR: case QUEST: case PLUS: case CARAT: return(mn1(i,dupl(left[n]))); case RSTR: case RSCON: return(mn2(i,dupl(left[n]),right[n])); case BAR: case RNEWE: case RCAT: case DIV: return(mn2(i,dupl(left[n]),dupl(right[n]))); # ifdef DEBUG default: warning("bad switch dupl %d",n); # endif } return(0); }
// Default constructor: puts every option into its start-up state.
// Which members are touched depends on the program variant being built
// (PROGLEMMATISE, PROGMAKESUFFIXFLEX, COUNTOBJECTS).
optionStruct::optionStruct()
    {
    // Options shared by all program variants.
    whattodo = LEMMATISE;
    argi = NULL;
    argo = NULL;
    arge = NULL;
    cformat = NULL; // no copy of DefaultCFormat is made here
    nice = false;

#if (defined PROGMAKESUFFIXFLEX || defined PROGLEMMATISE)
    flx = NULL;
#endif

#if defined PROGLEMMATISE
    // All three format strings start out flagged as "default".
    defaultbformat = true;
    defaultBformat = true;
    defaultCformat = true;

    // File arguments.
    dictfile = NULL;
    v = NULL;
    x = NULL;
    z = NULL;

    // XML handling.
    XML = false;
    ancestor = NULL;            // if not null, restrict lemmatisation to elements that are offspring of ancestor
    element = NULL;             // if null, analyse all PCDATA that is text
    wordAttribute = NULL;       // if null, word is PCDATA
    POSAttribute = NULL;        // if null, POS is PCDATA
    lemmaAttribute = NULL;      // if null, Lemma is PCDATA
    lemmaClassAttribute = NULL; // if null, lemma class is PCDATA

    // Input interpretation.
    InputHasTags = true;
    CollapseHomographs = true;
    keepPunctuation = 1;
    treatSlashAsAlternativesSeparator = false;
    size = ULONG_MAX;

    // Output formatting. Only the separator gets a duplicated default;
    // the format strings stay NULL.
    Sep = dupl(DefaultSep);
    Wformat = NULL;
    bformat = NULL;
    Bformat = NULL;
    Iformat = NULL;
    SortOutput = 0;
    baseformsAreLowercase = true;

    // Disambiguation.
    freq = NULL;
    RulesUnique = true;
    DictUnique = true;
    UseLemmaFreqForDisambiguation = 0;
#endif

#if defined PROGMAKESUFFIXFLEX
    showRefcount = false;
    CutoffRefcount = 0;
#endif

#ifdef COUNTOBJECTS
    ++COUNT;
#endif
    }
void optionStruct::setcformat(const char * format) // -c { delete [] cformat; cformat = dupl(format); #if defined PROGLEMMATISE defaultCformat = format == DefaultCFormat || format == DefaultCFormat_NoTags || format == DefaultCFormatXML || format == DefaultCFormatXML_NoDict; #endif }
/*
 - repeat - generate code for a bounded repetition, recursively if needed
 *
 * The operand to be repeated occupies the strip from `start` up to the
 * position returned by HERE() on entry (`finish`).  The pair (from, to) is
 * reduced to one of a handful of shapes by MAP/REP below, and each shape is
 * either emitted directly or rewritten into a simpler repetition that is
 * handled by a recursive call.
 *
 * Nothing is done if p->error is already set.
 */
static void
repeat(struct parse *p,
       sopno start,		/* operand from here to end of strip */
       int from,		/* repeated from this number */
       int to)			/* to this number of times (maybe INFINITY) */
{
	sopno finish = HERE();
	/*
	 * MAP classifies a count as 0, 1, N (any finite value above 1) or
	 * INF (INFINITY); REP packs a (from, to) pair of such classes into a
	 * single integer usable as a case label.
	 */
#	define	N	2
#	define	INF	3
#	define	REP(f, t)	((f)*8 + (t))
#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
	sopno copy;

	if (p->error != 0)	/* head off possible runaway recursion */
		return;

	assert(from <= to);

	switch (REP(MAP(from), MAP(to))) {
	case REP(0, 0):			/* must be user doing this */
		DROP(finish-start);	/* drop the operand */
		break;
	case REP(0, 1):			/* as x{1,1}? */
	case REP(0, N):			/* as x{1,n}? */
	case REP(0, INF):		/* as x{1,}? */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);		/* offset is wrong... */
		/* the operand moved one slot to the right because of the INSERT */
		repeat(p, start+1, 1, to);
		ASTERN(OOR1, start);
		AHEAD(start);			/* ... fix it */
		EMIT(OOR2, 0);
		AHEAD(THERE());
		ASTERN(O_CH, THERETHERE());
		break;
	case REP(1, 1):			/* trivial case */
		/* done */
		break;
	case REP(1, N):			/* as x?x{1,n-1} */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);
		ASTERN(OOR1, start);
		AHEAD(start);
		EMIT(OOR2, 0);			/* offset very wrong... */
		AHEAD(THERE());			/* ...so fix it */
		ASTERN(O_CH, THERETHERE());
		/* duplicate the (shifted) operand and repeat the copy 1..to-1 times */
		copy = dupl(p, start+1, finish+1);
		assert(copy == finish+4);
		repeat(p, copy, 1, to-1);
		break;
	case REP(1, INF):		/* as x+ */
		INSERT(OPLUS_, start);
		ASTERN(O_PLUS, start);
		break;
	case REP(N, N):			/* as xx{m-1,n-1} */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to-1);
		break;
	case REP(N, INF):		/* as xx{n-1,INF} */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to);
		break;
	default:			/* "can't happen" */
		SETERROR(REG_ASSERT);	/* just in case */
		break;
	}
}
/* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition */ static int /* was the simple RE an unbackslashed $? */ p_simp_re(struct parse *p, int starordinary) /* is a leading * an ordinary character? */ { int c; int count; int count2; sopno pos; int i; sopno subno; # define BACKSL (1<<CHAR_BIT) pos = HERE(); /* repetion op, if any, covers from here */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { REQUIRE(MORE(), REG_EESCAPE); c = BACKSL | GETNEXT(); } switch (c) { case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case BACKSL|'{': SETERROR(REG_BADRPT); break; case BACKSL|'(': p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) p_bre(p, '\\', ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': SETERROR(REG_EPAREN); break; case BACKSL|'1': case BACKSL|'2': case BACKSL|'3': case BACKSL|'4': case BACKSL|'5': case BACKSL|'6': case BACKSL|'7': case BACKSL|'8': case BACKSL|'9': i = (c&~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { assert(i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); assert(OP(p->strip[p->pend[i]]) == ORPAREN); (void) dupl(p, p->pbegin[i]+1, p->pend[i]); EMIT(O_BACK, i); } else SETERROR(REG_ESUBREG); p->g->backrefs = 1; break; case '*': REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, (char)c); break; } if (EAT('*')) { /* implemented as +? 
*/ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { if (MORE() && isdigit((uch)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == '$') /* $ (but not \$) ends it */ return(1); return(0); }
void optionStruct::setSep(const char * format) // -s { delete [] Sep; Sep = dupl(format); }
void optionStruct::setWformat(const char * format) // -W { delete [] Wformat; Wformat = dupl(format); }
void optionStruct::setbformat(const char * format) // -b { delete [] bformat; bformat = dupl(format); defaultbformat = format == Default_b_format; }
void optionStruct::setBformat(const char * format) // -B { delete [] Bformat; Bformat = dupl(format); defaultBformat = format == Default_B_format; }
void optionStruct::setIformat(const char * format) // -I { delete [] Iformat; Iformat = dupl(format); }
OptReturnTp optionStruct::doSwitch(int c,char * locoptarg,char * progname) { switch (c) { case '@': readOptsFromFile(locoptarg,progname); break; #if defined PROGLEMMATISE case 'A': if(locoptarg && *locoptarg == '-') { treatSlashAsAlternativesSeparator = false; } else { treatSlashAsAlternativesSeparator = true; } break; case 'b': setbformat(locoptarg); // bformat = dupl(locoptarg); // defaultbformat = false; break; case 'B': setBformat(locoptarg); // Bformat = dupl(locoptarg); // defaultBformat = false; break; #endif case 'c': cformat = dupl(locoptarg); defaultCformat = false; break; #if defined PROGMAKESUFFIXFLEX case 'C': //CutoffRefcount = locoptarg == NULL || *locoptarg != '-'; if(!locoptarg || *locoptarg == '-') CutoffRefcount = 0; else CutoffRefcount = strtol(locoptarg,NULL,10); break; break; #endif #if defined PROGLEMMATISE case 'd': dictfile = locoptarg; break; #endif case 'D': whattodo = MAKEDICT; break; case 'e': arge = locoptarg; switch(*arge) { case '0': case '1': case '2': case '7': case '9': setEncoding(*arge - '0'); break; case 'u': case 'U': setEncoding(ENUNICODE); break; } break; #if (defined PROGMAKESUFFIXFLEX || defined PROGLEMMATISE) case 'f': flx = locoptarg; break; #endif case 'F': whattodo = MAKEFLEXPATTERNS; break; case 'h': case '?': printf("usage:\n"); printf("============================\n"); #if defined PROGMAKEDICT printf(" Create binary dictionary\n"); printf("%s -D \\\n",progname); printf(" -c<format> [-N<frequency file> -n<format>] [-y[-]] \\\n" " [-i<lemmafile>] [-o<binarydictionary>]\n" " -c column format of dictionary (tab separated), e.g. 
-cBFT, which means:\n" " 1st column B(ase form), 2nd column F(ull form), 3rd column T(ype)\n" " -n column format of frequency file (tab separated)\n" " Example: -nN?FT, which means:\n" " 1st column N(frequency), 2nd column irrelevant,\n" " 3rd column F(ull form), 4th column T(ype)\n" " -y test output\n -y- release output (default)\n" " -k collapse homographs (remove \",n\" endings)(default)\n" " -k- do not collapse homographs (keep \",n\" endings)\n"); // printf("--More--");getchar(); printf("===============================\n"); #endif #if defined PROGMAKESUFFIXFLEX printf(" Create or add flex patterns\n"); printf("%s -F \\\n",progname); printf(" -c<format> [-y[-]] [-i<lemmafile>] \\\n" " [-f<old flexpatterns>] [-o<new flexpatterns>]\n" " -c column format, e.g. -cBFT, which means:\n" " 1st column B(aseform), 2nd column F(ullform), 3rd column T(ype)\n" " For lemmatising untagged text, suppress lexical type information by\n" " specifying '?' for the column containing the type.\n" " -y test output\n -y- release output (default)\n"); printf(" -R- Do not append refcount to base form (default)\n");// Bart 20050905 printf(" -R Append refcount to base form (format: [<base form>#<refcount>])\n");// Bart 20050905 printf(" -C- Include all rules in output (default)\n");// Bart 20050905 printf(" -C<n> Do not include rules with refcount <= <n>\n");// Bart 20050905 // printf("--More--");getchar(); printf("=============\n"); #endif #if defined PROGLEMMATISE printf(" Lemmatise\n"); // printf("%s [-L] -c<format> -b<format> -B<format> [-s[<sep>]] [-u[-]] -d<binarydictionary> -f<flexpatterns> [-z<type conversion table>] [-i<input text>] [-o<output text>] [-m<conflicts>] [-n<newlemmas>] [-x<Lexical type translation table>]\n",argv[0]); printf("%s [-L] \\\n",progname); printf(" -f<flex patterns> [-d<binary dictionary>] [-u[-]] [-v[-]] \\\n" " [-I<input format>] [-i<input text>] [-o<output text>] \\\n" " [-c<format>] [-b<format>] [-B<format>] [-W<format>] [-s[<sep>]] \\\n" " [-x<Lexical 
type translation table>] [-v<tag friends file>] \\\n" " [-z<type conversion table>] [-@<option file>]\n"); printf(" -i<input text>\tIf -t- defined: any flat text. Otherwise: words must be\n" " followed by tags, separated by '/'. Default: standard input.\n"); printf(" -I<format>\tInput format (if not word/tag (-t) or word (-t-)).\n" " $w word to be lemmatised\n" " $t tag\n" " $d dummy\n" " \\t tab\n" " \\n new line\n" " \\s white space\n" " \\S all except white space\n"); printf(" -o<output text>\tOutput format dependent on -b, -B, -c and -W arguments.\n" " Default output: standard output\n"); printf(" -d<binarydictionary>\tDictionary as produced with the -D option set.\n" " If no dictionary is specified, only the flex patterns are used.\n" " Without dictionary, wrong tags in the input can not be corrected.\n"); printf(" -f<flexpatterns>\tFile with flex patterns. (see -F). Best results for\n" " untagged input are obtained if the rules are made without lexical type\n" " information. See -c option above.\n"); printf(" -b<format string>\tdefault:" commandlineQuote "%s" commandlineQuote "\n",Default_b_format); printf(" Output format for data pertaining to the base form, according to the\n" " dictionary:\n" " $f sum of frequencies of the words $W having the base form $w\n" " (lemmafrequency).\n"); /* printf(" $f base form type or token frequency.\n"); printf(" (The frequency of the base form type is given if you have\n"); printf(" (a) specified $f in the -c<format> argument, or\n"); printf(" (b) specified a -W<format> argument, or\n"); printf(" (c) specified a -H0 or -H1 argument.\n"); printf(" Otherwise, base form token frequency is given.)\n"); */ #if FREQ24 printf(" $n frequency of the full form $w/$t in \"standard\" corpus.\n"); #endif // printf(" $p probability of this lexical type (%%) = 100x$n/sum($n).\n"); printf(" $t lexical type\n"); printf(" $w base form\n"); printf(" $W full form(s)\n"); printf(" \\$ dollar\n"); printf(" \\[ [\n"); printf(" \\] ]\n"); 
printf(" Example: -b" commandlineQuote "$f $w/$t" commandlineQuote "\n"); printf(" -B<format string>\tdefault:" commandlineQuote "%s" commandlineQuote "\n",Default_B_format); printf(" Output format for data pertaining to the base form, as predicted by\n"); printf(" flex pattern rules. See -b\n"); // printf("--More--");getchar(); printf(" -W<format string>\tdefault: not present.\n"); printf(" Output format:\n"); printf(" $w full form\n"); printf(" $t lexical type(s) according to dictionary\n"); printf(" $f full form type frequency\n"); printf(" $i info: - full form not in dictionary\n"); printf(" + full form in dictionary, but other type\n"); printf(" (blank) full form in dictionary\n"); printf(" \\t tab\n"); printf(" $X?, [X]? Do not output X. (X can be tested, though).\n"); printf(" [X]+ Output X only if X occurs at least once. (X is an expression\n"); printf(" containing $b or $B)\n"); printf(" [X]>n Output X only if X occurs more than n times.\n"); printf(" [X]n Output X only if X occurs exactly n times.\n"); printf(" [X]<n Output X only if X occurs less than n times.\n"); printf(" [X] Output X if all nested conditions are met, or if X occurs\n"); printf(" at least once. 
([X] itself is always met!)\n"); printf(" Example: -b" commandlineQuote "$w ($W)[>1[$W?]>1]" commandlineQuote "\n"); printf(" -W" commandlineQuote "$w\\n" commandlineQuote "\n"); printf(" (Output lemma (full form|full form..)>1\n" " if different words have same base form)\n"); // printf("--More--");getchar(); printf(" -c<format string>\tdefault:\t" commandlineQuote "%s" commandlineQuote "\n",DefaultCFormat);// word/lemma/tag lemma: if dictionary gives 1 solution, take dictionary, otherwise rules printf(" Output format:\n"); printf(" $w full form\n"); printf(" $b base form(s) according to dictionary.\n" " (You also need to specify -b<format>)\n" " (If the full form is found in the dictionary and tag=lexical type,\n" " then only one base form is output.\n" " Otherwise all base forms are output)\n"); printf(" $B base form(s) according to flex pattern rules\n" " (You also need to specify -B<format>)\n" " (only if full form not in dictionary, or in dictionary,\n" " but with other lexical type.)\n"); printf(" $s word separator: new line character when the current word is the last\n" " word before a line break, blank otherwise\n"); printf(" $t lexical type(s) according to dictionary\n"); printf(" $f full form frequency\n"); printf(" $i info: indicates - full form not in dictionary\n"); printf(" + full form in dictionary, but other type\n"); printf(" * full form in dictionary\n"); printf(" \\t tab\n"); printf(" $X?, [X]? Do not output X. (X can be tested, though).\n"); printf(" $b and $B are variables: they can occur any number of times,\n"); printf(" including zero. This number can be tested in conditions:\n"); printf(" $bn Output $b only if $b occurs exactly n-times (n >= 0).\n"); printf(" $Bn Output $B only if $B occurs exactly n-times (n >= 0).\n"); printf(" [X]+ Output X only if X occurs at least once. 
(X is an expression\n"); printf(" containing $b or $B)\n"); printf(" [X]>n Output X only if X occurs more than n times.\n"); printf(" [X]n Output X only if X occurs exactly n times.\n"); printf(" [X]<n Output X only if X occurs less than n times.\n"); printf(" [X] Output X if all nested conditions are met, or if X occurs\n"); printf(" at least once. ([X] itself is always met!)\n"); printf(" Example: -c" commandlineQuote "[+$b?]>0[-$b0]$w\\n" commandlineQuote "\n"); printf(" -b" commandlineQuote "$w\t/$t" commandlineQuote "\n"); printf(" (Output +lemma if the word is found in the dictionary,\n" " otherwise -lemma)\n"); // printf("--More--");getchar(); printf(" -l force lemma to all-lowercase (default)\n"); printf(" -l- make case of lemma similar to full form's case\n"); printf(" -p keep punctuation (default)\n"); printf(" -p- ignore punctuation (only together with -t- and no -W format)\n"); printf(" -p+ treat punctuation as tokens (only together with -t- and no -W format)\n"); printf(" -q sort output\n"); printf(" -q- do not sort output (default)\n"); printf(" -q# sort output by frequency\n"); printf(" -s<sep> multiple base forms (-b -B) are <sep>-separated. 
Example: -s" commandlineQuote " | " commandlineQuote "\n"); printf(" -s multiple base forms (-b -B) are " commandlineQuote "%s" commandlineQuote "-separated (default)\n",DefaultSep); printf(" -t input text is tagged (default)\n -t- input text is not tagged\n"); printf(" -U enforce unique flex rules (default)\n"); printf(" -U- allow ambiguous flex rules\n"); printf(" -u enforce unique dictionary look-up (default)\n"); printf(" -u- allow ambiguous dictionary look-up\n"); printf(" -Hn n = 0: use lemma frequencies for disambiguation (default)\n"); printf(" n = 1: use lemma frequencies for disambiguation,\n"); printf(" show candidates for pruning between << and >>\n"); printf(" n = 2: do not use lemma frequencies for disambiguation.\n"); printf(" -v<tag friends file>: Use this to coerce the nearest fit between input\n" " tag and the dictionary's lexical types if the dictionary has more than\n" " one readings of the input word and none of these has a lexical type\n" " that exactly agrees with the input tag. Format:\n" " {<dict type> {<space> <tag>}* <newline>}*\n" " The more to the left the tag is, the better the agreement with the\n" " dictionary'e lexical type\n"); printf(" -x<Lexical type translation table>: Use this to handle tagged texts with\n" " tags that do not occur in the dictionary. Format:\n" " {<dict type> {<space> <tag>}* <newline>}*\n"); printf(" -z<type conversion table>: Use this to change the meaning of $t in -b and\n" " -B formats. Without conversion table, $t is the lexical type of the\n" " full form. With conversion table, $t is the lexical type of the base\n" " form, as defined by the table. Format:\n" " {<full form type> <space> <base form type> <newline>}*\n"); // Bart 20090203: wrongly stated <base form type> <space> <full form type> printf(" -m<size>: Max. number of words in input. 
Default: 0 (meaning: unlimited)\n"); printf(" -A Treat / as separator between alternative words.\n"); // Bart 20030108 printf(" -A- Do not treat / as separator between alternative words (default)\n");// Bart 20030108 printf(" -e<n> ISO8859 Character encoding. 'n' is one of 1,2,7 and 9 (ISO8859-1,2, etc).\n");// Bart 20080219 printf(" -eU Unicode (UTF8) input.\n");// Bart 20081106 printf(" -e Don't use case conversion.\n");// Bart 20080219 printf(" -X XML input. Leave XML elements unchanged.\n");// Bart 20081219 printf(" The next options do not allow space between option letters and argument!\n");// Bart 20090202 printf(" -Xa<ancestor> Only analyse elements with specified ancestor. e.g -Xabody\n");// Bart 20090202 printf(" -Xe<element> Only analyse specified element. e.g -Xpp\n");// Bart 20090202 printf(" -Xw<word> Words are to be found in attribute. e.g -Xwword\n");// Bart 20090202 printf(" -Xp<pos> Words' POS-tags are to be found in attribute. e.g -Xppos\n");// Bart 20090202 printf(" -Xl<lemma> Destination of lemma is the specified attribute. e.g -Xllemma\n");// Bart 20090202 printf(" -Xc<lemmaclass> Destination of lemma class is the specified attribute. 
e.g -Xllemmaclass\n");// Bart 20090202 #endif return Leave; #if defined PROGLEMMATISE case 'H': if(locoptarg) { UseLemmaFreqForDisambiguation = *locoptarg - '0'; if(UseLemmaFreqForDisambiguation < 0 || UseLemmaFreqForDisambiguation > 2) { printf("-H option: specify -H0, -H1 or -H2 (found -H%s)\n",locoptarg); return Error; } } else { printf("-H option: specify -H0, -H1 or -H2\n"); return Error; } break; #endif case 'i': argi = locoptarg; break; #if defined PROGLEMMATISE case 'I': Iformat = dupl(locoptarg); break; case 'k': CollapseHomographs = locoptarg == NULL || *locoptarg != '-'; break; case 'l': baseformsAreLowercase = !locoptarg || *locoptarg != '-'; break; #endif case 'L': whattodo = LEMMATISE; // default action break; #if defined PROGLEMMATISE case 'm': if(locoptarg) { size = strtoul(locoptarg,NULL,10); printf("size %lu\n",size); if(size == 0) size = ULONG_MAX; printf("size %lu\n",size); } else size = ULONG_MAX; break; #endif #if defined PROGMAKEDICT case 'n': //Bart 20021223 if(freq) { if(!freq) { freq = new FreqFile(); } (freq)->addFormat(locoptarg); } break; case 'N': //Bart 20021223 if(freq) { if(!freq) { freq = new FreqFile(); } (freq)->addName(locoptarg); } break; #endif case 'o': argo = locoptarg; break; #if defined PROGLEMMATISE case 'p': if(locoptarg) { if(*locoptarg == '-') { keepPunctuation = 0; } else if(*locoptarg == '+') { keepPunctuation = 2; } else if(*locoptarg == '\0') { keepPunctuation = 1; } else { printf("Invalid argument %s for -p option.\n",locoptarg); return Error; } } else { keepPunctuation = 1; } break; case 'q': if(!locoptarg) locoptarg = "w#"; else if(*locoptarg == '-') { SortOutput = 0; break; } SortOutput = 0; while(*locoptarg) { SortOutput <<= 2; switch(*locoptarg) { case '#': case 'f': case 'F': case 'n': case 'N': SortOutput += SORTFREQ; break; case 'l': case 'L': case 'w': case 'W': SortOutput += SORTWORD; break; case 'p': case 'P': case 't': case 'T': SortOutput += SORTPOS; break; default: SortOutput = SORTWORD; break; } 
++locoptarg; } break; #endif // GNU >> case 'r': printf("12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n"); printf("WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n"); printf("REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n"); printf("INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n"); printf("OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n"); printf("TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n"); printf("YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n"); printf("PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n"); printf("POSSIBILITY OF SUCH DAMAGES.\n"); return Leave; // << GNU #if defined PROGMAKESUFFIXFLEX case 'R': showRefcount = locoptarg == NULL || *locoptarg != '-'; break; #endif #if defined PROGLEMMATISE case 's': if(locoptarg && *locoptarg) { for(char * p = locoptarg;*p;) { if(*p == '\\') { switch(*(p + 1)) { case 't': *p++ = '\t'; memmove(p,p+1,strlen(p)); break; case 'n': *p++ = '\n'; memmove(p,p+1,strlen(p)); break; default: *p = *(p+1); ++p; memmove(p,p+1,strlen(p)); break; } } else ++p; } Sep = dupl(locoptarg); } else Sep = dupl(DefaultSep); break; case 't': InputHasTags = locoptarg == NULL || *locoptarg != '-'; break; case 'u': DictUnique = locoptarg == NULL || *locoptarg != '-'; break; case 'U': RulesUnique = locoptarg == NULL || *locoptarg != '-'; break; case 'v': v = locoptarg; break; #endif // GNU >> case 'w': printf("11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n"); printf("FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN\n"); printf("OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\n"); printf("PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n"); printf("OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n"); printf("MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS\n"); printf("TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE\n"); printf("PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n"); printf("REPAIR OR CORRECTION.\n"); return Leave; // << GNU #if defined PROGLEMMATISE case 'W': Wformat = dupl(locoptarg); break; case 'x': x = locoptarg; break; case 'X': if(locoptarg) { if(*locoptarg == '-') { XML = false; } else { XML = true; switch(*locoptarg) { case 'a': ancestor = dupl(locoptarg+1); break; case 'e': element = dupl(locoptarg+1); break; case 'w': wordAttribute = dupl(locoptarg+1); break; case 'p': POSAttribute = dupl(locoptarg+1); break; case 'l': lemmaAttribute = dupl(locoptarg+1); if(defaultCformat) { if(Bformat) setcformat(DefaultCFormatXML); } break; case 'c': lemmaClassAttribute = dupl(locoptarg+1); break; } } } else XML = true; break; case 'z': z = locoptarg; break; #endif case 'y': nice = locoptarg == NULL || *locoptarg != '-'; break; } return GoOn; }
OptReturnTp optionStruct::doSwitch(int c,char * locoptarg,char * progname) { switch (c) { case '@': readOptsFromFile(locoptarg,progname); break; case 'h': case '?': printf("usage:\n"); printf("%s [options] [LEXICON] [CORPUS-TO-TAG] [BIGRAMS] [LEXICALRULEFILE] [CONTEXTUALRULEFILE]\n",progname); printf("options:\n"); printf(" -@<optionsfile>\n"); printf(" -h help\n"); printf(" -? help\n"); printf(" -D<LEXICON>\n"); printf(" -i<CORPUS-TO-TAG>\n"); printf(" -B<BIGRAMS>\n"); printf(" -L<LEXICALRULEFILE>\n"); printf(" -C<CONTEXTUALRULEFILE>\n"); printf(" -w<WORDLIST>\n"); printf(" -m<INTERMEDFILE>\n"); printf(" -S start state tagger only\n"); printf(" -F final state tagger only\n"); printf(" -o<out> output (optional, otherwise stdout)\n"); printf(" -r About redistribution (GNU)\n"); printf(" -W About warranty (GNU)\n"); printf(" -x<path> path to file with extra options (deprecated)\n"); printf(" -f ConvertToLowerCaseIfFirstWord (default off)\n"); printf(" -a ConvertToLowerCaseIfMostWordsAreCapitalized (default off)\n"); printf(" -s ShowIfLowercaseConversionHelped (default off)\n"); //printf(" -l Language (default danish) (One of the LC_CTYPES accepted by setlocale(), e.g. \"danish\", \"dutch\", \"english\", \"french\", \"german\", \"italian\", \"spanish\")\n"); printf(" -n<class> Noun (default NN)\n"); printf(" -p<class> Proper (default NNP)\n"); printf(" -v Verbose (default off)\n"); printf("============================\n"); printf(" -X- Not XML input. XML tags will be treated as text and POS-tagged. (default)\n"); printf(" -X+ XML input. Leave XML elements unchanged. POS as suffix behind word, separated by slash.\n"); printf(" The next options do not allow space between option letters and argument!\n"); printf(" -Xa<ancestor> Only analyse elements with specified ancestor. e.g -Xap\n"); printf(" -Xs<delimiter> Segment (sentence) delimiter. Can be empty tag. e.g -Xsbr or -Xss\n"); printf(" -Xe<element> Only analyse specified element. 
e.g -Xew\n"); printf(" -Xw<word> Words are to be found in attribute. e.g -Xwword\n"); printf(" -Xt<pretag> Words' pre-tagging to be found in attribute. e.g -Xtprepos\n"); printf(" -Xp<POS> Destination of POS is the specified attribute. e.g -Xppos\n"); return Leave; case 'f': ConvertToLowerCaseIfFirstWord = true;//boolean(locoptarg); break; case 'a': ConvertToLowerCaseIfMostWordsAreCapitalized = true;//boolean(locoptarg); break; case 's': ShowIfLowercaseConversionHelped = true;//boolean(locoptarg); break; case 'n': Noun = dupl(locoptarg);//(default NN)\n"); break; case 'p': Proper = dupl(locoptarg);// (default NNP)\n"); break; case 'v': Verbose = true;//boolean(locoptarg); break; case 'D': //LEXICON Lexicon = dupl(locoptarg); break; case 'i': //CORPUS-TO-TAG Corpus = dupl(locoptarg); break; case 'o': Output = dupl(locoptarg); break; case 'B': //BIGRAMS Bigrams = dupl(locoptarg); break; case 'L': //LEXICALRULEFILE Lexicalrulefile = dupl(locoptarg); break; case 'C': //CONTEXTUALRULEFILE Contextualrulefile = dupl(locoptarg); break; case 'd': case 'w': //WORDLIST wdlistname = dupl(locoptarg); break; /* case 'm': //INTERMEDFILE intermed = dupl(locoptarg); break; */ case 'S': //start state tagger only START_ONLY_FLAG = true; break; case 'F': //final state tagger only FINAL_ONLY_FLAG = true; break; case 'x': //path to file with extra options xoptions = dupl(locoptarg); break; case 'X': if(locoptarg) { if(*locoptarg == '-') { XML = false; } else { XML = true; switch(*locoptarg) { case 'a': ancestor = dupl(locoptarg+1); break; case 's': segment = dupl(locoptarg+1); break; case 'e': element = dupl(locoptarg+1); break; case 'w': wordAttribute = dupl(locoptarg+1); break; case 't': PreTagAttribute = dupl(locoptarg+1); break; case 'p': POSAttribute = dupl(locoptarg+1); break; } } } else XML = true; break; case 'r': printf("12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n"); printf("WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n"); printf("REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n"); printf("INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n"); printf("OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n"); printf("TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n"); printf("YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n"); printf("PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n"); printf("POSSIBILITY OF SUCH DAMAGES.\n"); return Leave; case 'W': printf("11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n"); printf("FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN\n"); printf("OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\n"); printf("PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n"); printf("OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n"); printf("MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS\n"); printf("TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE\n"); printf("PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n"); printf("REPAIR OR CORRECTION.\n"); return Leave; } return GoOn; }