Exemplo n.º 1
0
int
dupl(int n)
{
	/*
	 * Build a copy of the subtree rooted at node n and return the
	 * value handed back by the node constructor (0 if the operator
	 * stored in name[n] is not handled below).
	 *
	 * Depending on the operator, children are either copied
	 * recursively or passed to the constructor unchanged.
	 */
	const int op = name[n];

	/* operators below NCH are rebuilt with mn0() and carry no children */
	if(op < NCH)
		return(mn0(op));

	switch(op){
	case RNULLS:
		return(mn0(op));

	/* rebuilt from ptr[n], which is passed along as-is */
	case RCCL:
	case RNCCL:
		return(mnp(op,ptr[n]));

	/* unary, left[n] is passed along without being duplicated */
	case FINAL:
	case S1FINAL:
	case S2FINAL:
		return(mn1(op,left[n]));

	/* unary, the left subtree is duplicated recursively */
	case STAR:
	case QUEST:
	case PLUS:
	case CARAT:
		return(mn1(op,dupl(left[n])));

	/* binary, only the left subtree is duplicated; right[n] is reused */
	case RSTR:
	case RSCON:
		return(mn2(op,dupl(left[n]),right[n]));

	/* binary, both subtrees are duplicated */
	case BAR:
	case RNEWE:
	case RCAT:
	case DIV:
		return(mn2(op,dupl(left[n]),dupl(right[n])));
# ifdef DEBUG
	default:
		warning("bad switch dupl %d",n);
# endif
	}
	return(0);
}
Exemplo n.º 2
0
optionStruct::optionStruct()
    {
    // Establish the default value of every option before any command line
    // or option file is processed. Which members are set depends on the
    // PROG* macros the program is built with.

    // --- options present in every build ---
    whattodo = LEMMATISE;
    nice = false;
    argi = NULL;
    argo = NULL;
    arge = NULL;
    cformat = NULL; // no copy of a default format is made here

#if (defined PROGMAKESUFFIXFLEX || defined PROGLEMMATISE)
    flx = NULL;
#endif

#if defined PROGLEMMATISE
    // "Still at its default" flags for the -b, -B and -c formats.
    defaultbformat = true;
    defaultBformat = true;
    defaultCformat = true;

    // Format strings; NULL until a setter or option supplies one.
    bformat = NULL;
    Bformat = NULL;
    Wformat = NULL;
    Iformat = NULL;

    // Auxiliary option arguments.
    dictfile = NULL;
    v = NULL;
    x = NULL;
    z = NULL;
    freq = NULL;

    // XML handling.
    XML = false;
    ancestor = NULL;            // if not null, restrict lemmatisation to elements that are offspring of ancestor
    element = NULL;             // if null, analyse all PCDATA that is text
    wordAttribute = NULL;       // if null, word is PCDATA
    POSAttribute = NULL;        // if null, POS is PCDATA
    lemmaAttribute = NULL;      // if null, Lemma is PCDATA
    lemmaClassAttribute = NULL; // if null, lemma class is PCDATA

    // Behavioural switches.
    InputHasTags = true;
    CollapseHomographs = true;
    keepPunctuation = 1;
    SortOutput = 0;
    RulesUnique = true;
    DictUnique = true;
    UseLemmaFreqForDisambiguation = 0;
    baseformsAreLowercase = true;
    treatSlashAsAlternativesSeparator = false;
    size = ULONG_MAX;

    // The separator starts out as a copy of the default separator.
    Sep = dupl(DefaultSep);
#endif

#if defined PROGMAKESUFFIXFLEX
    showRefcount = false;
    CutoffRefcount = 0;
#endif

#ifdef COUNTOBJECTS
    ++COUNT;
#endif
    }
Exemplo n.º 3
0
void optionStruct::setcformat(const char * format)  // -c
    {
    delete [] cformat;
    cformat = dupl(format);
#if defined PROGLEMMATISE
    defaultCformat = 
           format == DefaultCFormat 
        || format == DefaultCFormat_NoTags 
        || format == DefaultCFormatXML
        || format == DefaultCFormatXML_NoDict;
#endif
    }
Exemplo n.º 4
0
/*
 - repeat - generate code for a bounded repetition, recursively if needed
 *
 * The operand to be repeated occupies the strip from `start` up to the
 * position current on entry (`finish`).  Both counts are first classified
 * by MAP() into one of four classes -- 0, 1, N (any other finite count)
 * or INF (INFINITY) -- and REP() packs the two classes into one switch
 * key.  Each case either rewrites the operand in place or duplicates it
 * with dupl() and recurses with smaller counts.
 *
 * Nothing is generated if an error has already been recorded in p->error.
 */
static void
repeat(struct parse *p,
    sopno start,		/* operand from here to end of strip */
    int from,			/* repeated from this number */
    int to)			/* to this number of times (maybe INFINITY) */
{
	sopno finish = HERE();
	/* count classes produced by MAP(); 0 and 1 stand for themselves */
#	define	N	2
#	define	INF	3
	/* combine a (from, to) pair of classes into a single case label */
#	define	REP(f, t)	((f)*8 + (t))
	/* classify a count: values <= 1 unchanged, INFINITY -> INF, else N */
#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
	sopno copy;

	if (p->error != 0)	/* head off possible runaway recursion */
		return;

	assert(from <= to);

	switch (REP(MAP(from), MAP(to))) {
	case REP(0, 0):			/* must be user doing this */
		DROP(finish-start);	/* drop the operand */
		break;
	case REP(0, 1):			/* as x{1,1}? */
	case REP(0, N):			/* as x{1,n}? */
	case REP(0, INF):		/* as x{1,}? */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);		/* offset is wrong... */
		/* the operand now begins one position later; give it a lower bound of 1 */
		repeat(p, start+1, 1, to);
		ASTERN(OOR1, start);
		AHEAD(start);			/* ... fix it */
		EMIT(OOR2, 0);
		AHEAD(THERE());
		ASTERN(O_CH, THERETHERE());
		break;
	case REP(1, 1):			/* trivial case */
		/* done */
		break;
	case REP(1, N):			/* as x?x{1,n-1} */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);
		ASTERN(OOR1, start);
		AHEAD(start);
		EMIT(OOR2, 0);			/* offset very wrong... */
		AHEAD(THERE());			/* ...so fix it */
		ASTERN(O_CH, THERETHERE());
		/* the original operand was shifted by one position by the INSERT above */
		copy = dupl(p, start+1, finish+1);
		/* NOTE(review): finish+4 presumably accounts for the four operators added above -- confirm */
		assert(copy == finish+4);
		repeat(p, copy, 1, to-1);
		break;
	case REP(1, INF):		/* as x+ */
		INSERT(OPLUS_, start);
		ASTERN(O_PLUS, start);
		break;
	case REP(N, N):			/* as xx{m-1,n-1} */
		/* keep one occurrence, then repeat a copy with both bounds reduced by one */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to-1);
		break;
	case REP(N, INF):		/* as xx{n-1,INF} */
		/* keep one occurrence, then repeat a copy with the lower bound reduced by one */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to);
		break;
	default:			/* "can't happen" */
		SETERROR(REG_ASSERT);	/* just in case */
		break;
	}
}
Exemplo n.º 5
0
/*
 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 *
 * Consumes one atom from the pattern (an ordinary character, `.`, a
 * bracket expression, a \( \) subexpression or a \1..\9 back reference),
 * generates code for it, and then handles an optional `*` or \{m,n\}
 * repetition suffix that applies to that atom.
 *
 * Returns 1 only when the atom was an unescaped `$` that was not followed
 * by a repetition suffix; returns 0 in every other case.
 */
static int			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse *p,
    int starordinary)		/* is a leading * an ordinary character? */
{
	int c;
	int count;
	int count2;
	sopno pos;
	int i;
	sopno subno;
	/* flag bit just above the char range; marks a character that was preceded by a backslash */
#	define	BACKSL	(1<<CHAR_BIT)

	pos = HERE();		/* repetition op, if any, covers from here */

	assert(MORE());		/* caller should have ensured this */
	c = GETNEXT();
	if (c == '\\') {
		/* a backslash must be followed by something; tag the escaped character */
		REQUIRE(MORE(), REG_EESCAPE);
		c = BACKSL | GETNEXT();
	}
	switch (c) {
	case '.':
		/* with REG_NEWLINE set, the atom is generated by nonnewline() instead of OANY */
		if (p->g->cflags&REG_NEWLINE)
			nonnewline(p);
		else
			EMIT(OANY, 0);
		break;
	case '[':
		p_bracket(p);
		break;
	case BACKSL|'{':
		/* a \{ where an atom is expected has nothing to repeat */
		SETERROR(REG_BADRPT);
		break;
	case BACKSL|'(':
		/* subexpression: number it and, while it fits in the tables, record where it begins and ends */
		p->g->nsub++;
		subno = p->g->nsub;
		if (subno < NPAREN)
			p->pbegin[subno] = HERE();
		EMIT(OLPAREN, subno);
		/* the MORE here is an error heuristic */
		if (MORE() && !SEETWO('\\', ')'))
			p_bre(p, '\\', ')');
		if (subno < NPAREN) {
			p->pend[subno] = HERE();
			assert(p->pend[subno] != 0);
		}
		EMIT(ORPAREN, subno);
		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
		break;
	case BACKSL|')':	/* should not get here -- must be user */
	case BACKSL|'}':
		SETERROR(REG_EPAREN);
		break;
	case BACKSL|'1':
	case BACKSL|'2':
	case BACKSL|'3':
	case BACKSL|'4':
	case BACKSL|'5':
	case BACKSL|'6':
	case BACKSL|'7':
	case BACKSL|'8':
	case BACKSL|'9':
		/* back reference: strip the escape flag to recover the digit */
		i = (c&~BACKSL) - '0';
		assert(i < NPAREN);
		/* a recorded end position means subexpression i has already been closed */
		if (p->pend[i] != 0) {
			assert(i <= p->g->nsub);
			EMIT(OBACK_, i);
			assert(p->pbegin[i] != 0);
			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
			/* copy the code between the subexpression's parentheses in between the OBACK_/O_BACK pair */
			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
			EMIT(O_BACK, i);
		} else
			SETERROR(REG_ESUBREG);
		/* set on both the success and the error path */
		p->g->backrefs = 1;
		break;
	case '*':
		/* a `*` in atom position is only acceptable when the caller said it is ordinary */
		REQUIRE(starordinary, REG_BADRPT);
		/* FALLTHROUGH */
	default:
		ordinary(p, (char)c);
		break;
	}

	if (EAT('*')) {		/* implemented as +? */
		/* this case does not require the (y|) trick, noKLUDGE */
		INSERT(OPLUS_, pos);
		ASTERN(O_PLUS, pos);
		INSERT(OQUEST_, pos);
		ASTERN(O_QUEST, pos);
	} else if (EATTWO('\\', '{')) {
		/* \{count\}, \{count,\} or \{count,count2\} */
		count = p_count(p);
		if (EAT(',')) {
			if (MORE() && isdigit((uch)PEEK())) {
				count2 = p_count(p);
				REQUIRE(count <= count2, REG_BADBR);
			} else		/* single number with comma */
				count2 = INFINITY;
		} else		/* just a single number */
			count2 = count;
		repeat(p, pos, count, count2);
		if (!EATTWO('\\', '}')) {	/* error heuristics */
			/* skip ahead looking for the closing \} to choose between the two error codes below */
			while (MORE() && !SEETWO('\\', '}'))
				NEXT();
			REQUIRE(MORE(), REG_EBRACE);
			SETERROR(REG_BADBR);
		}
	} else if (c == '$')	/* $ (but not \$) ends it */
		return(1);

	return(0);
}
Exemplo n.º 6
0
void optionStruct::setSep(const char * format)      // -s
    {
    delete [] Sep;
    Sep = dupl(format);
    }
Exemplo n.º 7
0
void optionStruct::setWformat(const char * format)  // -W
    {
    delete [] Wformat;
    Wformat = dupl(format);
    }
Exemplo n.º 8
0
void optionStruct::setbformat(const char * format)  // -b
    {
    delete [] bformat;
    bformat = dupl(format);
    defaultbformat = format == Default_b_format;
    }
Exemplo n.º 9
0
void optionStruct::setBformat(const char * format)  // -B
    {
    delete [] Bformat;
    Bformat = dupl(format);
    defaultBformat = format == Default_B_format;
    }
Exemplo n.º 10
0
void optionStruct::setIformat(const char * format)  // -I
    {
    delete [] Iformat;
    Iformat = dupl(format);
    }
Exemplo n.º 11
0
OptReturnTp optionStruct::doSwitch(int c,char * locoptarg,char * progname)
    {
    switch (c)
        {
        case '@':
            readOptsFromFile(locoptarg,progname);
            break;
#if defined PROGLEMMATISE
        case 'A':
            if(locoptarg && *locoptarg == '-')
                {
                treatSlashAsAlternativesSeparator = false;
                }
            else
                {
                treatSlashAsAlternativesSeparator = true;
                }	    
            break;
        case 'b':
            setbformat(locoptarg);
//            bformat = dupl(locoptarg); 
  //          defaultbformat = false;
            break;
        case 'B':
            setBformat(locoptarg);
//            Bformat = dupl(locoptarg); 
  //          defaultBformat = false;
            break;
#endif
        case 'c':
            cformat = dupl(locoptarg);
            defaultCformat = false;
            break;
#if defined PROGMAKESUFFIXFLEX
        case 'C':
            //CutoffRefcount = locoptarg == NULL  || *locoptarg != '-'; 
            if(!locoptarg || *locoptarg == '-')
                CutoffRefcount = 0;
            else
                CutoffRefcount = strtol(locoptarg,NULL,10);
            break;

            break;
#endif
#if defined PROGLEMMATISE
        case 'd':
            dictfile = locoptarg;
            break;
#endif
        case 'D':
            whattodo = MAKEDICT;
            break;
        case 'e':
            arge = locoptarg;
            switch(*arge)
                {
                case '0':
                case '1':
                case '2':
                case '7':
                case '9':
                    setEncoding(*arge - '0');
                    break;
                case 'u':
                case 'U':
                    setEncoding(ENUNICODE);
                    break;
                }
            break;
#if (defined PROGMAKESUFFIXFLEX || defined PROGLEMMATISE)
        case 'f':
            flx = locoptarg;
            break;
#endif
        case 'F':
            whattodo = MAKEFLEXPATTERNS;
            break;
        case 'h':
        case '?':
            printf("usage:\n");
            printf("============================\n");
#if defined PROGMAKEDICT
            printf("    Create binary dictionary\n");
            printf("%s -D \\\n",progname);
            printf("         -c<format> [-N<frequency file> -n<format>] [-y[-]] \\\n"
                   "        [-i<lemmafile>] [-o<binarydictionary>]\n"
                   "    -c  column format of dictionary (tab separated), e.g. -cBFT, which means:\n"
                   "        1st column B(ase form), 2nd column F(ull form), 3rd column T(ype)\n"
                   "    -n  column format of frequency file (tab separated)\n"
                   "        Example: -nN?FT, which means:\n"
                   "        1st column N(frequency), 2nd column irrelevant,\n"
                   "        3rd column F(ull form), 4th column T(ype)\n"
                   "    -y  test output\n    -y- release output (default)\n"
                   "    -k  collapse homographs (remove \",n\" endings)(default)\n"
                   "    -k- do not collapse homographs (keep \",n\" endings)\n");
//                printf("--More--");getchar();
            printf("===============================\n");
#endif
#if defined PROGMAKESUFFIXFLEX
            printf("    Create or add flex patterns\n");
            printf("%s -F \\\n",progname);
            printf("         -c<format> [-y[-]] [-i<lemmafile>] \\\n"
                   "        [-f<old flexpatterns>] [-o<new flexpatterns>]\n"
                   "    -c  column format, e.g. -cBFT, which means:\n"
                   "        1st column B(aseform), 2nd column F(ullform), 3rd column T(ype)\n"
                   "        For lemmatising untagged text, suppress lexical type information by\n"
                   "        specifying '?' for the column containing the type.\n"
                   "    -y  test output\n    -y- release output (default)\n");
            printf("    -R- Do not append refcount to base form (default)\n");// Bart 20050905
            printf("    -R  Append refcount to base form (format: [<base form>#<refcount>])\n");// Bart 20050905
            printf("    -C- Include all rules in output (default)\n");// Bart 20050905
            printf("    -C<n> Do not include rules with refcount <= <n>\n");// Bart 20050905
//            printf("--More--");getchar();
            printf("=============\n");
#endif
#if defined PROGLEMMATISE
            printf("    Lemmatise\n");
//                printf("%s [-L] -c<format> -b<format> -B<format> [-s[<sep>]] [-u[-]] -d<binarydictionary> -f<flexpatterns> [-z<type conversion table>] [-i<input text>] [-o<output text>] [-m<conflicts>] [-n<newlemmas>] [-x<Lexical type translation table>]\n",argv[0]);
            printf("%s [-L] \\\n",progname);
            printf("         -f<flex patterns> [-d<binary dictionary>] [-u[-]] [-v[-]] \\\n"
                   "         [-I<input format>] [-i<input text>] [-o<output text>] \\\n"
                   "         [-c<format>] [-b<format>] [-B<format>] [-W<format>] [-s[<sep>]] \\\n"
                   "         [-x<Lexical type translation table>] [-v<tag friends file>] \\\n"
                   "         [-z<type conversion table>] [-@<option file>]\n");
            printf("    -i<input text>\tIf -t- defined: any flat text. Otherwise: words must be\n"
                   "        followed by tags, separated by '/'. Default: standard input.\n");  
            printf("    -I<format>\tInput format (if not word/tag (-t) or word (-t-)).\n" 
                   "        $w word to be lemmatised\n" 
                   "        $t tag\n" 
                   "        $d dummy\n" 
                   "        \\t tab\n" 
                   "        \\n new line\n" 
                   "        \\s white space\n" 
                   "        \\S all except white space\n"); 
            printf("    -o<output text>\tOutput format dependent on -b, -B, -c and -W arguments.\n"
                   "        Default output: standard output\n");  
            printf("    -d<binarydictionary>\tDictionary as produced with the -D option set.\n"  
                   "        If no dictionary is specified, only the flex patterns are used.\n"  
                   "        Without dictionary, wrong tags in the input can not be corrected.\n");  
            printf("    -f<flexpatterns>\tFile with flex patterns. (see -F). Best results for\n"
                   "        untagged input are obtained if the rules are made without lexical type\n"
                   "        information. See -c option above.\n");  
            printf("    -b<format string>\tdefault:" commandlineQuote "%s" commandlineQuote "\n",Default_b_format);  
            printf("        Output format for data pertaining to the base form, according to the\n"
                   "        dictionary:\n"
                   "        $f sum of frequencies of the words $W having the base form $w\n"
                   "           (lemmafrequency).\n");
            /*
            printf("        $f base form type or token frequency.\n");
            printf("           (The frequency of the base form type is given if you have\n");
            printf("            (a) specified $f in the -c<format> argument, or\n");
            printf("            (b) specified a -W<format> argument, or\n");
            printf("            (c) specified a -H0 or -H1 argument.\n");
            printf("            Otherwise, base form token frequency is given.)\n");
            */
#if FREQ24
            printf("        $n frequency of the full form $w/$t in \"standard\" corpus.\n");
#endif
//                printf("        $p probability of this lexical type (%%) = 100x$n/sum($n).\n");
            printf("        $t lexical type\n");
            printf("        $w base form\n");
            printf("        $W full form(s)\n");
            printf("        \\$ dollar\n");
            printf("        \\[ [\n");
            printf("        \\] ]\n");
            printf("        Example: -b" commandlineQuote "$f $w/$t" commandlineQuote "\n");
            printf("    -B<format string>\tdefault:" commandlineQuote "%s" commandlineQuote "\n",Default_B_format);  
            printf("        Output format for data pertaining to the base form, as predicted by\n");
            printf("        flex pattern rules. See -b\n");
//            printf("--More--");getchar();
            printf("    -W<format string>\tdefault: not present.\n");  
            printf("        Output format:\n");
            printf("        $w full form\n");
            printf("        $t lexical type(s) according to dictionary\n");
            printf("        $f full form type frequency\n");
            printf("        $i info:  -    full form not in dictionary\n");
            printf("                  +    full form in dictionary, but other type\n");
            printf("               (blank) full form in dictionary\n");
            printf("        \\t tab\n");
            printf("        $X?, [X]? Do not output X. (X can be tested, though).\n");
            printf("        [X]+  Output X only if X occurs at least once. (X is an expression\n");
            printf("              containing $b or $B)\n");
            printf("        [X]>n Output X only if X occurs more than n times.\n");
            printf("        [X]n  Output X only if X occurs exactly n times.\n");
            printf("        [X]<n Output X only if X occurs less than n times.\n");
            printf("        [X]   Output X if all nested conditions are met, or if X occurs\n");
            printf("              at least once. ([X] itself is always met!)\n");
            printf("        Example: -b" commandlineQuote "$w ($W)[>1[$W?]>1]" commandlineQuote "\n");
            printf("                 -W" commandlineQuote "$w\\n" commandlineQuote "\n");
            printf("                (Output lemma (full form|full form..)>1\n"
                   "                 if different words have same base form)\n");
//            printf("--More--");getchar();
            printf("    -c<format string>\tdefault:\t" commandlineQuote "%s" commandlineQuote "\n",DefaultCFormat);// word/lemma/tag lemma: if dictionary gives 1 solution, take dictionary, otherwise rules
            printf("        Output format:\n");
            printf("        $w full form\n");
            printf("        $b base form(s) according to dictionary.\n"
                   "           (You also need to specify -b<format>)\n"
                   "           (If the full form is found in the dictionary and tag=lexical type,\n"
                   "            then only one base form is output.\n"
                   "            Otherwise all base forms are output)\n");
            printf("        $B base form(s) according to flex pattern rules\n"
                   "           (You also need to specify -B<format>)\n"
                   "           (only if full form not in dictionary, or in dictionary,\n"
                   "            but with other lexical type.)\n");
            printf("        $s word separator: new line character when the current word is the last\n"
                   "           word before a line break, blank otherwise\n");
            printf("        $t lexical type(s) according to dictionary\n");
            printf("        $f full form frequency\n");
            printf("        $i info: indicates - full form not in dictionary\n");
            printf("                           + full form in dictionary, but other type\n");
            printf("                           * full form in dictionary\n");
            printf("        \\t tab\n");
            printf("        $X?, [X]? Do not output X. (X can be tested, though).\n");
            printf("        $b and $B are variables: they can occur any number of times,\n");
            printf("        including zero. This number can be tested in conditions:\n");
            printf("        $bn   Output $b only if $b occurs exactly n-times (n >= 0).\n");
            printf("        $Bn   Output $B only if $B occurs exactly n-times (n >= 0).\n");
            printf("        [X]+  Output X only if X occurs at least once. (X is an expression\n");
            printf("              containing $b or $B)\n");
            printf("        [X]>n Output X only if X occurs more than n times.\n");
            printf("        [X]n  Output X only if X occurs exactly n times.\n");
            printf("        [X]<n Output X only if X occurs less than n times.\n");
            printf("        [X]   Output X if all nested conditions are met, or if X occurs\n");
            printf("              at least once. ([X] itself is always met!)\n");
            printf("        Example: -c" commandlineQuote "[+$b?]>0[-$b0]$w\\n" commandlineQuote "\n");
            printf("                 -b" commandlineQuote "$w\t/$t" commandlineQuote "\n");
            printf("                (Output +lemma if the word is found in the dictionary,\n"
                   "                 otherwise -lemma)\n");
//            printf("--More--");getchar();
            printf("    -l  force lemma to all-lowercase (default)\n");
            printf("    -l- make case of lemma similar to full form's case\n");
            printf("    -p  keep punctuation (default)\n");
            printf("    -p- ignore punctuation (only together with -t- and no -W format)\n");
            printf("    -p+ treat punctuation as tokens (only together with -t- and no -W format)\n");
            printf("    -q  sort output\n");
            printf("    -q- do not sort output (default)\n");
            printf("    -q# sort output by frequency\n");
            printf("    -s<sep> multiple base forms (-b -B) are <sep>-separated. Example: -s" commandlineQuote " | " commandlineQuote "\n");
            printf("    -s  multiple base forms (-b -B) are " commandlineQuote "%s" commandlineQuote "-separated (default)\n",DefaultSep);
            printf("    -t  input text is tagged (default)\n    -t- input text is not tagged\n");
            printf("    -U  enforce unique flex rules (default)\n");
            printf("    -U- allow ambiguous flex rules\n");
            printf("    -u  enforce unique dictionary look-up (default)\n");
            printf("    -u- allow ambiguous dictionary look-up\n");
            printf("    -Hn n = 0: use lemma frequencies for disambiguation (default)\n");
            printf("        n = 1: use lemma frequencies for disambiguation,\n");
            printf("               show candidates for pruning between << and >>\n");
            printf("        n = 2: do not use lemma frequencies for disambiguation.\n");
            printf("    -v<tag friends file>: Use this to coerce the nearest fit between input\n"
                   "        tag and the dictionary's lexical types if the dictionary has more than\n"
                   "        one readings of the input word and none of these has a lexical type\n"
                   "        that exactly agrees with the input tag. Format:\n"
                   "             {<dict type> {<space> <tag>}* <newline>}*\n"
                   "        The more to the left the tag is, the better the agreement with the\n"
                   "        dictionary'e lexical type\n");
            printf("    -x<Lexical type translation table>: Use this to handle tagged texts with\n"
                   "        tags that do not occur in the dictionary. Format:\n"
                   "             {<dict type> {<space> <tag>}* <newline>}*\n");
            printf("    -z<type conversion table>: Use this to change the meaning of $t in -b and\n"
                   "        -B formats. Without conversion table, $t is the lexical type of the\n"
                   "        full form. With conversion table, $t is the lexical type of the base\n"
                   "        form, as defined by the table. Format:\n"
                   "             {<full form type> <space> <base form type> <newline>}*\n"); // Bart 20090203: wrongly stated <base form type> <space> <full form type>
            printf("    -m<size>: Max. number of words in input. Default: 0 (meaning: unlimited)\n");
            printf("    -A  Treat / as separator between alternative words.\n"); // Bart 20030108
            printf("    -A- Do not treat / as separator between alternative words (default)\n");// Bart 20030108
            printf("    -e<n> ISO8859 Character encoding. 'n' is one of 1,2,7 and 9 (ISO8859-1,2, etc).\n");// Bart 20080219
            printf("    -eU Unicode (UTF8) input.\n");// Bart 20081106
            printf("    -e  Don't use case conversion.\n");// Bart 20080219
            printf("    -X  XML input. Leave XML elements unchanged.\n");// Bart 20081219
            printf("    The next options do not allow space between option letters and argument!\n");// Bart 20090202
            printf("    -Xa<ancestor>  Only analyse elements with specified ancestor. e.g -Xabody\n");// Bart 20090202
            printf("    -Xe<element>  Only analyse specified element. e.g -Xpp\n");// Bart 20090202
            printf("    -Xw<word>  Words are to be found in attribute. e.g -Xwword\n");// Bart 20090202
            printf("    -Xp<pos>  Words' POS-tags are to be found in attribute. e.g -Xppos\n");// Bart 20090202
            printf("    -Xl<lemma>  Destination of lemma is the specified attribute. e.g -Xllemma\n");// Bart 20090202
            printf("    -Xc<lemmaclass>  Destination of lemma class is the specified attribute. e.g -Xllemmaclass\n");// Bart 20090202
#endif
            return Leave;
#if defined PROGLEMMATISE
        case 'H':
            if(locoptarg)
                {
                UseLemmaFreqForDisambiguation = *locoptarg - '0';
                if(UseLemmaFreqForDisambiguation < 0 || UseLemmaFreqForDisambiguation > 2)
                    {
                    printf("-H option: specify -H0, -H1 or -H2 (found -H%s)\n",locoptarg);
                    return Error;
                    }
                }
            else
                {   
                printf("-H option: specify -H0, -H1 or -H2\n");
                return Error;
                }
            break;
#endif
        case 'i':
            argi = locoptarg;
            break;
#if defined PROGLEMMATISE
        case 'I':
            Iformat = dupl(locoptarg); 
            break;
        case 'k':
            CollapseHomographs = locoptarg == NULL || *locoptarg != '-';
            break;
        case 'l':
            baseformsAreLowercase = !locoptarg || *locoptarg != '-';
            break;
#endif
        case 'L':
            whattodo = LEMMATISE; // default action
            break;
#if defined PROGLEMMATISE
        case 'm':
            if(locoptarg)
                {
                size = strtoul(locoptarg,NULL,10);
                printf("size %lu\n",size);
                if(size == 0)
                    size = ULONG_MAX;
                printf("size %lu\n",size);
                }
            else
                size = ULONG_MAX;
            break;
#endif
#if defined PROGMAKEDICT
        case 'n':
//Bart 20021223            if(freq)
                {
                if(!freq)
                    {
                    freq = new FreqFile();
                    }
                (freq)->addFormat(locoptarg);
                }
            break;
        case 'N':
//Bart 20021223            if(freq)
                {
                if(!freq)
                    {
                    freq = new FreqFile();
                    }
                (freq)->addName(locoptarg);
                }
            break;
#endif
        case 'o':
            argo = locoptarg;
            break;
#if defined PROGLEMMATISE
        case 'p':
            if(locoptarg)
                {
                if(*locoptarg == '-')
                    {
                    keepPunctuation = 0;
                    }
                else if(*locoptarg == '+')
                    {
                    keepPunctuation = 2;
                    }
                else if(*locoptarg == '\0')
                    {
                    keepPunctuation = 1;
                    }
                else
                    {
                    printf("Invalid argument %s for -p option.\n",locoptarg);
                    return Error;
                    }
                }
            else
                {
                keepPunctuation = 1;
                }
            break;
        case 'q':
            if(!locoptarg)
                locoptarg = "w#";
            else if(*locoptarg == '-')
                {
                SortOutput = 0;
                break;
                }

            SortOutput = 0;
            while(*locoptarg)
                {
                SortOutput <<= 2;
                switch(*locoptarg)
                    {
                    case '#':
                    case 'f':
                    case 'F':
                    case 'n':
                    case 'N':
                        SortOutput += SORTFREQ;
                        break;
                    case 'l':
                    case 'L':
                    case 'w':
                    case 'W':
                        SortOutput += SORTWORD;
                        break;
                    case 'p':
                    case 'P':
                    case 't':
                    case 'T':
                        SortOutput += SORTPOS;
                        break;
                    default:
                        SortOutput = SORTWORD;
                        break;
                    }
                ++locoptarg;
                }
            break;
#endif
// GNU >>
        case 'r':
            printf("12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n");
            printf("WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n");
            printf("REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n");
            printf("INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n");
            printf("OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n");
            printf("TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n");
            printf("YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n");
            printf("PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n");
            printf("POSSIBILITY OF SUCH DAMAGES.\n");
            return Leave;
// << GNU
#if defined PROGMAKESUFFIXFLEX
        case 'R':
            showRefcount = locoptarg == NULL  || *locoptarg != '-';
            break;
#endif
#if defined PROGLEMMATISE
        case 's':
            if(locoptarg && *locoptarg)
                {
                for(char * p = locoptarg;*p;)
                    {
                    if(*p == '\\')
                        {
                        switch(*(p + 1))
                            {
                            case 't':
                                *p++ = '\t';
                                memmove(p,p+1,strlen(p));
                                break;
                            case 'n':
                                *p++ = '\n';
                                memmove(p,p+1,strlen(p));
                                break;
                            default:
                                *p = *(p+1);
                                ++p;
                                memmove(p,p+1,strlen(p));
                                break;
                            }
                        }
                    else
                        ++p;
                    }
                Sep = dupl(locoptarg);
                }
            else
                Sep = dupl(DefaultSep);
            break;
        case 't':
            InputHasTags = locoptarg == NULL || *locoptarg != '-';
            break;
        case 'u':
            DictUnique = locoptarg == NULL  || *locoptarg != '-';
            break;
        case 'U':
            RulesUnique = locoptarg == NULL  || *locoptarg != '-';
            break;
        case 'v':
            v = locoptarg;
            break;
#endif
// GNU >>
        case 'w':
            printf("11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n");
            printf("FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN\n");
            printf("OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\n");
            printf("PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n");
            printf("OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n");
            printf("MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\n");
            printf("TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\n");
            printf("PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n");
            printf("REPAIR OR CORRECTION.\n");
            return Leave;
// << GNU
#if defined PROGLEMMATISE
        case 'W':
            Wformat = dupl(locoptarg);
            break;
        case 'x':
            x = locoptarg;
            break;
        case 'X':
            if(locoptarg)
                {
                if(*locoptarg == '-')
                    {
                    XML = false;
                    }
                else
                    {
                    XML = true;
                    switch(*locoptarg)
                        {
                        case 'a':
                            ancestor = dupl(locoptarg+1);
                            break;
                        case 'e':
                            element = dupl(locoptarg+1);
                            break;
                        case 'w':
                            wordAttribute = dupl(locoptarg+1);
                            break;
                        case 'p':
                            POSAttribute = dupl(locoptarg+1);
                            break;
                        case 'l':
                            lemmaAttribute = dupl(locoptarg+1);
                            if(defaultCformat)
                                {
                                if(Bformat)
                                    setcformat(DefaultCFormatXML);
                                }
                            break;
                        case 'c':
                            lemmaClassAttribute = dupl(locoptarg+1);
                            break;
                        }
                    }
                }
            else
                XML = true;
            break;
        case 'z':
            z = locoptarg;
            break;
#endif
        case 'y':
            nice = locoptarg == NULL  || *locoptarg != '-';
            break;
        }
    return GoOn;
    }
Exemplo n.º 12
0
    /*
     * Apply a single command line option to this option set.
     *
     *   c          the option letter
     *   locoptarg  the text that came with the option; may be NULL (the -X
     *              branch checks for that explicitly)
     *   progname   program name, printed in the usage text and passed on to
     *              readOptsFromFile()
     *
     * Returns Leave after printing the usage text (-h, -?) or one of the GNU
     * notices (-r, -W); returns GoOn in every other case. Option letters that
     * have no case label below are silently ignored.
     *
     * NOTE(review): apart from -X, locoptarg is handed to dupl() without a
     * NULL check - confirm that dupl() (defined elsewhere) tolerates NULL.
     */
    OptReturnTp optionStruct::doSwitch(int c,char * locoptarg,char * progname)
        {
        switch (c)
            {
            /* ---- options file ---- */
            case '@':
                readOptsFromFile(locoptarg,progname);
                break;

            /* ---- usage ---- */
            case 'h':
            case '?':
                // One call, one format string: the adjacent literals are
                // concatenated by the compiler. The only conversion is the
                // %s for progname on the second line.
                printf(
                    "usage:\n"
                    "%s [options] [LEXICON] [CORPUS-TO-TAG] [BIGRAMS] [LEXICALRULEFILE] [CONTEXTUALRULEFILE]\n"
                    "options:\n"
                    "    -@<optionsfile>\n"
                    "    -h   help\n"
                    "    -?   help\n"
                    "    -D<LEXICON>\n"
                    "    -i<CORPUS-TO-TAG>\n"
                    "    -B<BIGRAMS>\n"
                    "    -L<LEXICALRULEFILE>\n"
                    "    -C<CONTEXTUALRULEFILE>\n"
                    "    -w<WORDLIST>\n"
                    "    -m<INTERMEDFILE>\n"
                    "    -S   start state tagger only\n"
                    "    -F   final state tagger only\n"
                    "    -o<out> output (optional, otherwise stdout)\n"
                    "    -r   About redistribution (GNU)\n"
                    "    -W   About warranty (GNU)\n"
                    "    -x<path> path to file with extra options (deprecated)\n"
                    "    -f ConvertToLowerCaseIfFirstWord (default off)\n"
                    "    -a ConvertToLowerCaseIfMostWordsAreCapitalized (default off)\n"
                    "    -s ShowIfLowercaseConversionHelped (default off)\n"
                    // (the help entry for an -l language option is disabled)
                    "    -n<class> Noun (default NN)\n"
                    "    -p<class> Proper (default NNP)\n"
                    "    -v Verbose (default off)\n"
                    "============================\n"
                    "    -X- Not XML input. XML tags will be treated as text and POS-tagged. (default)\n"
                    "    -X+ XML input. Leave XML elements unchanged. POS as suffix behind word, separated by slash.\n"
                    "    The next options do not allow space between option letters and argument!\n"
                    "    -Xa<ancestor>  Only analyse elements with specified ancestor. e.g -Xap\n"
                    "    -Xs<delimiter> Segment (sentence) delimiter. Can be empty tag. e.g -Xsbr or -Xss\n"
                    "    -Xe<element>  Only analyse specified element. e.g -Xew\n"
                    "    -Xw<word>  Words are to be found in attribute. e.g -Xwword\n"
                    "    -Xt<pretag>  Words' pre-tagging to be found in attribute. e.g -Xtprepos\n"
                    "    -Xp<POS>  Destination of POS is the specified attribute. e.g -Xppos\n",
                    progname);
                return Leave;

            /* ---- switches: presence turns them on, the argument is not looked at ---- */
            case 'f':
                ConvertToLowerCaseIfFirstWord = true;
                break;
            case 'a':
                ConvertToLowerCaseIfMostWordsAreCapitalized = true;
                break;
            case 's':
                ShowIfLowercaseConversionHelped = true;
                break;
            case 'v':
                Verbose = true;
                break;
            case 'S':
                // start state tagger only
                START_ONLY_FLAG = true;
                break;
            case 'F':
                // final state tagger only
                FINAL_ONLY_FLAG = true;
                break;

            /* ---- options that store a copy of their argument ---- */
            case 'n':
                Noun = dupl(locoptarg);               // help text: default NN
                break;
            case 'p':
                Proper = dupl(locoptarg);             // help text: default NNP
                break;
            case 'D':
                Lexicon = dupl(locoptarg);            // LEXICON
                break;
            case 'i':
                Corpus = dupl(locoptarg);             // CORPUS-TO-TAG
                break;
            case 'o':
                Output = dupl(locoptarg);
                break;
            case 'B':
                Bigrams = dupl(locoptarg);            // BIGRAMS
                break;
            case 'L':
                Lexicalrulefile = dupl(locoptarg);    // LEXICALRULEFILE
                break;
            case 'C':
                Contextualrulefile = dupl(locoptarg); // CONTEXTUALRULEFILE
                break;
            case 'd':   // accepted as a synonym of -w
            case 'w':
                wdlistname = dupl(locoptarg);         // WORDLIST
                break;
            case 'x':
                xoptions = dupl(locoptarg);           // path to file with extra options
                break;
            // There is no live case for 'm': the usage text lists
            // -m<INTERMEDFILE>, but its handler (intermed = dupl(locoptarg))
            // is disabled, so -m is ignored like any unknown letter.

            /* ---- XML handling ---- */
            case 'X':
                // XML mode is on unless the argument starts with '-'.
                XML = locoptarg == NULL || *locoptarg != '-';
                if(locoptarg != NULL)
                    {
                    // The first character selects a sub-option and the rest
                    // of the argument is its value. Any other first character
                    // (including '-' and '+') selects nothing.
                    char * value = locoptarg + 1;
                    switch(*locoptarg)
                        {
                        case 'a':
                            ancestor = dupl(value);
                            break;
                        case 's':
                            segment = dupl(value);
                            break;
                        case 'e':
                            element = dupl(value);
                            break;
                        case 'w':
                            wordAttribute = dupl(value);
                            break;
                        case 't':
                            PreTagAttribute = dupl(value);
                            break;
                        case 'p':
                            POSAttribute = dupl(value);
                            break;
                        }
                    }
                break;

            /* ---- GNU notices ---- */
            case 'r':
                printf(
                    "12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n"
                    "WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n"
                    "REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n"
                    "INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n"
                    "OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n"
                    "TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n"
                    "YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n"
                    "PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n"
                    "POSSIBILITY OF SUCH DAMAGES.\n");
                return Leave;
            case 'W':
                printf(
                    "11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n"
                    "FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN\n"
                    "OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\n"
                    "PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n"
                    "OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n"
                    "MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\n"
                    "TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\n"
                    "PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n"
                    "REPAIR OR CORRECTION.\n");
                return Leave;
            }
        return GoOn;
        }