Exemple #1
0
/*
 * UTF8bvnormalize - produce a normalized copy of the UTF-8 string in bv.
 *
 * bv     input value; NULL yields NULL.
 * newbv  destination berval; when NULL one is allocated here with
 *        ber_memalloc_x() and returned to the caller.
 * flags  LDAP_UTF8_CASEFOLD lower-cases the text while normalizing;
 *        LDAP_UTF8_APPROX keeps only the ASCII code points of the
 *        compatibility decomposition.
 * ctx    memory context handed to every ber_*_x() call.
 *
 * Returns newbv (filled with a freshly allocated, NUL-terminated
 * bv_val) on success, or NULL on allocation failure or malformed UTF-8.
 * On failure every buffer allocated by this function is released,
 * including the berval itself when it was allocated here; a
 * caller-supplied newbv is never freed.
 */
struct berval * UTF8bvnormalize(
	struct berval *bv,
	struct berval *newbv,
	unsigned flags,
	void *ctx )
{
	int i, j, len, clen, outpos, ucsoutlen, outsize;
	int newbv_owned = 0;	/* set when newbv was allocated below */
	char *out = NULL, *outtmp, *s;
	ac_uint4 *ucs = NULL, *p, *ucsout = NULL;
	struct berval *ret;

	/* payload bits of the lead byte, indexed by sequence length */
	static unsigned char mask[] = {
		0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

	unsigned casefold = flags & LDAP_UTF8_CASEFOLD;
	unsigned approx = flags & LDAP_UTF8_APPROX;

	if ( bv == NULL ) {
		return NULL;
	}

	s = bv->bv_val;
	len = bv->bv_len;

	if ( len == 0 ) {
		return ber_dupbv_x( newbv, bv, ctx );
	}

	if ( !newbv ) {
		newbv = ber_memalloc_x( sizeof(struct berval), ctx );
		if ( !newbv ) return NULL;
		newbv_owned = 1;
	}

	/* Should first check to see if string is already in proper
	 * normalized form. This is almost as time consuming as
	 * the normalization though.
	 */

	/* finish off everything up to character before first non-ascii */
	if ( LDAP_UTF8_ISASCII( s ) ) {
		if ( casefold ) {
			outsize = len + 7;
			out = (char *) ber_memalloc_x( outsize, ctx );
			if ( out == NULL ) {
				goto fail;
			}
			outpos = 0;

			/* the copy trails the scan by one character: s[i-1] is
			 * emitted only once s[i] is known to be ASCII too */
			for ( i = 1; (i < len) && LDAP_UTF8_ISASCII(s + i); i++ ) {
				out[outpos++] = TOLOWER( s[i-1] );
			}
			if ( i == len ) {
				/* pure ASCII input: done */
				out[outpos++] = TOLOWER( s[len-1] );
				out[outpos] = '\0';
				newbv->bv_val = out;
				newbv->bv_len = outpos;
				return newbv;
			}
		} else {
			for ( i = 1; (i < len) && LDAP_UTF8_ISASCII(s + i); i++ ) {
				/* empty */
			}

			if ( i == len ) {
				/* pure ASCII input, no folding: plain copy */
				ret = ber_str2bv_x( s, len, 1, newbv, ctx );
				if ( ret == NULL ) {
					goto fail;
				}
				return ret;
			}

			outsize = len + 7;
			out = (char *) ber_memalloc_x( outsize, ctx );
			if ( out == NULL ) {
				goto fail;
			}
			outpos = i - 1;
			memcpy(out, s, outpos);
		}
	} else {
		outsize = len + 7;
		out = (char *) ber_memalloc_x( outsize, ctx );
		if ( out == NULL ) {
			goto fail;
		}
		outpos = 0;
		i = 0;
	}

	p = ucs = ber_memalloc_x( len * sizeof(*ucs), ctx );
	if ( ucs == NULL ) {
		goto fail;
	}

	/* convert character before first non-ascii to ucs-4 */
	if ( i > 0 ) {
		*p = casefold ? TOLOWER( s[i-1] ) : s[i-1];
		p++;
	}

	/* s[i] is now first non-ascii character */
	for (;;) {
		/* s[i] is non-ascii */
		/* convert everything up to next ascii to ucs-4 */
		while ( i < len ) {
			clen = LDAP_UTF8_CHARLEN2( s + i, clen );
			if ( clen == 0 ) {
				/* invalid lead byte */
				goto fail;
			}
			if ( clen == 1 ) {
				/* ascii */
				break;
			}
			*p = s[i] & mask[clen];
			i++;
			for( j = 1; j < clen; j++ ) {
				if ( (s[i] & 0xc0) != 0x80 ) {
					/* not a continuation byte */
					goto fail;
				}
				*p <<= 6;
				*p |= s[i] & 0x3f;
				i++;
			}
			if ( casefold ) {
				*p = uctolower( *p );
			}
			p++;
		}
		/* normalize ucs of length p - ucs */
		uccompatdecomp( ucs, p - ucs, &ucsout, &ucsoutlen, ctx );
		if ( approx ) {
			/* NOTE(review): unlike the branch below, no capacity check
			 * is made before writing to out here -- confirm the
			 * len + 7 sizing is always sufficient. */
			for ( j = 0; j < ucsoutlen; j++ ) {
				if ( ucsout[j] < 0x80 ) {
					out[outpos++] = ucsout[j];
				}
			}
		} else {
			ucsoutlen = uccanoncomp( ucsout, ucsoutlen );
			/* convert ucs to utf-8 and store in out */
			for ( j = 0; j < ucsoutlen; j++ ) {
				/* allocate more space if not enough room for
				   6 bytes and terminator */
				if ( outsize - outpos < 7 ) {
					outsize = ucsoutlen - j + outpos + 6;
					outtmp = (char *) ber_memrealloc_x( out, outsize, ctx );
					if ( outtmp == NULL ) {
						/* out is still valid and is freed at fail */
						goto fail;
					}
					out = outtmp;
				}
				outpos += ldap_x_ucs4_to_utf8( ucsout[j], &out[outpos] );
			}
		}

		ber_memfree_x( ucsout, ctx );
		ucsout = NULL;

		if ( i == len ) {
			break;
		}

		/* Allocate more space in out if necessary */
		if (len - i >= outsize - outpos) {
			outsize += 1 + ((len - i) - (outsize - outpos));
			outtmp = (char *) ber_memrealloc_x(out, outsize, ctx);
			if (outtmp == NULL) {
				goto fail;
			}
			out = outtmp;
		}

		/* s[i] is ascii */
		/* finish off everything up to char before next non-ascii */
		for ( i++; (i < len) && LDAP_UTF8_ISASCII(s + i); i++ ) {
			out[outpos++] = casefold ? TOLOWER( s[i-1] ) : s[i-1];
		}
		if ( i == len ) {
			out[outpos++] = casefold ? TOLOWER( s[len-1] ) : s[len-1];
			break;
		}

		/* convert character before next non-ascii to ucs-4 */
		*ucs = casefold ? TOLOWER( s[i-1] ) : s[i-1];
		p = ucs + 1;
	}

	ber_memfree_x( ucs, ctx );
	out[outpos] = '\0';
	newbv->bv_val = out;
	newbv->bv_len = outpos;
	return newbv;

fail:
	/* release whatever has been allocated so far */
	if ( ucsout != NULL ) {
		ber_memfree_x( ucsout, ctx );
	}
	if ( ucs != NULL ) {
		ber_memfree_x( ucs, ctx );
	}
	if ( out != NULL ) {
		ber_memfree_x( out, ctx );
	}
	if ( newbv_owned ) {
		ber_memfree_x( newbv, ctx );
	}
	return NULL;
}
Exemple #2
0
/* Case-insensitively compare at most N characters of S1 with at most N
   characters of S2.  The result is negative, zero or positive according
   to whether the examined prefix of S1 sorts before, equal to or after
   the examined prefix of S2.
   Note: in multibyte locales a result of 0 does not imply that the two
   prefixes have the same length in bytes.  */
int
mbsncasecmp (const char *s1, const char *s2, size_t n)
{
  if (n == 0 || s1 == s2)
    return 0;

  /* Both strings are walked lazily: a mismatch is usually found within
     the first few characters, so nothing is scanned ahead of need.  */
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t left;
      mbui_iterator_t right;

      mbui_init (left, s1);
      mbui_init (right, s2);

      while (mbui_avail (left) && mbui_avail (right))
        {
          int diff = mb_casecmp (mbui_cur (left), mbui_cur (right));

          if (diff != 0)
            return diff;

          n--;
          if (n == 0)
            return 0;

          mbui_advance (left);
          mbui_advance (right);
        }

      /* One string (or both) ended before N characters were seen:
         the one with characters left is the greater.  */
      return mbui_avail (left) ? 1 : mbui_avail (right) ? -1 : 0;
    }
  else
    {
      const unsigned char *left = (const unsigned char *) s1;
      const unsigned char *right = (const unsigned char *) s2;
      unsigned char lc, rc;

      do
        {
          lc = TOLOWER (*left);
          rc = TOLOWER (*right);
          left++;
          right++;
        }
      while (--n != 0 && lc != '\0' && lc == rc);

      if (UCHAR_MAX <= INT_MAX)
        return lc - rc;

      /* Where 'char' is as wide as 'int' the plain difference of two
         'unsigned char' values may not fit in an 'int'; return only
         the sign.  */
      return (lc > rc) - (lc < rc);
    }
}
Exemple #3
0
/*
 * findopt -- scan the command line, record option settings in the
 * file-scope option variables (Ncols, Fpage, Len, ...) and compact the
 * remaining (non-option) arguments to the front of argv.
 *
 * Arguments whose first character is '-' or '+' are treated as option
 * clusters; a lone "-" is kept as an ordinary argument.  After the scan,
 * defaults are filled in and derived values (Plength, Linew, Colw and,
 * for multi-column non-merged output, Buffer/Colpts) are computed.
 *
 * Returns the number of non-option arguments left in argv.
 * Calls die() on an unknown option letter or when the column width
 * works out to less than 1.
 */
int
findopt(int argc, char *argv[])
{
	char **eargv = argv;	/* write cursor for kept (non-option) arguments */
	int eargc = 0, c;

	while(--argc > 0) {
		switch(c = **++argv) {
		case '-':
			/* a bare "-" is not an option: leave the switch and keep it */
			if((c = *++*argv) == '\0')
				break;
			/* otherwise falls through: c is the first option letter */
		case '+':
			/* walk the letters of this cluster; each `continue' below
			 * jumps to the do/while condition, i.e. to the next letter */
			do {
				if(isdigit(c)) {
					/* numeric option: step back onto the preceding
					 * character before handing argv to atoix() */
					--*argv;
					Ncols = atoix(argv);
				} else
				switch(c = TOLOWER(c)) {
				case '+':
					if((Fpage = atoix(argv)) < 1)
						Fpage = 1;
					continue;
				case 'd':
					Dblspace = 2;
					continue;
				case 'e':
					TABS(Etabn, Etabc);
					continue;
				case 'f':
					Formfeed++;
					continue;
				case 'h':
					/* the header text is the NEXT argument; it is
					 * skipped after the cluster is finished (below) */
					if(--argc > 0)
						Head = argv[1];
					continue;
				case 'i':
					TABS(Itabn, Itabc);
					continue;
				case 'l':
					Len = atoix(argv);
					continue;
				case 'a':
				case 'm':
					Multi = c;
					continue;
				case 'o':
					Offset = atoix(argv);
					continue;
				case 's':
					/* separator is the following character if there
					 * is one, otherwise a tab */
					if((Sepc = (*argv)[1]) != '\0')
						++*argv;
					else
						Sepc = '\t';
					continue;
				case 't':
					Margin = 0;
					continue;
				case 'w':
					Linew = atoix(argv);
					continue;
				case 'n':
					Lnumb++;
					if((Numw = intopt(argv, &Nsepc)) <= 0)
						Numw = NUMW;
					/* NOTE(review): no break/continue here, so 'n'
					 * also sets Balance -- confirm this is intended */
				case 'b':
					Balance = 1;
					continue;
				case 'p':
					Padodd = 1;
					continue;
				default:
					die("bad option");
				}
			} while((c = *++*argv) != '\0');
			/* step over the argument consumed by 'h', if any */
			if(Head == argv[1])
				argv++;
			continue;
		}
		/* not an option: keep it */
		*eargv++ = *argv;
		eargc++;
	}
	/* fill in defaults and derived settings */
	if(Len == 0)
		Len = LENGTH;
	if(Len <= Margin)
		Margin = 0;
	Plength = Len - Margin/2;
	if(Multi == 'm')
		Ncols = eargc;	/* one column per remaining argument */
	switch(Ncols) {
	case 0:
		Ncols = 1;
		/* falls through */
	case 1:
		break;
	default:
		if(Etabn == 0)		/* respect explicit tab specification */
			Etabn = DEFTAB;
	}
	if(Linew == 0)
		Linew = Ncols != 1 && Sepc == 0? LINEW: 512;
	if(Lnumb)
		Linew -= Multi == 'm'? Numw: Numw*Ncols;
	if((Colw = (Linew - Ncols + 1)/Ncols) < 1)
		die("width too small");
	if(Ncols != 1 && Multi == 0) {
		/* multi-column output of a single input: allocate the buffer
		 * and the column pointer table */
		uint32_t buflen = ((uint32_t)(Plength/Dblspace + 1))*(Linew+1)*sizeof(char);
		Buffer = getspace(buflen*sizeof(*Buffer));
		Bufend = &Buffer[buflen];
		Colpts = getspace((Ncols+1)*sizeof(*Colpts));
	}
	return eargc;
}
Exemple #4
0
/**
 * Stores spiazz in _l2Table for the character pair (nChar, nNext) in all
 * four upper/lower-case combinations of the two characters.
 */
void RMFontObj::setBothCase(int nChar, int nNext, signed char spiazz) {
	const int firstUpper = TOUPPER(nChar);
	const int firstLower = TOLOWER(nChar);
	const int nextUpper = TOUPPER(nNext);
	const int nextLower = TOLOWER(nNext);

	_l2Table[firstUpper][nextUpper] = spiazz;
	_l2Table[firstUpper][nextLower] = spiazz;
	_l2Table[firstLower][nextUpper] = spiazz;
	_l2Table[firstLower][nextLower] = spiazz;
}
Exemple #5
0
/**
 * xsltNewLocale:
 * @languageTag: RFC 3066 language tag
 *
 * Creates a new locale of an opaque system dependent type based on the
 * language tag.
 *
 * With XSLT_LOCALE_XLOCALE the tag "ll-CC" is tried as "ll_CC.utf8",
 * then as "ll.utf8", and finally, for two-letter languages, as
 * "ll_RR.utf8" where RR is the region from xsltDefaultRegion().
 *
 * Returns the locale or NULL on error or if no matching locale was found
 */
xsltLocale
xsltNewLocale(const xmlChar *languageTag) {
#ifdef XSLT_LOCALE_XLOCALE
    xsltLocale locale;
    char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
    const xmlChar *p = languageTag;
    const char *region = NULL;
    char *q = localeName;
    int i, llen;

    /* Convert something like "pt-br" to "pt_BR.utf8" */

    if (languageTag == NULL)
        return(NULL);

    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
        *q++ = TOLOWER(*p++);

    if (i == 0)
        return(NULL);

    llen = i;

    if (*p) {
        if (*p++ != '-')
            return(NULL);

        /* The separator is only emitted when a territory follows. */
        *q++ = '_';

        for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
            *q++ = TOUPPER(*p++);

        if (i == 0 || *p)
            return(NULL);

        memcpy(q, ".utf8", 6);
        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
        if (locale != NULL)
            return(locale);

        /* Continue without using country code */

        q = localeName + llen;
    }

    /*
     * Try locale without territory, e.g. for Esperanto (eo).
     * q points right after the language here, so the name becomes
     * "eo.utf8" rather than "eo_.utf8".
     */

    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    if (locale != NULL)
        return(locale);

    /* Try to find most common country for language */

    if (llen != 2)
        return(NULL);

    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
    if (region == NULL)
        return(NULL);

    q = localeName + llen;
    *q++ = '_';
    *q++ = region[0];
    *q++ = region[1];
    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);

    return(locale);
#endif

#ifdef XSLT_LOCALE_WINAPI
    {
        xsltLocale    locale = (xsltLocale)0;
        xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
        xmlChar       *q = localeName;
        const xmlChar *p = languageTag;
        int           i, llen;
        const xmlChar *region = NULL;

        if (languageTag == NULL) goto end;

        xsltEnumSupportedLocales();

        /* Build "ll-CC": lower-cased language, '-', upper-cased country */
        for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
            *q++ = TOLOWER(*p++);
        if (i == 0) goto end;

        llen = i;
        *q++ = '-';
        if (*p) { /*if country tag is given*/
            if (*p++ != '-') goto end;

            for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
                *q++ = TOUPPER(*p++);
            if (i == 0 || *p) goto end;

            *q = '\0';
            locale = xslt_locale_WINAPI(localeName);
            if (locale != (xsltLocale)0) goto end;
        }
        /* Try to find most common country for language */
        region = xsltDefaultRegion(localeName);
        if (region == NULL) goto end;

        /* overwrite whatever followed the '-' with the default region */
        strcpy(localeName + llen + 1, region);
        locale = xslt_locale_WINAPI(localeName);
end:
        return(locale);
    }
#endif

#ifdef XSLT_LOCALE_NONE
    return(NULL);
#endif
}
Exemple #6
0
/* The core of recursive retrieving.  Endless recursion is avoided by
   having all URLs stored to a linked list of URLs, which is checked
   before loading any URL.  That way no URL can get loaded twice.

   The function also supports specification of maximum recursion depth
   and a number of other goodies.

   FILE is the local file whose links are extracted with
   get_urls_html(); THIS_URL is the URL that file corresponds to.  Both
   must be non-NULL (asserted).

   State kept across calls lives in variables declared outside this
   function: first_time, depth, undesirable_urls, base_dir, robots_host
   and forbidden.

   Returns QUOTEXC when the download quota is exceeded (checked on
   entry and again on exit), RECLEVELEXC when the depth limit forbids
   descending into FILE, and RETROK otherwise.  */
uerr_t
recursive_retrieve (const char *file, const char *this_url)
{
  char *constr, *filename, *newloc;
  char *canon_this_url = NULL;
  int dt, inl, dash_p_leaf_HTML = FALSE;
  int meta_disallow_follow;
  int this_url_ftp;            /* See below the explanation */
  uerr_t err;
  struct urlinfo *rurl;
  urlpos *url_list, *cur_url;
  char *rfile; /* For robots */
  struct urlinfo *u;

  assert (this_url != NULL);
  assert (file != NULL);
  /* If quota was exceeded earlier, bail out.  */
  if (downloaded_exceeds_quota ())
    return QUOTEXC;
  /* Cache the current URL in the list.  */
  if (first_time)
    {
      /* These three operations need to be done only once per Wget
         run.  They should probably be at a different location.  */
      if (!undesirable_urls)
	undesirable_urls = make_string_hash_table (0);

      hash_table_clear (undesirable_urls);
      string_set_add (undesirable_urls, this_url);
      /* Enter this_url to the hash table, in original and "enhanced" form.  */
      u = newurl ();
      err = parseurl (this_url, u, 0);
      if (err == URLOK)
	{
	  string_set_add (undesirable_urls, u->url);
	  if (opt.no_parent)
	    base_dir = xstrdup (u->dir); /* Set the base dir.  */
	  /* Set the canonical this_url to be sent as referer.  This
	     problem exists only when running the first time.  */
	  canon_this_url = xstrdup (u->url);
	}
      else
	{
	  DEBUGP (("Double yuck!  The *base* URL is broken.\n"));
	  base_dir = NULL;
	}
      freeurl (u, 1);
      /* Reset the per-run recursion state.  */
      depth = 1;
      robots_host = NULL;
      forbidden = NULL;
      first_time = 0;
    }
  else
    ++depth;

  if (opt.reclevel != INFINITE_RECURSION && depth > opt.reclevel)
    /* We've exceeded the maximum recursion depth specified by the user. */
    {
      if (opt.page_requisites && depth <= opt.reclevel + 1)
	/* When -p is specified, we can do one more partial recursion from the
	   "leaf nodes" on the HTML document tree.  The recursion is partial in
	   that we won't traverse any <A> or <AREA> tags, nor any <LINK> tags
	   except for <LINK REL="stylesheet">. */
	dash_p_leaf_HTML = TRUE;
      else
	/* Either -p wasn't specified or it was and we've already gone the one
	   extra (pseudo-)level that it affords us, so we need to bail out. */
	{
	  DEBUGP (("Recursion depth %d exceeded max. depth %d.\n",
		   depth, opt.reclevel));
	  --depth;
	  /* NOTE(review): canon_this_url is not freed on this path; it is
	     only non-NULL when the first_time branch above ran -- confirm
	     whether that combination can occur.  */
	  return RECLEVELEXC;
	}
    }

  /* Determine whether this_url is an FTP URL.  If it is, it means
     that the retrieval is done through proxy.  In that case, FTP
     links will be followed by default and recursion will not be
     turned off when following them.  */
  this_url_ftp = (urlproto (this_url) == URLFTP);

  /* Get the URL-s from an HTML file: */
  url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
			    dash_p_leaf_HTML, &meta_disallow_follow);

  if (opt.use_robots && meta_disallow_follow)
    {
      /* The META tag says we are not to follow this file.  Respect
         that.  */
      free_urlpos (url_list);
      url_list = NULL;
    }

  /* Decide what to do with each of the URLs.  A URL will be loaded if
     it meets several requirements, discussed later.  */
  for (cur_url = url_list; cur_url; cur_url = cur_url->next)
    {
      /* If quota was exceeded earlier, bail out.  */
      if (downloaded_exceeds_quota ())
	break;
      /* Parse the URL for convenient use in other functions, as well
	 as to get the optimized form.  It also checks URL integrity.  */
      u = newurl ();
      if (parseurl (cur_url->url, u, 0) != URLOK)
	{
	  DEBUGP (("Yuck!  A bad URL.\n"));
	  freeurl (u, 1);
	  continue;
	}
      if (u->proto == URLFILE)
	{
	  DEBUGP (("Nothing to do with file:// around here.\n"));
	  freeurl (u, 1);
	  continue;
	}
      assert (u->url != NULL);
      /* constr is this function's own copy of the URL string; it is
	 replaced further down after optimization or redirection.  */
      constr = xstrdup (u->url);

      /* Several checkings whether a file is acceptable to load:
	 1. check if URL is ftp, and we don't load it
	 2. check for relative links (if relative_only is set)
	 3. check for domain
	 4. check for no-parent
	 5. check for excludes && includes
	 6. check for suffix
	 7. check for same host (if spanhost is unset), with possible
	 gethostbyname baggage
	 8. check for robots.txt

	 Addendum: If the URL is FTP, and it is to be loaded, only the
	 domain and suffix settings are "stronger".

	 Note that .html and (yuck) .htm will get loaded regardless of
	 suffix rules (but that is remedied later with unlink) unless
	 the depth equals the maximum depth.

	 More time- and memory- consuming tests should be put later on
	 the list.  */

      /* inl is set if the URL we are working on (constr) is stored in
	 undesirable_urls.  Using it is crucial to avoid unnecessary
	 repeated continuous hits to the hash table.  */
      inl = string_set_contains (undesirable_urls, constr);

      /* If it is FTP, and FTP is not followed, chuck it out.  */
      if (!inl)
	if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
	  {
	    DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
	    string_set_add (undesirable_urls, constr);
	    inl = 1;
	  }
      /* If it is absolute link and they are not followed, chuck it
	 out.  */
      if (!inl && u->proto != URLFTP)
	if (opt.relative_only && !cur_url->link_relative_p)
	  {
	    DEBUGP (("It doesn't really look like a relative link.\n"));
	    string_set_add (undesirable_urls, constr);
	    inl = 1;
	  }
      /* If its domain is not to be accepted/looked-up, chuck it out.  */
      if (!inl)
	if (!accept_domain (u))
	  {
	    DEBUGP (("I don't like the smell of that domain.\n"));
	    string_set_add (undesirable_urls, constr);
	    inl = 1;
	  }
      /* Check for parent directory.  */
      if (!inl && opt.no_parent
	  /* If the new URL is FTP and the old was not, ignore
             opt.no_parent.  */
	  && !(!this_url_ftp && u->proto == URLFTP))
	{
	  /* Check for base_dir first.  */
	  if (!(base_dir && frontcmp (base_dir, u->dir)))
	    {
	      /* Failing that, check for parent dir.  */
	      struct urlinfo *ut = newurl ();
	      if (parseurl (this_url, ut, 0) != URLOK)
		DEBUGP (("Double yuck!  The *base* URL is broken.\n"));
	      else if (!frontcmp (ut->dir, u->dir))
		{
		  /* Failing that too, kill the URL.  */
		  DEBUGP (("Trying to escape parental guidance with no_parent on.\n"));
		  string_set_add (undesirable_urls, constr);
		  inl = 1;
		}
	      freeurl (ut, 1);
	    }
	}
      /* If the file does not match the acceptance list, or is on the
	 rejection list, chuck it out.  The same goes for the
	 directory exclude- and include- lists.  */
      if (!inl && (opt.includes || opt.excludes))
	{
	  if (!accdir (u->dir, ALLABS))
	    {
	      DEBUGP (("%s (%s) is excluded/not-included.\n", constr, u->dir));
	      string_set_add (undesirable_urls, constr);
	      inl = 1;
	    }
	}
      if (!inl)
	{
	  char *suf = NULL;
	  /* We check for acceptance/rejection rules only for non-HTML
	     documents.  Since we don't know whether they really are
	     HTML, it will be deduced from (an OR-ed list):

	     1) u->file is "" (meaning it is a directory)
	     2) suffix exists, AND:
	     a) it is "html", OR
	     b) it is "htm"

	     If the file *is* supposed to be HTML, it will *not* be
            subject to acc/rej rules, unless a finite maximum depth has
            been specified and the current depth is the maximum depth. */
	  /* NOTE(review): as written, an html/htm suffix bypasses the
	     rules only when opt.reclevel is finite AND depth differs
	     from it; with INFINITE_RECURSION such files ARE checked
	     against the acc/rej rules.  Confirm this matches the intent
	     described above.  */
	  if (!
	      (!*u->file
	       || (((suf = suffix (constr)) != NULL)
                  && ((!strcmp (suf, "html") || !strcmp (suf, "htm"))
                      && ((opt.reclevel != INFINITE_RECURSION) &&
			  (depth != opt.reclevel))))))
	    {
	      if (!acceptable (u->file))
		{
		  DEBUGP (("%s (%s) does not match acc/rej rules.\n",
			  constr, u->file));
		  string_set_add (undesirable_urls, constr);
		  inl = 1;
		}
	    }
	  FREE_MAYBE (suf);
	}
      /* Optimize the URL (which includes possible DNS lookup) only
	 after all other possibilities have been exhausted.  */
      if (!inl)
	{
	  if (!opt.simple_check)
	    opt_url (u);
	  else
	    {
	      char *p;
	      /* Just lowercase the hostname.  */
	      for (p = u->host; *p; p++)
		*p = TOLOWER (*p);
	      xfree (u->url);
	      u->url = str_url (u, 0);
	    }
	  /* Refresh constr from the (possibly rewritten) u->url and
	     record that form as seen, too.  */
	  xfree (constr);
	  constr = xstrdup (u->url);
	  string_set_add (undesirable_urls, constr);
	  /* inl is known to be 0 here, so the !inl test is redundant.  */
	  if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
	    if (!opt.spanhost && this_url && !same_host (this_url, constr))
	      {
		DEBUGP (("This is not the same hostname as the parent's.\n"));
		string_set_add (undesirable_urls, constr);
		inl = 1;
	      }
	}
      /* What about robots.txt?  */
      if (!inl && opt.use_robots && u->proto == URLHTTP)
	{
	  /* Since Wget knows about only one set of robot rules at a
	     time, /robots.txt must be reloaded whenever a new host is
	     accessed.

	     robots_host holds the host the current `forbid' variable
	     is assigned to.  */
	  if (!robots_host || !same_host (robots_host, u->host))
	    {
	      FREE_MAYBE (robots_host);
	      /* Now make robots_host the new host, no matter what the
		 result will be.  So if there is no /robots.txt on the
		 site, Wget will not retry getting robots all the
		 time.  */
	      robots_host = xstrdup (u->host);
	      free_vec (forbidden);
	      forbidden = NULL;
	      err = retrieve_robots (constr, ROBOTS_FILENAME);
	      if (err == ROBOTSOK)
		{
		  rurl = robots_url (constr, ROBOTS_FILENAME);
		  rfile = url_filename (rurl);
		  forbidden = parse_robots (rfile);
		  freeurl (rurl, 1);
		  xfree (rfile);
		}
	    }

	  /* Now that we have (or don't have) robots, we can check for
	     them.  */
	  if (!robots_match (u, forbidden))
	    {
	      /* NOTE(review): the message prints this_url (the parent),
		 not constr (the URL being rejected) -- confirm.  */
	      DEBUGP (("Stuffing %s because %s forbids it.\n", this_url,
		       ROBOTS_FILENAME));
	      string_set_add (undesirable_urls, constr);
	      inl = 1;
	    }
	}

      filename = NULL;
      /* If it wasn't chucked out, do something with it.  */
      if (!inl)
	{
	  DEBUGP (("I've decided to load it -> "));
	  /* Add it to the list of already-loaded URL-s.  */
	  string_set_add (undesirable_urls, constr);
	  /* Automatically followed FTPs will *not* be downloaded
	     recursively.  */
	  if (u->proto == URLFTP)
	    {
	      /* Don't you adore side-effects?  */
	      opt.recursive = 0;
	    }
	  /* Reset its type.  */
	  dt = 0;
	  /* Retrieve it.  */
	  /* NOTE(review): newloc is not initialized in this function;
	     presumably retrieve_url() always stores to it -- verify.  */
	  retrieve_url (constr, &filename, &newloc,
		       canon_this_url ? canon_this_url : this_url, &dt);
	  if (u->proto == URLFTP)
	    {
	      /* Restore...  */
	      opt.recursive = 1;
	    }
	  /* A non-NULL newloc replaces constr; constr now owns that
	     string and frees it at the end of the iteration.  */
	  if (newloc)
	    {
	      xfree (constr);
	      constr = newloc;
	    }
	  /* If there was no error, and the type is text/html, parse
	     it recursively.  */
	  if (dt & TEXTHTML)
	    {
	      /* The result of the nested call is not examined here.  */
	      if (dt & RETROKF)
		recursive_retrieve (filename, constr);
	    }
	  else
	    DEBUGP (("%s is not text/html so we don't chase.\n",
		     filename ? filename: "(null)"));

	  /* NOTE(review): when opt.delete_after is set this branch is
	     entered even if filename is NULL, in which case filename is
	     passed to logprintf("%s") and unlink() -- confirm whether
	     retrieve_url() can leave filename NULL.  */
	  if (opt.delete_after || (filename && !acceptable (filename)))
	    /* Either --delete-after was specified, or we loaded this otherwise
	       rejected (e.g. by -R) HTML file just so we could harvest its
	       hyperlinks -- in either case, delete the local file. */
	    {
	      DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
		       opt.delete_after ? "--delete-after" :
		       "recursive rejection criteria"));
	      logprintf (LOG_VERBOSE,
			 (opt.delete_after ? _("Removing %s.\n")
			  : _("Removing %s since it should be rejected.\n")),
			 filename);
	      if (unlink (filename))
		logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
	      /* The file is gone, so it must not be used for link
		 conversion below.  */
	      dt &= ~RETROKF;
	    }

	  /* If everything was OK, and links are to be converted, let's
	     store the local filename.  */
	  if (opt.convert_links && (dt & RETROKF) && (filename != NULL))
	    {
	      cur_url->convert = CO_CONVERT_TO_RELATIVE;
	      cur_url->local_name = xstrdup (filename);
	    }
	}
      else
	DEBUGP (("%s already in list, so we don't load.\n", constr));
      /* Free filename and constr.  */
      FREE_MAYBE (filename);
      FREE_MAYBE (constr);
      freeurl (u, 1);
      /* Increment the pbuf for the appropriate size.  */
    }
  if (opt.convert_links && !opt.delete_after)
    /* This is merely the first pass: the links that have been
       successfully downloaded are converted.  In the second pass,
       convert_all_links() will also convert those links that have NOT
       been downloaded to their canonical form.  */
    convert_links (file, url_list);
  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);
  /* Free the canonical this_url.  */
  FREE_MAYBE (canon_this_url);
  /* Decrement the recursion depth.  */
  --depth;
  if (downloaded_exceeds_quota ())
    return QUOTEXC;
  else
    return RETROK;
}
Exemple #7
0
/*
 * snum -- scan the number starting at STR according to the numeric
 * format PNF and store its pieces in PCFORM: the sign in sgn, the
 * significant integer digits in ipart, the fractional digits in dpart
 * and the exponent in expn.
 *
 * Returns a pointer to the first character after the scanned number,
 * or STR itself when no number could be read (no digits at all, or
 * thousands separators that do not match PNF->grouping).  If the
 * number consists only of zeros, ipart and dpart are freed and set to
 * NULL and sgn is reset to 0.  An exponent outside
 * [MIN_EXPN, MAX_EXPN] terminates the program with EXIT_TROUBLE.
 */
static char*
snum (const char *str, const struct numfmt* pnf,
      struct canform* pcform)
{
  char *cur = (char*)str;
  char *dst = pcform->ipart;
  int seen_nonzero = 0;
  long int_digits = 0;
  long frac_digits = 0;
  long zero_run, exponent;
  char *end;

  /* Optional leading sign.  */
  if (*cur == pnf->pos_sign)
    {
      pcform->sgn = POS_SIGN;
      move_ahead(cur);
    }
  else if (*cur == pnf->neg_sign)
    {
      pcform->sgn = NEG_SIGN;
      move_ahead(cur);
    }

  /* Integer part.  Thousands separators are only recognized when
     grouping is enabled.  */
  for (;;)
    {
      if (pnf->grouping > 0 && *cur == pnf->thsep)
	{
	  unsigned long run;
	  char *scan;

	  /* Digits immediately before the separator: at least one and
	     at most `grouping'.  (The original kept a per-iteration
	     flag here that was always set, so this check runs for
	     every separator.)  */
	  scan = cur;
	  while (scan > str && (is_digit ((int)*(scan-1))))
	    scan--;
	  run = cur - scan;
	  if (run == 0 || run > pnf->grouping)
	    return (char*) str;

	  /* Digits immediately after the separator: exactly
	     `grouping'.  */
	  scan = cur + 1;
	  while ((is_digit((int)*scan)))
	    scan++;
	  run = scan - cur - 1;
	  if (run != pnf->grouping)
	    return (char*) str;

	  cur++;
	}
      else if ((is_digit((int)*cur)))
	{
	  /* Leading zeros are counted but not stored.  */
	  if (*cur > CHAR_ZERO)
	    seen_nonzero = 1;
	  if (seen_nonzero)
	    *dst++ = *cur;
	  move_ahead(cur);
	  int_digits++;
	}
      else
	break;
    }

  /* Decimal point: subsequent digits go to the fractional buffer.  */
  if (*cur == pnf->dp)
    {
      dst = pcform->dpart;
      move_ahead(cur);
    }

  /* Fractional part.  A run of zeros is stored only once a later
     digit follows it, which drops trailing zeros.  */
  while ((is_digit((int)*cur)))
    {
      zero_run = 0;
      while (*cur == CHAR_ZERO)
	{
	  zero_run++;
	  move_ahead(cur);
	}
      frac_digits += zero_run;

      if ((is_digit((int)*cur)))
	{
	  seen_nonzero = 1;
	  while (zero_run > 0)
	    {
	      *dst++ = CHAR_ZERO;
	      zero_run--;
	    }
	  *dst++ = *cur;
	  move_ahead(cur);
	  frac_digits++;
	}
    }

  /* No digit at all: not a number.  */
  if (int_digits + frac_digits == 0)
    return (char*)str;

  /* The value is zero: drop the digit buffers and the sign.  */
  if (!seen_nonzero)
    {
      free((void*)pcform->ipart);
      free((void*)pcform->dpart);
      pcform->ipart = pcform->dpart = NULL;
      pcform->sgn = 0;
    }

  /* Optional exponent: the exponent letter (either case), not
     followed by white space.  */
  if (!(TOLOWER(*cur) == TOLOWER(pnf->ech) && !is_space (*(cur+1))))
    return cur;

  exponent = strtol (cur + 1, &end, 10);
  if (end == cur + 1)
    {
      /* Nothing numeric after the exponent letter: the letter is not
	 part of the number.  */
      pcform->expn = 0;
      return cur;
    }

  if (exponent < MIN_EXPN)
    {
      fprintf (stderr, _("%s: a number with a too small exponent has been found,\nnamely \"%s\".\n"), PACKAGE, str);
      fprintf (stderr, _("Exponents smaller than %ld are not accepted,\n"), MIN_EXPN);
      fprintf (stderr, _("the execution of the program ends now\n"));
      exit (EXIT_TROUBLE);
    }
  if (exponent > MAX_EXPN)
    {
      fprintf (stderr, _("%s: a number with a too large exponent has been found,\nnamely \"%s\".\n"), PACKAGE, str);
      fprintf (stderr, _("Exponents larger than %ld are not accepted,\n"), MAX_EXPN);
      fprintf (stderr, _("the execution of the program ends now\n"));
      exit (EXIT_TROUBLE);
    }

  /* A zero mantissa always gets exponent 0.  */
  pcform->expn = (seen_nonzero) ? exponent : 0;
  return end;
}
Exemple #8
0
/*
 * cut --
 *	Put a range of lines/columns into a TEXT buffer.
 *
 * There are two buffer areas, both found in the global structure.  The first
 * is the linked list of all the buffers the user has named, the second is the
 * unnamed buffer storage.  There is a pointer, too, which is the current
 * default buffer, i.e. it may point to the unnamed buffer or a named buffer
 * depending on into what buffer the last text was cut.  Logically, in both
 * delete and yank operations, if the user names a buffer, the text is cut
 * into it.  If it's a delete of information on more than a single line, the
 * contents of the numbered buffers are rotated up one, the contents of the
 * buffer named '9' are discarded, and the text is cut into the buffer named
 * '1'.  The text is always cut into the unnamed buffer.
 *
 * In all cases, upper-case buffer names are the same as lower-case names,
 * with the exception that they cause the buffer to be appended to instead
 * of replaced.  Note, however, that if text is appended to a buffer, the
 * default buffer only contains the appended text, not the entire contents
 * of the buffer.
 *
 * !!!
 * The contents of the default buffer would disappear after most operations
 * in historic vi.  It's unclear that this is useful, so we don't bother.
 *
 * When users explicitly cut text into the numeric buffers, historic vi became
 * genuinely strange.  I've never been able to figure out what was supposed to
 * happen.  It behaved differently if you deleted text than if you yanked text,
 * and, in the latter case, the text was appended to the buffer instead of
 * replacing the contents.  Hopefully it's not worth getting right, and here
 * we just treat the numeric buffers like any other named buffer.
 *
 * PUBLIC: int cut __P((SCR *, ARG_CHAR_T *, MARK *, MARK *, int));
 */
int
cut(SCR *sp, ARG_CHAR_T *namep, MARK *fm, MARK *tm, int flags)
{
	CB *cbp;
	ARG_CHAR_T name = '\0';
	db_recno_t lno;
	int append, copy_one, copy_def;

	/*
	 * If the user specified a buffer, put it there.  (This may require
	 * a copy into the numeric buffers.  We do the copy so that we don't
	 * have to reference count and so we don't have to deal with things
	 * like appends to buffers that are used multiple times.)
	 *
	 * Otherwise, if it's supposed to be put in a numeric buffer (usually
	 * a delete) put it there.  The rules for putting things in numeric
	 * buffers were historically a little strange.  There were three cases.
	 *
	 *	1: Some motions are always line mode motions, which means
	 *	   that the cut always goes into the numeric buffers.
	 *	2: Some motions aren't line mode motions, e.g. d10w, but
	 *	   can cross line boundaries.  For these commands, if the
	 *	   cut crosses a line boundary, it goes into the numeric
	 *	   buffers.  This includes most of the commands.
	 *	3: Some motions aren't line mode motions, e.g. d`<char>,
	 *	   but always go into the numeric buffers, regardless.  This
	 *	   was the commands: % ` / ? ( ) N n { } -- and nvi adds ^A.
	 *
	 * Otherwise, put it in the unnamed buffer.
	 */
	append = copy_one = copy_def = 0;
	if (namep != NULL) {
		name = *namep;
		if (LF_ISSET(CUT_NUMREQ) || (LF_ISSET(CUT_NUMOPT) &&
		    (LF_ISSET(CUT_LINEMODE) || fm->lno != tm->lno))) {
			copy_one = 1;
			cb_rotate(sp);
		}
		if ((append = ISUPPER(name)) == 1) {
			if (!copy_one)
				copy_def = 1;
			name = TOLOWER(name);
		}
namecb:		CBNAME(sp, cbp, name);
	} else if (LF_ISSET(CUT_NUMREQ) || (LF_ISSET(CUT_NUMOPT) &&
	    (LF_ISSET(CUT_LINEMODE) || fm->lno != tm->lno))) {
		name = '1';
		cb_rotate(sp);
		goto namecb;
	} else
		cbp = &sp->wp->dcb_store;

copyloop:
	/*
	 * If this is a new buffer, create it and add it into the list.
	 * Otherwise, if it's not an append, free its current contents.
	 */
	if (cbp == NULL) {
		CALLOC_RET(sp, cbp, CB *, 1, sizeof(CB));
		cbp->name = name;
		CIRCLEQ_INIT(&cbp->textq);
		LIST_INSERT_HEAD(&sp->wp->cutq, cbp, q);
	} else if (!append) {
Exemple #9
0
/*	Feed one character to the MIME header parser
**	--------------------------------------------
**
**	Once the header has been consumed (state MIME_TRANSPARENT) every
**	character is handed straight to the target stream.  Before that the
**	character drives a small state machine which
**
**	  - recognises the "content-type:" and "content-transfer-encoding:"
**	    field names case-insensitively (first letter here, the rest via
**	    the CHECK state and me->check_pointer),
**	  - collects the field value into me->value and stores it as an atom
**	    in me->format or me->encoding,
**	  - treats every other line as junk to be skipped, and
**	  - on the blank line ending the header builds the target stream
**	    with HTStreamStack(), falling back to me->sink, and switches to
**	    MIME_TRANSPARENT (or MIME_IGNORE when there is no target at all).
**
**	A line starting with white space is a folded continuation: the
**	state that was active before the newline (me->fold_state) is
**	restored so that the continuation is handled like the line it
**	belongs to.
**
** On entry,
**	me	the MIME parser stream
**	c	the next input character
*/
PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
{
    if (me->state == MIME_TRANSPARENT) {
    	(*me->targetClass.put_character)(me->target, c);/* MUST BE FAST */
	return;
    }
    
    /* This slightly simple conversion just strips CR and turns LF to
    ** newline. On unix LF is \n but on Mac \n is CR for example.
    ** See NetToText for an implementation which preserves single CR or LF.
    */
    if (me->net_ascii) {
        c = FROMASCII(c);
	if (c == CR) return;
	else if (c == LF) c = '\n';
    }
    
    switch(me->state) {

    case MIME_IGNORE:
    	return;

    case MIME_TRANSPARENT:		/* Not reached see above */
    	(*me->targetClass.put_character)(me->target, c);
	return;
	
    case MIME_NET_ASCII:
    	(*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
	return;

    case NEWLINE:
	if (c != '\n' && WHITE(c)) {		/* Folded line */
	    me->state = me->fold_state;	/* pop state before newline */
	    break;
	}
	
	/*	else Falls through */
	
    case BEGINNING_OF_LINE:
        switch(c) {
	case 'c':
	case 'C':
	    me->check_pointer = "ontent-t";
	    me->if_ok = CONTENT_T;
	    me->state = CHECK;
	    break;
	case '\n':			/* Blank line: End of Header! */
	    {
	        if (TRACE) fprintf(stderr,
			"HTMIME: MIME content type is %s, converting to %s\n",
			HTAtom_name(me->format), HTAtom_name(me->targetRep));
		me->target = HTStreamStack(me->format, me->request, NO);
		if (!me->target) {
		    if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
		    me->target = me->sink;	/* Cheat */
		}
		if (me->target) {
		    me->targetClass = *me->target->isa;
		/* Check for encoding and select state from there @@ */
		
		    me->state = MIME_TRANSPARENT; /* From now push straight through */
		} else {
		    me->state = MIME_IGNORE;		/* What else to do? */
		}
	    }
	    break;
	    
	default:
	   goto bad_field_name;
	   break;
	   
	} /* switch on character */
        break;
	
    case CHECK:				/* Check against string */
        /* check_pointer is advanced even on a mismatch, hence the "- 1"
	** when the expected remainder is reported below.
	*/
        if (TOLOWER(c) == *(me->check_pointer)++) {
	    if (!*me->check_pointer) me->state = me->if_ok;
	} else {		/* Error */
	    if (TRACE) fprintf(stderr,
	    	"HTMIME: Bad character `%c' found where `%s' expected\n",
		c, me->check_pointer - 1);
	    goto bad_field_name;
	}
	break;
	
    case CONTENT_T:
        switch(c) {
	case 'r':
	case 'R':
	    me->check_pointer = "ansfer-encoding:";
	    me->if_ok = CONTENT_TRANSFER_ENCODING;
	    me->state = CHECK;
	    break;
	    
	case 'y':
	case 'Y':
	    me->check_pointer = "pe:";
	    me->if_ok = CONTENT_TYPE;
	    me->state = CHECK;
	    break;
	    
	default:
	    goto bad_field_name;
	    
	} /* switch on character */
	break;
	
    case CONTENT_TYPE:
    case CONTENT_TRANSFER_ENCODING:
        me->field = me->state;		/* remember it */
	me->state = SKIP_GET_VALUE;
				/* Fall through! */
    case SKIP_GET_VALUE:
    	if (c == '\n') {
	   me->fold_state = me->state;
	   me->state = NEWLINE;
	   break;
	}
	if (WHITE(c)) break;	/* Skip white space */
	
	me->value_pointer = me->value;
	me->state = GET_VALUE;   
	/* Fall through to store first character */
	
    case GET_VALUE:
    	if (WHITE(c)) {			/* End of field */
	    *me->value_pointer = 0;
	    switch (me->field) {
	    case CONTENT_TYPE:
	        me->format = HTAtom_for(me->value);
		break;
	    case CONTENT_TRANSFER_ENCODING:
	        me->encoding = HTAtom_for(me->value);
		break;
	    default:		/* Should never get here */
	    	break;
	    }
	} else {
	    if (me->value_pointer < me->value + VALUE_SIZE - 1) {
	        *me->value_pointer++ = c;
		break;
	    } else {
	        goto value_too_long;
	    }
	}
	/* Fall through */
	
    case JUNK_LINE:
        if (c == '\n') {
	    /* Remember the current state BEFORE replacing it, exactly as
	    ** SKIP_GET_VALUE does above.  Assigning NEWLINE first made
	    ** fold_state always NEWLINE, so a folded continuation of this
	    ** line was parsed as if it were the start of a new header.
	    */
	    me->fold_state = me->state;
	    me->state = NEWLINE;
	}
	break;
	
	
    } /* switch on state*/
    
    return;
    
value_too_long:
    if (TRACE) fprintf(stderr,
    	"HTMIME: *** Syntax error. (string too long)\n");
    
bad_field_name:				/* Ignore it */
    me->state = JUNK_LINE;
    return;
    
}
Exemple #10
0
int main(int argc, char **argv)
{
    static const char *first_ifdefs[] =
    {
	"/*",
	" * Compile-in this chunk of code unless we've turned it off specifically",
	" * or in general (id=%s).",
	" */",
	"",
	"#ifndef INCL_CHARSET%s",
	"#define INCL_CHARSET%s 1",
	"",
	"/*ifdef NO_CHARSET*/",
	"#ifdef  NO_CHARSET",
	"#undef  NO_CHARSET",
	"#endif",
	"#define NO_CHARSET 0 /* force default to always be active */",
	"",
	"/*ifndef NO_CHARSET%s*/",
	"#ifndef NO_CHARSET%s",
	"",
	"#if    ALL_CHARSETS",
	"#define NO_CHARSET%s 0",
	"#else",
	"#define NO_CHARSET%s 1",
	"#endif",
	"",
	"#endif /* ndef(NO_CHARSET%s) */",
	"",
	"#if NO_CHARSET%s",
	"#define UC_CHARSET_SETUP%s /*nothing*/",
	"#else"
    };
    static const char *last_ifdefs[] =
    {
	"",
	"#endif /* NO_CHARSET%s */",
	"",
	"#endif /* INCL_CHARSET%s */"
    };

    FILE *ctbl;
    char buffer[65536];
    char *outname = 0;
    unsigned n;
    int fontlen;
    int i, nuni, nent;
    int fp0 = 0, fp1 = 0, un0, un1;
    char *p, *p1;
    char *tbuf, ch;

    if (argc < 2 || argc > 5) {
	usage();
    }

    if (!strcmp(argv[1], "-")) {
	ctbl = stdin;
	tblname = "stdin";
    } else {
	ctbl = fopen(tblname = argv[1], "r");
	if (!ctbl) {
	    perror(tblname);
	    done(EX_NOINPUT);
	}
    }

    if (argc > 2) {
	if (!strcmp(argv[2], "-")) {
	    chdr = stdout;
	    hdrname = "stdout";
	} else {
	    hdrname = argv[2];
	}
    } else if (ctbl == stdin) {
	chdr = stdout;
	hdrname = "stdout";
    } else if ((outname = (char *) malloc(strlen(tblname) + 3)) != 0) {
	strcpy(outname, tblname);
	hdrname = outname;
	if ((p = strrchr(outname, '.')) == 0)
	    p = outname + strlen(outname);
	strcpy(p, ".h");
    } else {
	perror("malloc");
	done(EX_NOINPUT);
    }

    if (chdr == 0) {
	chdr = fopen(hdrname, "w");
	if (!chdr) {
	    perror(hdrname);
	    done(EX_NOINPUT);
	}
    }

    /*
     *  For now we assume the default font is always 256 characters.
     */
    fontlen = 256;

    /*
     *  Initialize table.
     */
    for (i = 0; i < fontlen; i++) {
	unicount[i] = 0;
    }

    /*
     *  Now we come to the tricky part.  Parse the input table.
     */
    while (fgets(buffer, sizeof(buffer), ctbl) != NULL) {
	if ((p = strchr(buffer, '\n')) != NULL) {
	    *p = '\0';
	} else {
	    fprintf(stderr,
		    "%s: Warning: line too long or incomplete.\n",
		    tblname);
	}

	/*
	 *  Syntax accepted:
	 *      <fontpos>       <unicode> <unicode> ...
	 *      <fontpos>       <unicode range> <unicode range> ...
	 *      <fontpos>       idem
	 *      <range>         idem
	 *      <range>         <unicode range>
	 *      <unicode>       :<replace>
	 *      <unicode range> :<replace>
	 *      <unicode>       "<C replace>"
	 *      <unicode range> "<C replace>"
	 *
	 *  where <range> ::= <fontpos>-<fontpos>
	 *  and <unicode> ::= U+<h><h><h><h>
	 *  and <h> ::= <hexadecimal digit>
	 *  and <replace> any string not containing '\n' or '\0'
	 *  and <C replace> any string with C backslash escapes.
	 */
	p = buffer;
	while (*p == ' ' || *p == '\t') {
	    p++;
	}
	if (!(*p) || *p == '#') {
	    /*
	     *  Skip comment or blank line.
	     */
	    continue;
	}

	switch (*p) {
	    /*
	     *  Raw Unicode?  I.e. needs some special
	     *  processing.  One digit code.
	     */
	case 'R':
	    if (p[1] == 'a' || p[1] == 'A') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "RawOrEnc", 8)) {
		    p += 8;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    RawOrEnc = strtol(p, 0, 10);
	    Raw_found = 1;
	    continue;

	    /*
	     *  Is this the default table?
	     */
	case 'D':
	    if (p[1] == 'e' || p[1] == 'E') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "Default", 7)) {
		    p += 7;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    this_isDefaultMap = (*p == '1' || TOLOWER(*p) == 'y');
	    continue;

	    /*
	     *  Should the default table be used as a fallback?
	     */
	case 'F':
	    if (p[1] == 'a' || p[1] == 'A') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "FallBack", 8)) {
		    p += 8;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    useDefaultMap = (*p == '1' || TOLOWER(*p) == 'y');
	    continue;

	case 'M':
	    if (p[1] == 'i' || p[1] == 'I') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "MIMEName", 8)) {
		    p += 8;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    sscanf(p, "%40s", this_MIMEcharset);
	    continue;

	    /*
	     *  Display charset name for options screen.
	     */
	case 'O':
	    if (p[1] == 'p' || p[1] == 'P') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "OptionName", 10)) {
		    p += 10;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    for (i = 0; *p && i < UC_MAXLEN_LYNXCSNAME; p++, i++) {
		this_LYNXcharset[i] = *p;
	    }
	    this_LYNXcharset[i] = '\0';
	    continue;

	    /*
	     *  Codepage number.  Three or four digit code.
	     */
	case 'C':
	    if (p[1] == 'o' || p[1] == 'O') {
		buffer[sizeof(buffer) - 1] = '\0';
		if (!strncasecomp(p, "CodePage", 8)) {
		    p += 8;
		}
	    }
	    p++;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    CodePage = strtol(p, 0, 10);
	    continue;
	}

	if (*p == 'U') {
	    un0 = getunicode(&p);
	    if (un0 < 0) {
		fprintf(stderr, "Bad input line: %s\n", buffer);
		done(EX_DATAERR);
		fprintf(stderr,
			"%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n",
			tblname, fp0, fp1);
		done(EX_DATAERR);
	    }
	    un1 = un0;
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    if (*p == '-') {
		p++;
		while (*p == ' ' || *p == '\t') {
		    p++;
		}
		un1 = getunicode(&p);
		if (un1 < 0 || un1 < un0) {
		    fprintf(stderr,
			    "%s: Bad Unicode range U+%x-U+%x\n",
			    tblname, un0, un1);
		    fprintf(stderr, "Bad input line: %s\n", buffer);
		    done(EX_DATAERR);
		}
		while (*p == ' ' || *p == '\t') {
		    p++;
		}
	    }

	    if (*p != ':' && *p != '"') {
		fprintf(stderr, "No ':' or '\"' where expected: %s\n",
			buffer);
		continue;
	    }

	    /*
	     * Allocate a string large enough for the worst-case use in the
	     * loop using sprintf.
	     */
	    tbuf = (char *) malloc(5 * strlen(p));

	    if (!(p1 = tbuf)) {
		fprintf(stderr, "%s: Out of memory\n", tblname);
		done(EX_DATAERR);
	    }
	    if (*p == '"') {
		/*
		 *  Handle "<C replace>".
		 *  Copy chars verbatim until first '"' not \-escaped or
		 *  end of buffer.
		 */
		int escaped = 0;

		for (ch = *(++p); (ch = *p) != '\0'; p++) {
		    if (escaped) {
			escaped = 0;
		    } else if (ch == '"') {
			break;
		    } else if (ch == '\\') {
			escaped = 1;
		    }
		    *p1++ = ch;
		}
		if (escaped || ch != '"') {
		    fprintf(stderr, "Warning: String not terminated: %s\n",
			    buffer);
		    if (escaped)
			*p1++ = '\n';
		}
	    } else {
		/*
		 *  We had ':'.
		 */
		for (ch = *(++p); (ch = *p) != '\0'; p++, p1++) {
		    if (UCH(ch) < 32 || ch == '\\' || ch == '\"' ||
			UCH(ch) >= 127) {
			sprintf(p1, "\\%.3o", UCH(ch));
			p1 += 3;
		    } else {
			*p1 = ch;
		    }
		}
	    }
	    *p1 = '\0';
	    for (i = un0; i <= un1; i++) {
		addpair_str(tbuf, i);
	    }
	    continue;
	}

	/*
	 *  Input line (after skipping spaces) doesn't start with one
	 *  of the specially recognized characters, so try to interpret
	 *  it as starting with a fontpos.
	 */
	fp0 = strtol(p, &p1, 0);
	if (p1 == p) {
	    fprintf(stderr, "Bad input line: %s\n", buffer);
	    done(EX_DATAERR);
	}
	p = p1;

	while (*p == ' ' || *p == '\t') {
	    p++;
	}
	if (*p == '-') {
	    p++;
	    fp1 = strtol(p, &p1, 0);
	    if (p1 == p) {
		fprintf(stderr, "Bad input line: %s\n", buffer);
		done(EX_DATAERR);
	    }
	    p = p1;
	} else {
	    fp1 = 0;
	}

	if (fp0 < 0 || fp0 >= fontlen) {
	    fprintf(stderr,
		    "%s: Glyph number (0x%x) larger than font length\n",
		    tblname, fp0);
	    done(EX_DATAERR);
	}
	if (fp1 && (fp1 < fp0 || fp1 >= fontlen)) {
	    fprintf(stderr,
		    "%s: Bad end of range (0x%x)\n",
		    tblname, fp1);
	    done(EX_DATAERR);
	}

	if (fp1) {
	    /*
	     *  We have a range; expect the word "idem"
	     *  or a Unicode range of the same length.
	     */
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    if (!strncmp(p, "idem", 4)) {
		for (i = fp0; i <= fp1; i++) {
		    addpair(i, i);
		}
		p += 4;
	    } else {
		un0 = getunicode(&p);
		while (*p == ' ' || *p == '\t') {
		    p++;
		}
		if (*p != '-') {
		    fprintf(stderr,
			    "%s: Corresponding to a range of font positions,",
			    tblname);
		    fprintf(stderr,
			    " there should be a Unicode range.\n");
		    done(EX_DATAERR);
		}
		p++;
		un1 = getunicode(&p);
		if (un0 < 0 || un1 < 0) {
		    fprintf(stderr,
			    "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n",
			    tblname, fp0, fp1);
		    done(EX_DATAERR);
		}
		if (un1 - un0 != fp1 - fp0) {
		    fprintf(stderr,
			    "%s: Unicode range U+%x-U+%x not of the same length",
			    tblname, un0, un1);
		    fprintf(stderr,
			    " as font position range 0x%x-0x%x\n",
			    fp0, fp1);
		    done(EX_DATAERR);
		}
		for (i = fp0; i <= fp1; i++) {
		    addpair(i, un0 - fp0 + i);
		}
	    }
	} else {
	    /*
	     *  No range; expect a list of unicode values
	     *  or unicode ranges for a single font position,
	     *  or the word "idem"
	     */
	    while (*p == ' ' || *p == '\t') {
		p++;
	    }
	    if (!strncmp(p, "idem", 4)) {
		addpair(fp0, fp0);
		p += 4;
	    }
	    while ((un0 = getunicode(&p)) >= 0) {
		addpair(fp0, un0);
		while (*p == ' ' || *p == '\t') {
		    p++;
		}
		if (*p == '-') {
		    p++;
		    un1 = getunicode(&p);
		    if (un1 < un0) {
			fprintf(stderr,
				"%s: Bad Unicode range 0x%x-0x%x\n",
				tblname, un0, un1);
			done(EX_DATAERR);
		    }
		    for (un0++; un0 <= un1; un0++) {
			addpair(fp0, un0);
		    }
		}
	    }
	}
	while (*p == ' ' || *p == '\t') {
	    p++;
	}
	if (*p && *p != '#') {
	    fprintf(stderr, "%s: trailing junk (%s) ignored\n", tblname, p);
	}
    }

    /*
     *  Okay, we hit EOF, now output tables.
     */
    fclose(ctbl);

    /*
     *  Compute total size of Unicode list.
     */
    nuni = 0;
    for (i = 0; i < fontlen; i++) {
	nuni += unicount[i];
    }

    if (argc > 3) {
	strncpy(this_MIMEcharset, argv[3], UC_MAXLEN_MIMECSNAME);
    } else if (this_MIMEcharset[0] == '\0') {
	strncpy(this_MIMEcharset, tblname, UC_MAXLEN_MIMECSNAME);
	if ((p = strchr(this_MIMEcharset, '.')) != 0) {
	    *p = '\0';
	}
    }
    for (p = this_MIMEcharset; *p; p++) {
	*p = TOLOWER(*p);
    }
    if (argc > 4) {
	strncpy(this_LYNXcharset, argv[4], UC_MAXLEN_LYNXCSNAME);
    } else if (this_LYNXcharset[0] == '\0') {
	strncpy(this_LYNXcharset, this_MIMEcharset, UC_MAXLEN_LYNXCSNAME);
    }

    if (this_isDefaultMap == -1) {
	this_isDefaultMap = !strncmp(this_MIMEcharset, "iso-8859-1", 10);
    }
    fprintf(stderr,
	    "makeuctb: %s: %stranslation map",
	    this_MIMEcharset, (this_isDefaultMap ? "default " : ""));
    if (this_isDefaultMap == 1) {
	*id_append = '\0';
    } else {
	for (i = 0, p = this_MIMEcharset;
	     *p && (i < UC_MAXLEN_ID_APPEND - 1);
	     p++, i++) {
	    id_append[i + 1] = isalnum(UCH(*p)) ? *p : '_';
	}
	id_append[i + 1] = '\0';
    }
    fprintf(stderr, " (%s).\n", id_append);

    for (n = 0; n < TABLESIZE(first_ifdefs); n++) {
	fprintf(chdr, first_ifdefs[n], id_append);
	fprintf(chdr, "\n");
    }

    fprintf(chdr, "\n\
/*\n\
 *  uni_hash.tbl\n\
 *\n\
 *  Do not edit this file; it was automatically generated by\n\
 *\n\
 *  %s %s\n\
 *\n\
 */\n\
\n\
static const u8 dfont_unicount%s[%d] = \n\
{\n\t", argv[0], argv[1], id_append, fontlen);

    for (i = 0; i < fontlen; i++) {
	if (i >= 128 && unicount[i] > 0 && i < lowest_eight) {
	    lowest_eight = i;
	}
	fprintf(chdr, "%3d", unicount[i]);
	if (i == (fontlen - 1)) {
	    fprintf(chdr, "\n};\n");
	} else if ((i % 8) == 7) {
	    fprintf(chdr, ",\n\t");
	} else {
	    fprintf(chdr, ", ");
	}
    }

    /*
     *  If lowest_eightbit is anything else but 999,
     *  this can't be 7-bit only.
     */
    if (lowest_eight != 999 && !RawOrEnc) {
	RawOrEnc = UCT_ENC_8BIT;
    }

    if (nuni) {
	fprintf(chdr, "\nstatic const u16 dfont_unitable%s[%d] = \n{\n\t",
		id_append, nuni);
    } else {
	fprintf(chdr,
		"\nstatic const u16 dfont_unitable%s[1] = {0}; /* dummy */\n", id_append);
    }

    fp0 = 0;
    nent = 0;
    for (i = 0; i < nuni; i++) {
	while (nent >= unicount[fp0]) {
	    fp0++;
	    nent = 0;
	}
	fprintf(chdr, "0x%04x", unitable[fp0][nent++]);
	if (i == (nuni - 1)) {
	    fprintf(chdr, "\n};\n");
	} else if ((i % 8) == 7) {
	    fprintf(chdr, ",\n\t");
	} else {
	    fprintf(chdr, ", ");
	}
    }

    if (themap_str.entry_ct) {
	fprintf(chdr, "\n\
static struct unipair_str repl_map%s[%d] = \n\
{\n\t", id_append, themap_str.entry_ct);
    } else {
	fprintf(chdr, "\n\
/* static struct unipair_str repl_map%s[]; */\n", id_append);
    }

    for (i = 0; i < themap_str.entry_ct; i++) {
	fprintf(chdr, "{0x%x,\"%s\"}",
		themap_str.entries[i].unicode,
		themap_str.entries[i].replace_str);
	if (i == (themap_str.entry_ct - 1)) {
	    fprintf(chdr, "\n};\n");
	} else if ((i % 4) == 3) {
	    fprintf(chdr, ",\n\t");
	} else {
	    fprintf(chdr, ", ");
	}
    }
    if (themap_str.entry_ct) {
	fprintf(chdr, "\n\
static const struct unimapdesc_str dfont_replacedesc%s = {%d,repl_map%s,",
		id_append, themap_str.entry_ct, id_append);
    } else {
/* Compare the byte ranges S1[0..N1-1] and S2[0..N2-1] ignoring case.
   In a multibyte locale (MB_CUR_MAX > 1) the comparison proceeds one
   multibyte character at a time through mb_casecmp; otherwise it proceeds
   byte by byte on TOLOWER-folded unsigned chars.
   Return a negative value, zero, or a positive value when the first range
   sorts before, equal to, or after the second one.  A range that is a
   proper prefix of the other sorts first.  */
int
mbmemcasecmp (const char *s1, size_t n1, const char *s2, size_t n2)
{
  /* Same start address: only the lengths can tell the ranges apart.  */
  if (s1 == s2)
    {
      if (n1 < n2)
        return -1;
      if (n1 > n2)
        return 1;
      return 0;
    }

  if (MB_CUR_MAX > 1)
    {
      mbi_iterator_t it1;
      mbi_iterator_t it2;

      mbi_init (it1, s1, n1);
      mbi_init (it2, s2, n2);

      for (;;)
        {
          int diff;

          /* Stop as soon as either range is exhausted.  */
          if (!mbi_avail (it1) || !mbi_avail (it2))
            break;

          diff = mb_casecmp (mbi_cur (it1), mbi_cur (it2));
          if (diff != 0)
            return diff;

          mbi_advance (it1);
          mbi_advance (it2);
        }

      /* Whichever range still has characters left is the greater one.  */
      if (mbi_avail (it1))
        return 1;
      if (mbi_avail (it2))
        return -1;
      return 0;
    }
  else
    {
      const unsigned char *a = (const unsigned char *) s1;
      const unsigned char *b = (const unsigned char *) s2;
      size_t common = (n1 < n2 ? n1 : n2);
      size_t k;

      for (k = 0; k < common; k++)
        {
          unsigned char lo1 = TOLOWER (a[k]);
          unsigned char lo2 = TOLOWER (b[k]);

          if (lo1 == lo2)
            continue;

          if (UCHAR_MAX <= INT_MAX)
            return lo1 - lo2;

          /* On machines where 'char' and 'int' are types of the same
             size, the difference of two 'unsigned char' values
             - including the sign bit - doesn't fit in an 'int'.  */
          return (lo1 > lo2 ? 1 : lo1 < lo2 ? -1 : 0);
        }

      /* Equal over the common length: the longer range is greater.  */
      if (n1 > common)
        return 1;
      if (n2 > common)
        return -1;
      return 0;
    }
}
Exemple #12
0
/*
 * Debugger hangman game.
 *
 * A modifier of the form "s<digit>" selects the skill level; any other
 * modifier selects level 3.  Each round picks a word with db_randomsym()
 * and grants skill + 1 wrong guesses.  Letters are read with cngetc() and
 * folded with TOLOWER; a letter that was already tried is ignored.
 * A lost round starts a new word; a won round (or db_randomsym()
 * returning NULL) ends the game.  The addr, haddr and count arguments are
 * not used.
 */
void
db_hangman(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct _abc sabc[1];
	char	*word = NULL;
	size_t	tries = 0;
	size_t	len;
	int	skill;

	if (modif[0] != 's' || (skill = modif[1] - '0') > 9U)
		skill = 3;

	while (1) {
		char	*scan;
		size_t	hits;
		int	c;

		/* Start a fresh round when no word is in play. */
		if (word == NULL) {
			ABC_CLR();

			tries = skill + 1;
			if ((word = db_randomsym(&len)) == NULL)
				break;

			db_plays++;
		}

		db_hang(tries, word, sabc);
		c = cngetc();
		c = TOLOWER(c);

		if (ISLOWALPHA(c) && ABC_ISCLR(c)) {
			/* Count how often the guessed letter occurs. */
			hits = 0;
			scan = word;
			while (*scan) {
				if (TOLOWER(*scan) == c)
					hits++;
				scan++;
			}

			if (hits == 0) {
				ABC_SETWRONG(c);
				tries--;
			} else {
				ABC_SETRIGHT(c);
				len -= hits;
			}
		}

		/* Round still open: guesses remain and letters are missing. */
		if (tries && len)
			continue;

		/* Lost at skill > 2: mark every letter so the word is shown. */
		if (!tries && skill > 2) {
			for (scan = word; *scan; scan++)
				if (ISALPHA(*scan))
					ABC_SETRIGHT(TOLOWER(*scan));
		}
		if (tries)
			db_guesses++;
		db_hang(tries, word, sabc);
		db_printf("\nScore: %lu/%lu\n", db_plays, db_guesses);
		word = NULL;
		if (tries)
			break;
	}
}
Exemple #13
0
/* Fill the 256-entry ichars table with TOLOWER() of each index. */
static void build_ichars( void )
{
	int code = 0 ;

	while( code < 256 ) {
		ichars[code] = TOLOWER(code) ;
		code++ ;
	}
}
Exemple #14
0
/* Scan a number at the start of STR using the format described by PNF:
   an optional sign (pos_sign / neg_sign), integer digits which may be
   grouped with thsep when pnf->grouping > 0, an optional decimal point
   (dp) with fraction digits, and an optional exponent introduced by ech
   (compared through TOLOWER).

   Return a pointer to the first character after the number.  STR itself
   is returned when no digit at all was found or when a thousands
   separator is not surrounded by correctly sized digit groups.  An
   exponent outside [MIN_EXPN, MAX_EXPN] prints a diagnostic and
   terminates the program with EXIT_TROUBLE.  */
static char* 
anum (const char *str, const struct numfmt* pnf)
{
  char *cur = (char*)str;
  long int_digits = 0;
  long frac_digits = 0;
  char *tail;
  long expn;

  if ( (*cur == pnf->pos_sign) || (*cur == pnf->neg_sign) )
    move_ahead(cur);

  if (pnf->grouping > 0)
    {
      for (;;)
	{
	  if (*cur == pnf->thsep)
	    {
	      unsigned long run;
	      char *scan;

	      /* Digits immediately before the separator: at least one and
		 no more than a full group.  */
	      scan = cur;
	      while (scan > str && (is_digit ((int)*(scan-1))))
		scan--;
	      run = cur - scan;
	      if (run == 0 || run > pnf->grouping)
		return (char*) str;

	      /* Digits immediately after the separator: exactly one
		 full group.  */
	      scan = cur + 1;
	      while ((is_digit ((int)*scan)))
		scan++;
	      run = scan - cur - 1;
	      if (run != pnf->grouping)
		return (char*) str;

	      cur++;
	    }
	  else if ((is_digit ((int)*cur)))
	    {
	      move_ahead(cur);
	      int_digits++;
	    }
	  else
	    break;
	}
    }
  else
    {
      while ( (is_digit ((int)*cur)) )
	{
	  move_ahead(cur);
	  int_digits++;
	}
    }

  if (*cur == pnf->dp)
    move_ahead(cur);

  while ( (is_digit ((int)*cur)) )
    {
      move_ahead(cur);
      frac_digits++;
    }

  if (int_digits + frac_digits == 0)
    return (char*)str;

  /* No exponent marker, or marker directly followed by white space.  */
  if (!(TOLOWER(*cur) == TOLOWER(pnf->ech) && !is_space (*(cur+1))))
    return cur;

  expn = strtol (cur + 1, &tail, 10);

  /* The marker was not followed by a number: it is not part of it.  */
  if (tail == cur + 1)
    return cur;

  if (expn < MIN_EXPN)
    {
      fprintf (stderr, _("%s: a number with a too small exponent has been found,\nnamely \"%s\".\n"), PACKAGE, str);
      fprintf (stderr, _("Exponents smaller than %ld are not accepted,\n"), MIN_EXPN);
      fprintf (stderr, _("the execution of the program ends now\n"));
      exit (EXIT_TROUBLE);
    }
  if (expn > MAX_EXPN)
    {
      fprintf (stderr, _("%s: a number with a too large exponent has been found,\nnamely \"%s\".\n"), PACKAGE, str);
      fprintf (stderr, _("Exponents larger than %ld are not accepted,\n"), MAX_EXPN);
      fprintf (stderr, _("the execution of the program ends now\n"));
      exit (EXIT_TROUBLE);
    }
  return tail;
}
Exemple #15
0
/*
 * Compare two UTF-8 bervals after Unicode normalization.
 *
 * bv1, bv2	values to compare; a NULL value sorts before a non-NULL
 *		one and two NULL values compare equal.
 * flags	LDAP_UTF8_CASEFOLD selects case-insensitive comparison.
 *		LDAP_UTF8_ARG1NFC / LDAP_UTF8_ARG2NFC make the function use
 *		the corresponding argument without running the
 *		decomposition/composition step on it (presumably the
 *		caller guarantees it is already normalized -- confirm
 *		with callers).
 * ctx		only passed on to uccompatdecomp().
 *
 * Returns 0 when equal, a negative value when bv1 sorts first and a
 * positive value when bv2 sorts first.  On allocation failure the result
 * is derived from the lengths only; invalid UTF-8 in the first string
 * yields -1 and in the second string yields 1.
 *
 * slow, should be optimized
 */
int UTF8bvnormcmp(
	struct berval *bv1,
	struct berval *bv2,
	unsigned flags,
	void *ctx )
{
	int i, l1, l2, len, ulen, res = 0;
	char *s1, *s2, *done;
	ac_uint4 *ucs, *ucsout1, *ucsout2;

	unsigned casefold = flags & LDAP_UTF8_CASEFOLD;
	unsigned norm1 = flags & LDAP_UTF8_ARG1NFC;
	unsigned norm2 = flags & LDAP_UTF8_ARG2NFC;

	if (bv1 == NULL) {
		return bv2 == NULL ? 0 : -1;

	} else if (bv2 == NULL) {
		return 1;
	}

	l1 = bv1->bv_len;
	l2 = bv2->bv_len;

	/* len is the length both strings have in common */
	len = (l1 < l2) ? l1 : l2;
	if (len == 0) {
		/* at least one string is empty: the empty one sorts first */
		return l1 == 0 ? (l2 == 0 ? 0 : -1) : 1;
	}

	s1 = bv1->bv_val;
	s2 = bv2->bv_val;
	done = s1 + len;

	/* Fast path: compare byte by byte while both strings are ASCII. */
	while ( (s1 < done) && LDAP_UTF8_ISASCII(s1) && LDAP_UTF8_ISASCII(s2) ) {
		if (casefold) {
			char c1 = TOLOWER(*s1);
			char c2 = TOLOWER(*s2);
			res = c1 - c2;
		} else {
			res = *s1 - *s2;
		}			
		s1++;
		s2++;
		if (res) {
			/* done unless next character in s1 or s2 is non-ascii */
			if (s1 < done) {
				if (!LDAP_UTF8_ISASCII(s1) || !LDAP_UTF8_ISASCII(s2)) {
					break;
				}
			} else if (((len < l1) && !LDAP_UTF8_ISASCII(s1)) ||
				((len < l2) && !LDAP_UTF8_ISASCII(s2)))
			{
				/* the common part is used up; only look at the
				 * next byte of a string that really has one */
				break;
			}
			return res;
		}
	}

	/* We have encountered non-ascii or strings equal up to len */

	/* set i to number of iterations */
	i = s1 - done + len;
	/* passed through loop at least once? */
	if (i > 0) {
		if (!res && (s1 == done) &&
		    ((len == l1) || LDAP_UTF8_ISASCII(s1)) &&
		    ((len == l2) || LDAP_UTF8_ISASCII(s2))) {
			/* all ascii and equal up to len */
			return l1 - l2;
		}

		/* rewind one char, and do normalized compare from there */
		s1--;
		s2--;
		/* the i - 1 characters before the rewound one are not
		 * looked at again */
		l1 -= i - 1;
		l2 -= i - 1;
	}
			
	/* Should first check to see if strings are already in
	 * proper normalized form.
	 */
	/* With norm1 set this buffer is kept as ucsout1, so it only has to
	 * hold the first string; otherwise it is reused for the second
	 * string as well and is sized for the longer of the two.
	 */
	ucs = malloc( ( ( norm1 || l1 > l2 ) ? l1 : l2 ) * sizeof(*ucs) );
	if ( ucs == NULL ) {
		return l1 > l2 ? 1 : -1; /* what to do??? */
	}
	
	/*
	 * XXYYZ: we convert to ucs4 even though -llunicode
	 * expects ucs2 in an ac_uint4
	 */
	
	/* convert and normalize 1st string */
	/* len is reused here as the byte length of the character just read */
	for ( i = 0, ulen = 0; i < l1; i += len, ulen++ ) {
		ucs[ulen] = ldap_x_utf8_to_ucs4( s1 + i );
		if ( ucs[ulen] == LDAP_UCS4_INVALID ) {
			free( ucs );
			return -1; /* what to do??? */
		}
		len = LDAP_UTF8_CHARLEN( s1 + i );
	}

	if ( norm1 ) {
		/* use the converted string as is; a separate buffer is then
		 * needed for the second string */
		ucsout1 = ucs;
		l1 = ulen;
		ucs = malloc( l2 * sizeof(*ucs) );
		if ( ucs == NULL ) {
			free( ucsout1 );
			return l1 > l2 ? 1 : -1; /* what to do??? */
		}
	} else {
		/* decompose into ucsout1, then compose; l1 becomes the length
		 * of the result in characters.  ucs stays allocated for the
		 * second string. */
		uccompatdecomp( ucs, ulen, &ucsout1, &l1, ctx );
		l1 = uccanoncomp( ucsout1, l1 );
	}

	/* convert and normalize 2nd string */
	for ( i = 0, ulen = 0; i < l2; i += len, ulen++ ) {
		ucs[ulen] = ldap_x_utf8_to_ucs4( s2 + i );
		if ( ucs[ulen] == LDAP_UCS4_INVALID ) {
			free( ucsout1 );
			free( ucs );
			return 1; /* what to do??? */
		}
		len = LDAP_UTF8_CHARLEN( s2 + i );
	}

	if ( norm2 ) {
		/* ucs itself becomes ucsout2 and is freed below under that name */
		ucsout2 = ucs;
		l2 = ulen;
	} else {
		uccompatdecomp( ucs, ulen, &ucsout2, &l2, ctx );
		l2 = uccanoncomp( ucsout2, l2 );
		free( ucs );
	}
	
	/* compare over the common length; a tie is decided by length below */
	res = casefold
		? ucstrncasecmp( ucsout1, ucsout2, l1 < l2 ? l1 : l2 )
		: ucstrncmp( ucsout1, ucsout2, l1 < l2 ? l1 : l2 );
	free( ucsout1 );
	free( ucsout2 );

	if ( res != 0 ) {
		return res;
	}
	if ( l1 == l2 ) {
		return 0;
	}
	return l1 > l2 ? 1 : -1;
}
Exemple #16
0
/* Build and compile the regular expression attached to INSN.

   The expression is assembled in a fixed-size stack buffer from the
   mnemonic followed by the literal characters of the syntax string;
   every operand field becomes ".*", letters are written as a
   "[<lower><upper>]" pair and the result is anchored with optional
   trailing blanks and "$".  The compiled regex_t is stored in
   CGEN_INSN_RX (insn).

   Return NULL on success.  On failure return a message: a translated
   literal when the syntax string has no mnemonic, or the regerror() text
   (held in a static buffer, overwritten by the next failure) when
   regcomp() rejects the expression; in the latter case CGEN_INSN_RX (insn)
   is released and reset to NULL.  */
char *
xstormy16_cgen_build_insn_regex (CGEN_INSN *insn)
{
  CGEN_OPCODE *opc = (CGEN_OPCODE *) CGEN_INSN_OPCODE (insn);
  const char *mnem = CGEN_INSN_MNEMONIC (insn);
  char rxbuf[CGEN_MAX_RX_ELEMENTS];
  char *rx = rxbuf;
  const CGEN_SYNTAX_CHAR_TYPE *syn;
  int reg_err;

  syn = CGEN_SYNTAX_STRING (CGEN_OPCODE_SYNTAX (opc));

  /* Mnemonics come first in the syntax string.  */
  if (! CGEN_SYNTAX_MNEMONIC_P (* syn))
    return _("missing mnemonic in syntax string");
  ++syn;

  /* Generate a case sensitive regular expression that emulates case
     insensitive matching in the "C" locale.  We cannot generate a case
     insensitive regular expression because in Turkish locales, 'i' and 'I'
     are not equal modulo case conversion.  */

  /* Copy the literal mnemonic out of the insn.  Use the same limit as
     the syntax loop below: one element needs at most 4 bytes and the
     trailer needs 7, so rxbuf can never be overrun.  */
  for (; *mnem && rx <= rxbuf + (CGEN_MAX_RX_ELEMENTS - 7 - 4); mnem++)
    {
      char c = *mnem;

      if (ISALPHA (c))
	{
	  *rx++ = '[';
	  *rx++ = TOLOWER (c);
	  *rx++ = TOUPPER (c);
	  *rx++ = ']';
	}
      else
	*rx++ = c;
    }

  /* Copy any remaining literals from the syntax string into the rx.  */
  for(; * syn != 0 && rx <= rxbuf + (CGEN_MAX_RX_ELEMENTS - 7 - 4); ++syn)
    {
      if (CGEN_SYNTAX_CHAR_P (* syn))
	{
	  char c = CGEN_SYNTAX_CHAR (* syn);

	  switch (c)
	    {
	      /* Escape any regex metacharacters in the syntax.  */
	    case '.': case '[': case '\\':
	    case '*': case '^': case '$':

#ifdef CGEN_ESCAPE_EXTENDED_REGEX
	      /* '*' is already listed above; repeating it here would be
		 a duplicate case label.  */
	    case '?': case '{': case '}':
	    case '(': case ')':
	    case '|': case '+': case ']':
#endif
	      *rx++ = '\\';
	      *rx++ = c;
	      break;

	    default:
	      if (ISALPHA (c))
		{
		  *rx++ = '[';
		  *rx++ = TOLOWER (c);
		  *rx++ = TOUPPER (c);
		  *rx++ = ']';
		}
	      else
		*rx++ = c;
	      break;
	    }
	}
      else
	{
	  /* Replace non-syntax fields with globs.  */
	  *rx++ = '.';
	  *rx++ = '*';
	}
    }

  /* Trailing whitespace ok.  */
  * rx++ = '[';
  * rx++ = ' ';
  * rx++ = '\t';
  * rx++ = ']';
  * rx++ = '*';

  /* But anchor it after that.  */
  * rx++ = '$';
  * rx = '\0';

  CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
  reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB);

  if (reg_err == 0)
    return NULL;
  else
    {
      static char msg[80];

      regerror (reg_err, (regex_t *) CGEN_INSN_RX (insn), msg, 80);
      regfree ((regex_t *) CGEN_INSN_RX (insn));
      free (CGEN_INSN_RX (insn));
      (CGEN_INSN_RX (insn)) = NULL;
      return msg;
    }
}
Exemple #17
0
/*
 * Read one header ("name: value", possibly folded over several lines)
 * from F.
 *
 * On success returns 0 and stores newly allocated copies (xstrdup) of the
 * lower-cased field name in *headname and of the unfolded field body in
 * *contents; either pointer may be NULL when the caller is not interested.
 * The character that follows the header is pushed back onto F.
 *
 * On failure returns -1 and sets *headname and *contents to NULL.  Failure
 * means: the line does not start with a letter (this includes the blank
 * line that ends a header block), a control character or stray character
 * appears in or right after the name, a NUL byte or end of file is hit
 * before the header is complete, or the name or body does not fit into
 * the local buffers.
 */
static int parseheader(FILE *f, char **headname, char **contents)
{
    /* int, not char: getc() returns EOF out of band, and the <ctype.h>
       functions need an unsigned char value or EOF */
    int c;
    char name[80], body[1024];
    int off = 0;
    state s = HDR_NAME_START;


    /* there are two ways out of this loop, both via gotos:
       either we successfully read a character (got_header)
       or we hit an error (ph_error) */
    while ((c = getc(f)) != EOF && c != '\0') {	/* examine each character */
	switch (s) {
	case HDR_NAME_START:
	    if (c == '\r' || c == '\n') {
		/* no header here! */
		goto ph_error;
	    }
	    if (!isalpha(c))
		goto ph_error;
	    name[0] = TOLOWER(c);
	    off = 1;
	    s = HDR_NAME;
	    break;

	case HDR_NAME:
	    if (c == ' ' || c == '\t' || c == ':') {
		name[off] = '\0';
		s = (c == ':' ? HDR_CONTENT_START : COLON);
		break;
	    }
	    if (iscntrl(c)) {
		goto ph_error;
	    }
	    /* keep one byte free for the terminating NUL */
	    if (off >= (int) sizeof(name) - 1) {
		goto ph_error;
	    }
	    name[off++] = TOLOWER(c);
	    break;

	case COLON:
	    if (c == ':') {
		s = HDR_CONTENT_START;
	    } else if (c != ' ' && c != '\t') {
		goto ph_error;
	    }
	    break;

	case HDR_CONTENT_START:
	    if (c == ' ' || c == '\t') /* eat the whitespace */
		break;
	    off = 0;
	    s = HDR_CONTENT;
	    /* falls through! */
	case HDR_CONTENT:
	    if (c == '\r' || c == '\n') {
		int peek = getc(f);

		/* we should peek ahead to see if it's folded whitespace */
		if (c == '\r' && peek == '\n') {
		    c = getc(f);
		} else {
		    c = peek; /* single newline separator */
		}
		if (c != ' ' && c != '\t') {
		    /* this is the end of the header */
		    body[off] = '\0';
		    /* ungetc(EOF, f) is a no-op, so end of file is fine here */
		    ungetc(c, f);
		    goto got_header;
		}
                /* http://www.faqs.org/rfcs/rfc2822.html
		 *
		 * > Unfolding is accomplished by simply removing any CRLF
		 * > that is immediately followed by WSP
		 *
		 * So keep the actual WSP character
		 */
	    }
	    /* just an ordinary character */
	    /* keep one byte free for the terminating NUL */
	    if (off >= (int) sizeof(body) - 1) {
		goto ph_error;
	    }
	    body[off++] = c;
	    break;
	}
    }

    /* if we fall off the end of the loop, we hit some sort of error
       condition (NUL byte or end of file inside a header) */

 ph_error:
    if (headname != NULL) *headname = NULL;
    if (contents != NULL) *contents = NULL;
    return -1;

 got_header:
    if (headname != NULL) *headname = xstrdup(name);
    if (contents != NULL) *contents = xstrdup(body);

    return 0;
}
Exemple #18
0
/* Default instruction parser.

   Match the text at *STRP against the syntax description of INSN:
   first the mnemonic (compared case-insensitively), then each element
   of the syntax string.  Literal syntax characters must match the input
   (again case-insensitively); operand elements are handed to
   CD->parse_operand, which stores what it parses in FIELDS.

   *STRP is only read; this function does not advance it.

   Returns NULL on success, otherwise an error message.  The "syntax
   error" messages are formatted into a static buffer, so they are only
   valid until the next call and the function is not reentrant.  */

static const char *
parse_insn_normal (CGEN_CPU_DESC cd,
		   const CGEN_INSN *insn,
		   const char **strp,
		   CGEN_FIELDS *fields)
{
  /* ??? Runtime added insns not handled yet.  */
  const CGEN_SYNTAX *syntax = CGEN_INSN_SYNTAX (insn);
  const char *str = *strp;
  const char *errmsg;
  const char *p;
  const CGEN_SYNTAX_CHAR_TYPE * syn;
  /* Holds the formatted "syntax error" diagnostics returned to the
     caller; must outlive this call, hence static.  */
  static char msg [80];
#ifdef CGEN_MNEMONIC_OPERANDS
  /* FIXME: wip */
  int past_opcode_p;
#endif

  /* For now we assume the mnemonic is first (there are no leading operands).
     We can parse it without needing to set up operand parsing.
     GAS's input scrubber will ensure mnemonics are lowercase, but we may
     not be called from GAS.  */
  p = CGEN_INSN_MNEMONIC (insn);
  while (*p && TOLOWER (*p) == TOLOWER (*str))
    ++p, ++str;

  /* Input diverged from (or ended before) the full mnemonic.  */
  if (* p)
    return _("unrecognized instruction");

#ifndef CGEN_MNEMONIC_OPERANDS
  /* The mnemonic must be followed by whitespace or end of input.  */
  if (* str && ! ISSPACE (* str))
    return _("unrecognized instruction");
#endif

  CGEN_INIT_PARSE (cd);
  cgen_init_parse_operand (cd);
#ifdef CGEN_MNEMONIC_OPERANDS
  past_opcode_p = 0;
#endif

  /* We don't check for (*str != '\0') here because we want to parse
     any trailing fake arguments in the syntax string.  */
  syn = CGEN_SYNTAX_STRING (syntax);

  /* Mnemonics come first for now, ensure valid string.  */
  if (! CGEN_SYNTAX_MNEMONIC_P (* syn))
    abort ();

  ++syn;

  while (* syn != 0)
    {
      /* Non operand chars must match exactly.  */
      if (CGEN_SYNTAX_CHAR_P (* syn))
	{
	  /* FIXME: While we allow for non-GAS callers above, we assume the
	     first char after the mnemonic part is a space.  */
	  /* FIXME: We also take inappropriate advantage of the fact that
	     GAS's input scrubber will remove extraneous blanks.  */
	  if (TOLOWER (*str) == TOLOWER (CGEN_SYNTAX_CHAR (* syn)))
	    {
#ifdef CGEN_MNEMONIC_OPERANDS
	      if (CGEN_SYNTAX_CHAR(* syn) == ' ')
		past_opcode_p = 1;
#endif
	      ++ syn;
	      ++ str;
	    }
	  else if (*str)
	    {
	      /* Syntax char didn't match.  Can't be this insn.
		 The format string is translated and may be longer than
		 the English text, so the write is bounded.  */
	      /* xgettext:c-format */
	      snprintf (msg, sizeof (msg),
			_("syntax error (expected char `%c', found `%c')"),
			CGEN_SYNTAX_CHAR(*syn), *str);
	      return msg;
	    }
	  else
	    {
	      /* Ran out of input.  Bounded for the same reason as above.  */
	      /* xgettext:c-format */
	      snprintf (msg, sizeof (msg),
			_("syntax error (expected char `%c', found end of instruction)"),
			CGEN_SYNTAX_CHAR(*syn));
	      return msg;
	    }
	  continue;
	}

#ifdef CGEN_MNEMONIC_OPERANDS
      (void) past_opcode_p;
#endif
      /* We have an operand of some sort.  */
      errmsg = cd->parse_operand (cd, CGEN_SYNTAX_FIELD (*syn), &str, fields);
      if (errmsg)
	return errmsg;

      /* Done with this operand, continue with next one.  */
      ++ syn;
    }

  /* The loop above only terminates normally once the whole syntax string
     has been consumed (every other exit is a return), so we are at the
     end of the syntax string here.

     FIXME: For the moment we assume a valid `str' can only contain
     blanks now.  IE: We needn't try again with a longer version of
     the insn and it is assumed that longer versions of insns appear
     before shorter ones (eg: lsr r2,r3,1 vs lsr r2,r3).  */
  while (ISSPACE (* str))
    ++ str;

  if (* str != '\0')
    return _("junk at end of line"); /* FIXME: would like to include `str' */

  return NULL;
}
Exemple #19
0
/*
 * Prompt for a key sequence with "Describe key briefly: ", look the
 * sequence up in the keymaps of the current buffer's modes (starting at
 * index curbp->b_nmodes and working down to 0), and report through
 * ewprintf() which function, if any, the sequence is bound to.
 *
 * Neither argument (f, n) is used.  Always returns TRUE; when invoked
 * while `inmacro' is set (and NO_MACRO is not defined) it returns
 * immediately without prompting.
 */
/* ARGSUSED */
int
desckey(int f, int n)
{
	KEYMAP	*curmap;
	PF	 funct;
	int	 c, m, i, num;
	char	*pep;
	char	 dprompt[80];

#ifndef NO_MACRO
	if (inmacro)
		return (TRUE);	/* ignore inside keyboard macro */
#endif /* !NO_MACRO */
	/* Seed the prompt; clamp num so pep stays inside dprompt. */
	num = strlcpy(dprompt, "Describe key briefly: ", sizeof(dprompt));
	if (num >= sizeof(dprompt))
		num = sizeof(dprompt) - 1;
	/* pep marks where the next key name is appended to the prompt. */
	pep = dprompt + num;
	key.k_count = 0;
	/* Start the lookup in the mode at the highest index. */
	m = curbp->b_nmodes;
	curmap = curbp->b_modes[m]->p_map;
	for (;;) {
		/*
		 * Read keys until doscan() yields a non-NULL function for
		 * the sequence typed so far.
		 */
		for (;;) {
			ewprintf("%s", dprompt);
			/*
			 * After the prompt has been shown, turn the
			 * character before pep (the prompt's trailing
			 * blank, or a '-' appended earlier) into a space.
			 */
			pep[-1] = ' ';
			/*
			 * Record the key in key.k_chars and append its name.
			 * NOTE(review): getkeyname() presumably returns a
			 * pointer to the end of the text it wrote - confirm.
			 */
			pep = getkeyname(pep, sizeof(dprompt) - (pep - dprompt),
			    key.k_chars[key.k_count++] = c = getkey(FALSE));
			if ((funct = doscan(curmap, c, &curmap)) != NULL)
				break;
			/* NULL result: append '-' and read another key. */
			*pep++ = '-';
			*pep = '\0';
		}
		/* Anything other than `rescan' is the final answer. */
		if (funct != rescan)
			break;
		/*
		 * Got `rescan': if the last key is upper case, retry the
		 * lookup in the same map with its lower-case form.
		 */
		if (ISUPPER(key.k_chars[key.k_count - 1])) {
			funct = doscan(curmap,
			    TOLOWER(key.k_chars[key.k_count - 1]), &curmap);
			if (funct == NULL) {
				/* NULL result: keep reading keys. */
				*pep++ = '-';
				*pep = '\0';
				continue;
			}
			if (funct != rescan)
				break;
		}
		/*
		 * Still `rescan': move to the next lower mode and replay
		 * every key collected so far through that mode's map.
		 */
nextmode:
		if (--m < 0)
			break;	/* no modes left; funct is still rescan */
		curmap = curbp->b_modes[m]->p_map;
		for (i = 0; i < key.k_count; i++) {
			funct = doscan(curmap, key.k_chars[i], &curmap);
			if (funct != NULL) {
				/*
				 * A non-rescan result on the last key is the
				 * answer; any other non-NULL result means
				 * this mode is abandoned for the next one.
				 */
				if (i == key.k_count - 1 && funct != rescan)
					goto found;
				funct = rescan;
				goto nextmode;
			}
		}
		/*
		 * Every replayed key returned NULL in this mode: append '-'
		 * and go back to reading keys, using this mode's map.
		 */
		*pep++ = '-';
		*pep = '\0';
	}
	/* Reached by `goto found' and by every `break' out of the loop. */
found:
	/*
	 * NOTE(review): "%k" is not a standard printf conversion; it is
	 * presumably expanded by ewprintf() to the key sequence held in
	 * `key' - confirm against ewprintf().
	 */
	if (funct == rescan || funct == selfinsert)
		ewprintf("%k is not bound to any function");
	else if ((pep = (char *)function_name(funct)) != NULL)
		ewprintf("%k runs the command %s", pep);
	else
		ewprintf("%k is bound to an unnamed function");
	return (TRUE);
}
/*
 * Translate an X11 key event into the engine's key code.
 *
 * The keysym reported by XLookupString() is mapped to the matching K_*
 * constant (or '*' for the keypad multiply key).  Any keysym without an
 * explicit mapping falls back to the first byte of the text produced by
 * XLookupString(), converted with TOLOWER().
 *
 * Returns the translated key code, or 0 when the keysym has no explicit
 * mapping and the event produced no text.
 */
static int XLateKey( XKeyEvent *ev )
{

	int key;
	int len;
	char buf[ 64 ];
	KeySym keysym;

	key = 0;

	/*
	 * Keep the byte count: when it is 0 nothing was stored in buf, so
	 * buf must not be read in the default case below.
	 */
	len = XLookupString( ev, buf, sizeof buf, &keysym, 0 );

	switch( keysym )
	{
		case XK_KP_Page_Up:	key = K_KP_PGUP;	break;
		case XK_Page_Up:	key = K_PGUP;		break;

		case XK_KP_Page_Down:	key = K_KP_PGDN; 	break;
		case XK_Page_Down:	key = K_PGDN; 		break;

		case XK_KP_Home:	key = K_KP_HOME; 	break;
		case XK_Home:		key = K_HOME; 		break;

		case XK_KP_End:		key = K_KP_END; 	break;
		case XK_End:		key = K_END; 		break;

		case XK_KP_Left: 	key = K_KP_LEFTARROW;	break;
		case XK_Left:	 	key = K_LEFTARROW; 	break;

		case XK_KP_Right:	key = K_KP_RIGHTARROW;	break;
		case XK_Right:		key = K_RIGHTARROW;	break;

		case XK_KP_Down: 	key = K_KP_DOWNARROW; 	break;
		case XK_Down:	 	key = K_DOWNARROW; 	break;

		case XK_KP_Up:		key = K_KP_UPARROW;	break;
		case XK_Up:			key = K_UPARROW;	break;

		case XK_Escape:		key = K_ESCAPE;		break;

		case XK_KP_Enter:	key = K_KP_ENTER;	break;
		case XK_Return:		key = K_ENTER;		break;

		case XK_Tab:		key = K_TAB;		break;

		case XK_F1:		key = K_F1;		break;
		case XK_F2:		key = K_F2;		break;
		case XK_F3:		key = K_F3;		break;
		case XK_F4:		key = K_F4;		break;
		case XK_F5:		key = K_F5;		break;
		case XK_F6:		key = K_F6;		break;
		case XK_F7:		key = K_F7;		break;
		case XK_F8:		key = K_F8;		break;
		case XK_F9:		key = K_F9;		break;
		case XK_F10:		key = K_F10;		break;
		case XK_F11:		key = K_F11;		break;
		case XK_F12:		key = K_F12;		break;

		case XK_BackSpace: 	key = K_BACKSPACE;	break;

		case XK_KP_Delete: 	key = K_KP_DEL;		break;
		case XK_Delete: 	key = K_DEL;		break;

		case XK_Pause:		key = K_PAUSE;		break;

		case XK_Shift_L:
		case XK_Shift_R:	key = K_SHIFT;		break;

		case XK_Execute: 
		case XK_Control_L: 
		case XK_Control_R:	key = K_CTRL;		break;

		case XK_Alt_L:	
		case XK_Meta_L: 
		case XK_Alt_R:	
		case XK_Meta_R: 	key = K_ALT;		break;

		case XK_KP_Begin: 	key = K_KP_5;		break;

		case XK_Insert:		key = K_INS;		break;
		case XK_KP_Insert: 	key = K_KP_INS;		break;

		case XK_KP_Multiply:	key = '*';		break;
		case XK_KP_Add:		key = K_KP_PLUS;	break;
		case XK_KP_Subtract:	key = K_KP_MINUS;	break;
		case XK_KP_Divide:	key = K_KP_SLASH;	break;

		/* Disabled mapping of shifted punctuation to unshifted keys. */
#if 0
		case 0x021: key = '1';break;/* [!] */
		case 0x040: key = '2';break;/* [@] */
		case 0x023: key = '3';break;/* [#] */
		case 0x024: key = '4';break;/* [$] */
		case 0x025: key = '5';break;/* [%] */
		case 0x05e: key = '6';break;/* [^] */
		case 0x026: key = '7';break;/* [&] */
		case 0x02a: key = '8';break;/* [*] */
		case 0x028: key = '9';;break;/* [(] */
		case 0x029: key = '0';break;/* [)] */
		case 0x05f: key = '-';break;/* [_] */
		case 0x02b: key = '=';break;/* [+] */
		case 0x07c: key = '\'';break;/* [|] */
		case 0x07d: key = '[';break;/* [}] */
		case 0x07b: key = ']';break;/* [{] */
		case 0x022: key = '\'';break;/* ["] */
		case 0x03a: key = ';';break;/* [:] */
		case 0x03f: key = '/';break;/* [?] */
		case 0x03e: key = '.';break;/* [>] */
		case 0x03c: key = ',';break;/* [<] */
#endif

		default:
			/*
			 * Use the first byte of the looked-up text, but only
			 * if the lookup actually produced some; otherwise
			 * key keeps its initial value of 0.
			 */
			if( len > 0 )
			{
				key = *(PW8)buf;
				key = TOLOWER( key );
			}
			break;
	} 

	return key;
}