Example #1
0
/* Collect the URLs carried by the attributes of TAG.

   tag_url_attributes lists, per tag id, the attribute names that hold
   URLs.  Every attribute of TAG whose name matches one of the entries
   for TAGID is passed to append_url, and the resulting entry is marked
   according to the table's flags (ATTR_INLINE, ATTR_HTML).  */
static void
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  const size_t table_len = countof (tag_url_attributes);
  size_t entry;
  int start = -1;
  int attrind;

  /* Locate the first table entry that belongs to TAGID.  */
  for (entry = 0; entry < table_len; entry++)
    {
      if (tag_url_attributes[entry].tagid != tagid)
        continue;
      start = entry;
      break;
    }
  assert (start != -1);

  /* The tag's own attributes drive the outer loop so that URLs are
     recorded in the order they appear in the page, e.g. "src" before
     "lowsrc" in

       <img src="foo.png" lowsrc="bar.png">

     Link conversion depends on that ordering.  Both loops cover only
     a handful of items, so the nesting is cheap.  */
  for (attrind = 0; attrind < tag->nattrs; attrind++)
    {
      /* The scan stops at the first entry for a different tag, so the
         entries of one tag are expected to sit next to each other.  */
      for (entry = start;
           entry < table_len && tag_url_attributes[entry].tagid == tagid;
           entry++)
        {
          struct urlpos *up;

          if (strcasecmp (tag->attrs[attrind].name,
                          tag_url_attributes[entry].attr_name) != 0)
            continue;

          up = append_url (tag->attrs[attrind].value,
                           ATTR_POS(tag,attrind,ctx),
                           ATTR_SIZE(tag,attrind), ctx);
          if (!up)
            continue;

          if (tag_url_attributes[entry].flags & ATTR_INLINE)
            up->link_inline_p = 1;
          if (tag_url_attributes[entry].flags & ATTR_HTML)
            up->link_expect_html = 1;
        }
    }
}
Example #2
0
/* Handle <form action="...">: record the action URL, flagged so that
   it is ignored when downloading.  Tags without an "action" attribute
   are left alone.  */
static void
tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int action_idx;
  struct urlpos *entry;
  char *action = find_attr (tag, "action", &action_idx);

  if (!action)
    return;

  entry = append_url (action, ATTR_POS(tag,action_idx,ctx),
                      ATTR_SIZE(tag,action_idx), ctx);
  if (entry)
    entry->ignore_when_downloading = 1;
}
Example #3
0
/* Handle <link href="...">.

   The href is always recorded.  How the entry is flagged depends on
   the "rel" attribute:

     rel="stylesheet"     -> inline, expected to be CSS
     rel="shortcut icon"  -> inline
     any other rel        -> external; expected to be HTML unless a
                             "type" attribute other than text/html is
                             given, as in
                             <link rel="alternate" type="application/rss+xml" href=".../?feed=rss2" />

   With no "rel" attribute the entry gets no extra flags.  */
static void
tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int href_idx;
  struct urlpos *entry;
  char *rel;
  char *href = find_attr (tag, "href", &href_idx);

  if (!href)
    return;

  entry = append_url (href, ATTR_POS(tag,href_idx,ctx),
                      ATTR_SIZE(tag,href_idx), ctx);
  if (!entry)
    return;

  rel = find_attr (tag, "rel", NULL);
  if (!rel)
    return;

  if (strcasecmp (rel, "stylesheet") == 0)
    {
      entry->link_inline_p = 1;
      entry->link_expect_css = 1;
    }
  else if (strcasecmp (rel, "shortcut icon") == 0)
    entry->link_inline_p = 1;
  else
    {
      /* External reference such as <link rel="next" href="...">.  */
      char *type = find_attr (tag, "type", NULL);
      if (type == NULL || strcasecmp (type, "text/html") == 0)
        entry->link_expect_html = 1;
    }
}
Example #4
0
/* Handle <base href="...">.

   The href is recorded as a base entry that is ignored when
   downloading, and ctx->base is replaced: by the href merged with
   ctx->parent_base when a parent base exists, otherwise by a copy of
   the href itself.  Nothing changes if the tag has no href or the URL
   could not be recorded.  */
static void
tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int href_idx;
  struct urlpos *entry;
  char *href = find_attr (tag, "href", &href_idx);

  if (href == NULL)
    return;

  entry = append_url (href, ATTR_POS(tag,href_idx,ctx),
                      ATTR_SIZE(tag,href_idx), ctx);
  if (entry == NULL)
    return;

  entry->link_base_p = 1;
  entry->ignore_when_downloading = 1;

  /* Drop the previous base before installing the new one.  */
  if (ctx->base)
    xfree (ctx->base);
  ctx->base = (ctx->parent_base
               ? uri_merge (ctx->parent_base, href)
               : xstrdup (href));
}
Example #5
0
/* Scan BUF_LENGTH bytes of CSS starting at ctx->text + OFFSET and
   record every URL found.

   Two constructs are recognized:

     @import "foo.css"   or   @import url(foo.css)
       recorded as an inline CSS link that is expected to be CSS;
     url(foo.png) anywhere else, whatever property it belongs to
       recorded as an inline CSS link.

   Positions handed to append_url are relative to ctx->text.

   NOTE(review): the scan buffer set up by yy_scan_bytes is not
   released in this function -- confirm it is freed elsewhere.  */
void
get_urls_css (struct map_context *ctx, int offset, int buf_length)
{
  int tok;
  int consumed = 0;     /* bytes of the region covered by earlier tokens */
  int pos, length;
  char *uri;

  /* Point the scanner at the requested region.  */
  yy_scan_bytes (ctx->text + offset, buf_length);

  for (tok = yylex (); tok != CSSEOF; tok = yylex ())
    {
      if (tok == IMPORT_SYM)
        {
          /* Step over the @import keyword and any whitespace tokens
             that follow it.  */
          consumed += yyleng;
          while ((tok = yylex ()) == S)
            consumed += yyleng;

          if (tok == STRING || tok == URI)
            {
              pos = consumed + offset;
              length = yyleng;

              if (tok == STRING)
                {
                  /* Strip the surrounding quote characters.  */
                  pos++;
                  length -= 2;
                  uri = xmalloc (length + 1);
                  strncpy (uri, yytext + 1, length);
                  uri[length] = '\0';
                }
              else
                uri = get_uri_string (ctx->text, &pos, &length);

              if (uri)
                {
                  struct urlpos *up = append_url (uri, pos, length, ctx);
                  DEBUGP (("Found @import: [%s] at %d [%s]\n", yytext, consumed, uri));

                  if (up)
                    {
                      up->link_inline_p = 1;
                      up->link_css_p = 1;
                      up->link_expect_css = 1;
                    }

                  xfree (uri);
                }
            }
        }
      else if (tok == URI)
        {
          pos = consumed + offset;
          length = yyleng;
          uri = get_uri_string (ctx->text, &pos, &length);

          if (uri)
            {
              struct urlpos *up = append_url (uri, pos, length, ctx);
              DEBUGP (("Found URI: [%s] at %d [%s]\n", yytext, consumed, uri));

              if (up)
                {
                  up->link_inline_p = 1;
                  up->link_css_p = 1;
                }

              xfree (uri);
            }
        }

      /* Account for the token that is current at this point; after an
         @import that is the token following the keyword.  */
      consumed += yyleng;
    }
  DEBUGP (("\n"));
}
Example #6
0
/* Handle a <meta> tag.  Three forms are acted upon:

     <meta http-equiv="refresh" content="NUMBER; URL=...">
       the URL is recorded as a refresh link with its timeout;
     <meta http-equiv="content-type" content="...; charset=CHARSET">
       meta_charset is replaced by the charset parsed from the content;
     <meta name="robots" content="...">
       ctx->nofollow is set when the content is "none" or when its
       comma-separated list contains "nofollow".

   Every other <meta> tag is ignored.  */
static void
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  char *name = find_attr (tag, "name", NULL);
  char *http_equiv = find_attr (tag, "http-equiv", NULL);

  if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
    {
      /* Some pages use a META tag to specify that the page be
         refreshed by a new page after a given number of seconds.  The
         general format for this is:

           <meta http-equiv=Refresh content="NUMBER; URL=index2.html">

         So we just need to skip past the "NUMBER; URL=" garbage to
         get to the URL.  */

      struct urlpos *entry;
      int attrind;
      int timeout = 0;
      const int timeout_max = 0x7fffffff;
      char *p;

      char *refresh = find_attr (tag, "content", &attrind);
      if (!refresh)
        return;

      /* The digit run comes straight from the page, so saturate at
         TIMEOUT_MAX rather than let the signed accumulation
         overflow.  */
      for (p = refresh; c_isdigit (*p); p++)
        {
          int digit = *p - '0';
          if (timeout > (timeout_max - digit) / 10)
            timeout = timeout_max;
          else
            timeout = 10 * timeout + digit;
        }
      if (*p++ != ';')
        return;

      while (c_isspace (*p))
        ++p;
      /* The && chain stops at the first mismatch, so a short string
         is never read past its terminating NUL.  */
      if (!(   c_toupper (*p)       == 'U'
            && c_toupper (*(p + 1)) == 'R'
            && c_toupper (*(p + 2)) == 'L'
            &&          *(p + 3)  == '='))
        return;
      p += 4;
      while (c_isspace (*p))
        ++p;

      entry = append_url (p, ATTR_POS(tag,attrind,ctx),
                          ATTR_SIZE(tag,attrind), ctx);
      if (entry)
        {
          entry->link_refresh_p = 1;
          entry->refresh_timeout = timeout;
          entry->link_expect_html = 1;
        }
    }
  else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
    {
      /* Handle stuff like:
         <meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */

      char *mcharset;
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;

      mcharset = parse_charset (content);
      if (!mcharset)
        return;

      xfree_null (meta_charset);
      meta_charset = mcharset;
    }
  else if (name && 0 == strcasecmp (name, "robots"))
    {
      /* Handle stuff like:
         <meta name="robots" content="index,nofollow"> */
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;
      if (!strcasecmp (content, "none"))
        ctx->nofollow = true;
      else
        {
          const size_t nofollow_len = sizeof "nofollow" - 1;

          while (*content)
            {
              char *end;
              /* Skip any initial whitespace. */
              content += strspn (content, " \f\n\r\t\v");
              /* Find the next occurrence of ',' or whitespace,
               * or the end of the string.  */
              end = content + strcspn (content, ", \f\n\r\t\v");
              /* The token must be exactly "nofollow".  Comparing only
                 END - CONTENT characters would also accept an empty
                 token (zero characters always compare equal) and any
                 prefix such as "no".  */
              if ((size_t) (end - content) == nofollow_len
                  && !strncasecmp (content, "nofollow", nofollow_len))
                ctx->nofollow = true;
              /* Skip past the next comma, if any. */
              if (*end == ',')
                ++end;
              else
                {
                  end = strchr (end, ',');
                  if (end)
                    ++end;
                  else
                    end = content + strlen (content);
                }
              content = end;
            }
        }
    }
}
Example #7
0
/* Invoke the browser on the given URL.
 *
 * BROWSER is a command template, optionally followed by ':' and
 * further templates.  Only the first template (up to the first ':'
 * or the end of the string) is processed by this call:
 *
 *   %s   is replaced by the URL,
 *   %X   for any other character X inserts X literally (so "%%"
 *        yields '%' and "%:" yields ':' without ending the template),
 *   a trailing lone '%' is kept as is.
 *
 * If the template contains no %s, the URL is appended after a space.
 * Quote characters are copied through; the current quoting state
 * (0 = none, 1 = inside single quotes, 2 = inside double quotes) is
 * handed to append_url together with the URL.
 *
 * The resulting command is run with system().  Returns NULL if the
 * command exited normally with status 0.  Otherwise returns a pointer
 * to the rest of BROWSER: the template after the ':' or the
 * terminating NUL when there is none.  Exits the process if system()
 * itself fails. */
const char *
invoke(const char *browser, const char *url)
{
    int did_subst = 0;
    const char *point = browser;
    int quote_count = 0;
    int status;

    /* Reset the buffer */
    cmd_index = 0;

    /* Copy from the browser string */
    for (;;) {
        int ch = *point;

        switch (ch) {
        case '\0':
        case ':':
            /* End of the browser string.  Insert the URL if we
             * haven't done so already */
            if (!did_subst) {
                append_char(' ');
                append_url(url, quote_count);
            }

            /* Null-terminate the command */
            append_char('\0');

            /* Invoke the command */
            xdprintf(1, "exec: %s\n", cmd_buffer);

            status = system(cmd_buffer);
            if (status < 0) {
                perror("fork() failed");
                exit(1);
            }

            /* Success means a normal exit with status 0.  The exit
             * status bits are only meaningful after a normal exit: a
             * command killed by a signal can leave them at zero, so
             * WIFEXITED must be checked first. */
            if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
                xdprintf(2, "ok\n");
                return NULL;
            }

            if (WIFEXITED(status)) {
                xdprintf(2, "failed: %d\n", WEXITSTATUS(status));
            } else {
                xdprintf(2, "failed: abnormal termination (status %d)\n",
                         status);
            }

            /* Hand back whatever follows this template */
            return ch == '\0' ? point : point + 1;

        case '"':
            /* Toggle double-quotes if appropriate */
            if (quote_count == 2) {
                quote_count = 0;
            } else if (quote_count == 0) {
                quote_count = 2;
            }

            append_char(ch);
            break;

        case '\'':
            /* Toggle single-quotes if appropriate */
            if (quote_count == 1) {
                quote_count = 0;
            } else if (quote_count == 0) {
                quote_count = 1;
            }

            append_char(ch);
            break;

        case '%':
            /* %-escapes */
            ch = point[1];

            /* Watch for the URL substitution */
            if (ch == 's') {
                append_url(url, quote_count);

                /* Skip ahead */
                did_subst = 1;
                point++;
                break;
            }

            /* Watch for odd EOF: keep the '%' and let the next
             * iteration see the terminating NUL */
            if (ch == '\0') {
                append_char('%');
                break;
            }

            /* Otherwise drop the initial % */
            append_char(ch);
            point++;
            break;

        default:
            append_char(ch);
            break;
        }

        point++;
    }
}