Ejemplo n.º 1
0
/**
 * Parse the res file.
 *
 * Copies aResDir into aCurrResDir, records blockID in resBlockID, opens
 * pResFile through openLoadFile() and runs res_parse() on it.  The lexer
 * state and the PhysFS handle are released before returning, whether or
 * not parsing succeeded.
 *
 * \param pResFile path of the resource file to load
 * \param blockID  block id stored in resBlockID before parsing starts
 * \return true if the file was opened and res_parse() returned 0;
 *         false if the file could not be opened or parsing failed
 */
bool resLoad(const char *pResFile, SDWORD blockID)
{
	bool retval = true;
	lexerinput_t input;

	sstrcpy(aCurrResDir, aResDir);

	// Note the block id number
	resBlockID = blockID;

	// PHYSFS_getRealDir() yields NULL when the file is not found in the
	// search path; never hand a NULL pointer to a "%s" conversion.
	const char *pRealDir = PHYSFS_getRealDir(pResFile);
	debug(LOG_WZ, "resLoad: loading [directory: %s] %s", pRealDir ? pRealDir : "(not found)", pResFile);

	// Load the RES file; allocate memory for a wrf, and load it
	input.type = LEXINPUT_PHYSFS;
	input.input.physfsfile = openLoadFile(pResFile, true);
	if (!input.input.physfsfile)
	{
		debug(LOG_FATAL, "Could not open file %s", pResFile);
		return false;
	}

	// and parse it
	res_set_extra(&input);
	if (res_parse() != 0)
	{
		debug(LOG_FATAL, "Failed to parse %s", pResFile);
		retval = false;
	}

	// Tear down the lexer and close the file on both the success and the
	// parse-failure path.
	res_lex_destroy();
	PHYSFS_close(input.input.physfsfile);

	return retval;
}
Ejemplo n.º 2
0
/* Read FILENAME with wget_read_file and pass its contents and length
   to res_parse.  Returns the value produced by res_parse, or NULL
   (after logging the strerror text for errno) when the file could not
   be read.  The file buffer is released before returning.  */
struct robot_specs *
res_parse_from_file (const char *filename)
{
  struct robot_specs *result = NULL;
  struct file_memory *contents = wget_read_file (filename);

  if (contents)
    {
      result = res_parse (contents->content, contents->length);
      wget_read_file_free (contents);
    }
  else
    logprintf (LOG_NOTQUIET, _("Cannot open %s: %s"),
               filename, strerror (errno));

  return result;
}
Ejemplo n.º 3
0
// Perform a DNS query and parse the results.  Follows CNAME records.
void SipSrvLookup::res_query_and_parse(const char* in_name,
                                       int type,
                                       res_response* in_response,
                                       const char*& out_name,
                                       res_response*& out_response
   )
{
   OsSysLog::add(FAC_SIP, PRI_DEBUG,
                 "SipSrvLookup::res_query_and_parse in_name = '%s', "
                 "type = %d (%s)",
                 in_name,type,
                 type == T_CNAME ? "CNAME" :
                 type == T_SRV ? "SRV" :
                 type == T_A ? "A" :
                 type == T_NAPTR ? "NAPTR" :
                 "unknown");

   // The number of CNAMEs we have followed.
   int cname_count = 0;
   // The response currently being examined.
   res_response* response = in_response;
   // The name currently being examined.
   const char* name = in_name;
   // TRUE if 'response' was a lookup for 'name' and 'type'.
   UtlBoolean response_for_this_name = FALSE;
   // Buffer into which to read DNS replies.
   char answer[DNS_RESPONSE_SIZE];
   union u_rdata* p;

   // Loop until we find a reason to exit.  Each turn around the loop does
   // another DNS lookup.
   while (1)
   {
      // While response != NULL and there is a CNAME record for name
      // in response.
      while (response != NULL &&
             (p = look_for(response, name, T_CNAME)) != NULL)
      {
         cname_count++;
         if (cname_count > SipSrvLookup::getOption(SipSrvLookup::OptionCodeCNAMELimit))
         {
            break;
         }
         // If necessary, free the current 'name'.
         if (name != in_name)
         {
            free((void*) name);
         }
         // Copy the canonical name from the CNAME record into 'name', so
         // we can still use it after freeing 'response'.
         name = strdup(p->string);
         // Remember that we are now looking for a name that was not the one
         // that we searched for to produce this response.  Hence, if we don't
         // find any RRs for it, that is not authoritative and we have to do
         // another DNS query.
         response_for_this_name = FALSE;
         // Go back and check whether the result name of the CNAME is listed
         // in this response.
      }
      // This response does not contain a CNAME for 'name'.  So it is either
      // a final response that gives us the RRs we are looking for, or
      // we need to do a lookup on 'name'.

      // Check whether the response was for this name, or contains records
      // of the type we are looking for.  If either, then any records we
      // are looking for are in this response, so we can return.
      if (response_for_this_name ||
          (response != NULL && look_for(response, name, type) != NULL))
      {
         break;
      }

      // We must do another lookup.
      // Start by freeing 'response' if we need to.
      if (response != in_response)
      {
         res_free(response);
      }
      response = NULL;
      // Now, 'response' will be from a query for 'name'.
      response_for_this_name = TRUE;
      // Debugging print.
      if (SipSrvLookup::getOption(SipSrvLookup::OptionCodePrintAnswers))
      {
         printf("res_nquery(\"%s\", class = %d, type = %d)\n",
                name, C_IN, type);
      }

      // Initialize the res state struct and set the timeout to
      // 3 secs and retries to 2
      struct __res_state res;
      res_ninit(&res);
      res.retrans = mTimeout;
      res.retry = mRetries;

      if (!mNameserverIP.isNull())
      {
          res.nscount = 1;
          inet_aton(mNameserverIP.data(), &res.nsaddr_list[0].sin_addr);

          if (mNameserverPort > 1)
          {
             res.nsaddr_list[0].sin_port = htons(mNameserverPort);
          }
      }

      // Use res_nquery, not res_search or res_query, so defaulting rules are not
      // applied to the domain, and so that the query is thread-safe.
      int r = res_nquery(&res, name, C_IN, type,
                         (unsigned char*) answer, sizeof (answer));
      // Done with res state struct, so cleanup.
      // Must close once and only once per res_ninit, after res_nquery.
      res_nclose(&res);

      if (r == -1)
      {
         // res_query failed, return.
         OsSysLog::add(FAC_SIP, PRI_WARNING,
                       "DNS query for name '%s', "
                       "type = %d (%s): returned error",
                       name, type,
                       type == T_CNAME ? "CNAME" :
                       type == T_SRV ? "SRV" :
                       type == T_A ? "A" :
                       type == T_NAPTR ? "NAPTR" :
                       "unknown");
         break;
      }

      response = res_parse((char*) &answer);
      if (response == NULL)
      {
         // res_parse failed, return.
         OsSysLog::add(FAC_SIP, PRI_WARNING,
                       "DNS query for name '%s', "
                       "type = %d (%s): response could not be parsed",
                       name, type,
                       type == T_CNAME ? "CNAME" :
                       type == T_SRV ? "SRV" :
                       type == T_A ? "A" :
                       type == T_NAPTR ? "NAPTR" :
                       "unknown");
         break;
      }
      // If requested for testing purposes, sort the query and print it.
      // Sort first, so we see how sorting came out.
      if (SipSrvLookup::getOption(SipSrvLookup::OptionCodeSortAnswers))
      {
         sort_answers(response);
      }
      if (SipSrvLookup::getOption(SipSrvLookup::OptionCodePrintAnswers))
      {
         res_print(response);
      }
      // Now that we have a fresh DNS query to analyze, go back and check it
      // for a CNAME for 'name' and then for records of the requested type.
   }

   // Final processing:  Copy the working name and response to the output
   // variables.
   out_name = name;
   out_response = response;
   OsSysLog::add(FAC_SIP, PRI_DEBUG,
                 "SipSrvLookup::res_query_and_parse out_name = '%s', out_response = %p",
                 out_name, out_response);

}
Ejemplo n.º 4
0
static int
download_child_p (const struct urlpos *upos, struct url *parent, int depth,
		  struct url *start_url_parsed, struct hash_table *blacklist)
{
  struct url *u = upos->url;
  const char *url = u->url;
  int u_scheme_like_http;

  DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));

  if (string_set_contains (blacklist, url))
    {
      DEBUGP (("Already on the black list.\n"));
      goto out;
    }

  /* Several things to check for:
     1. if scheme is not http, and we don't load it
     2. check for relative links (if relative_only is set)
     3. check for domain
     4. check for no-parent
     5. check for excludes && includes
     6. check for suffix
     7. check for same host (if spanhost is unset), with possible
     gethostbyname baggage
     8. check for robots.txt

     Addendum: If the URL is FTP, and it is to be loaded, only the
     domain and suffix settings are "stronger".

     Note that .html files will get loaded regardless of suffix rules
     (but that is remedied later with unlink) unless the depth equals
     the maximum depth.

     More time- and memory- consuming tests should be put later on
     the list.  */

  /* Determine whether URL under consideration has a HTTP-like scheme. */
  u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);

  /* 1. Schemes other than HTTP are normally not recursed into. */
  if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
    {
      DEBUGP (("Not following non-HTTP schemes.\n"));
      goto out;
    }

  /* 2. If it is an absolute link and they are not followed, throw it
     out.  */
  if (u_scheme_like_http)
    if (opt.relative_only && !upos->link_relative_p)
      {
	DEBUGP (("It doesn't really look like a relative link.\n"));
	goto out;
      }

  /* 3. If its domain is not to be accepted/looked-up, chuck it
     out.  */
  if (!accept_domain (u))
    {
      DEBUGP (("The domain was not accepted.\n"));
      goto out;
    }

  /* 4. Check for parent directory.

     If we descended to a different host or changed the scheme, ignore
     opt.no_parent.  Also ignore it for documents needed to display
     the parent page when in -p mode.  */
  if (opt.no_parent
      && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
      && 0 == strcasecmp (u->host, start_url_parsed->host)
      && u->port == start_url_parsed->port
      && !(opt.page_requisites && upos->link_inline_p))
    {
      if (!frontcmp (start_url_parsed->dir, u->dir))
	{
	  DEBUGP (("Going to \"%s\" would escape \"%s\" with no_parent on.\n",
		   u->dir, start_url_parsed->dir));
	  goto out;
	}
    }

  /* 5. If the file does not match the acceptance list, or is on the
     rejection list, chuck it out.  The same goes for the directory
     exclusion and inclusion lists.  */
  if (opt.includes || opt.excludes)
    {
      if (!accdir (u->dir, ALLABS))
	{
	  DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
	  goto out;
	}
    }

  /* 6. Check for acceptance/rejection rules.  We ignore these rules
     for directories (no file name to match) and for HTML documents,
     which might lead to other files that do need to be downloaded.
     That is, unless we've exhausted the recursion depth anyway.  */
  if (u->file[0] != '\0'
      && !(has_html_suffix_p (u->file)
	   && depth != INFINITE_RECURSION
	   && depth < opt.reclevel - 1))
    {
      if (!acceptable (u->file))
	{
	  DEBUGP (("%s (%s) does not match acc/rej rules.\n",
		   url, u->file));
	  goto out;
	}
    }

  /* 7. */
  if (schemes_are_similar_p (u->scheme, parent->scheme))
    if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
      {
	DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
		 u->host, parent->host));
	goto out;
      }

  /* 8. */
  if (opt.use_robots && u_scheme_like_http)
    {
      struct robot_specs *specs = res_get_specs (u->host, u->port);
      if (!specs)
	{
	  char *rfile;
	  if (res_retrieve_file (url, &rfile))
	    {
	      specs = res_parse_from_file (rfile);
	      xfree (rfile);
	    }
	  else
	    {
	      /* If we cannot get real specs, at least produce
		 dummy ones so that we can register them and stop
		 trying to retrieve them.  */
	      specs = res_parse ("", 0);
	    }
	  res_register_specs (u->host, u->port, specs);
	}

      /* Now that we have (or don't have) robots.txt specs, we can
	 check what they say.  */
      if (!res_match_path (specs, u->path))
	{
	  DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
	  string_set_add (blacklist, url);
	  goto out;
	}
    }

  /* The URL has passed all the tests.  It can be placed in the
     download queue. */
  DEBUGP (("Decided to load it.\n"));

  return 1;

 out:
  DEBUGP (("Decided NOT to load it.\n"));

  return 0;
}