コード例 #1
0
void test_find_urls()
{
    int test_counter = 1;
    std::deque<st_url> test_urls, correct_urls;
    std::fstream correct_file;
    std::string test1 = "http://www.google.com/test/test1", 
                test2 = "https://www.reddit.com/test", 
                test3 = "test3",
                test4 = "http://www.test4.com",
                test5 = "http://www.test5.com/one/",
                test6 = "www.example.com",
                test7 = "www.example.com/",
                test8 = "http://fonts.googleapis.com/css?family=Fenix",
                test_string;
                
    find_urls(test1, test_urls);
    find_urls(test2, test_urls);
    find_urls(test3, test_urls);
    find_urls(test4, test_urls);
    find_urls(test5, test_urls);
    find_urls(test6, test_urls);
    find_urls(test7, test_urls);  
    find_urls(test8, test_urls);
    
    correct_file.open("testing/correctfind_urls.txt");    
    
    std::cout << std::right << std::setw(18) << "EXPECTED" << std::setw(32)
              << "ACTUAL " << std::setw(28) << " STATUS\n" << std::endl;
    
    while(correct_file >> test_string)
    {
        std::cout << "\nTest " << test_counter << ":  " << std::endl;
        if(test_string == "#")
        {
            std::cout << std::right << std::setw(75) << "PASS" << std::endl;
            test_counter++;
            continue;
        }
        if(test_string == test_urls.front().hostname)
        { 
            std::cout << "          " << std::left << std::setw(33) 
                      << test_string << std::setw(28) 
                      << test_urls.front().hostname << "PASS" << std::endl;
            correct_file >> test_string;

            if(test_string == test_urls.front().subdirectory)
                std::cout << "          " << std::left << std::setw(33) 
                          << test_string << std::setw(28) 
                          << test_urls.front().subdirectory << "PASS"
                          << std::endl;
            else
                std::cout << "           " << std::left << std::setw(33) 
                      << test_string << std::setw(28) 
                      << test_urls.front().subdirectory << "FAIL" << std::endl;
        }
        else
        {
コード例 #2
0
ファイル: spider.c プロジェクト: codeape/polyorc
/*
 * Find the URLs in a fetched page and keep them.
 *
 * conn->url is exposed through global->input.url for the duration of the
 * find_urls() call on conn->memory. Every entry find_urls() leaves in
 * global->input.ret is then passed to url_add(). If find_urls() reports
 * -1 the process is terminated with EXIT_FAILURE.
 */
static void analyze_page(global_info *global, conn_info *conn) {
    int found;
    int idx = 0;

    /* Analyze */
    global->input.url = conn->url;
    found = find_urls(conn->memory, &(global->input));
    if (found == -1) {
        /* Free the result array, if it holds anything, before exiting. */
        if (global->input.ret_len != 0) {
            free_array_of_charptr_incl(&(global->input.ret),
                                       global->input.ret_len);
        }
        exit(EXIT_FAILURE);
    }
    global->input.url = 0;

    /* Save */
    while (idx < found) {
        url_add(global, global->input.ret[idx]);
        /* The slot is zeroed after the hand-off; presumably url_add()
         * keeps the pointer -- confirm against its definition. */
        global->input.ret[idx] = 0;
        idx++;
    }
}
コード例 #3
0
ファイル: bti.c プロジェクト: thesues/bti
/*
 * Replace the URLs found in `text` by shortened versions, in place.
 *
 * A "bti-shrink-urls" helper is started with popenRWE(); each URL range
 * reported by find_urls() is copied out and handed to shrink_one_url().
 * A URL is only substituted when the result is strictly shorter than the
 * original, so the rewritten string never grows and always fits in the
 * caller's buffer.
 *
 * Returns `text` itself in every case. The string is left untouched when
 * the helper cannot be started or when find_urls() reports no URL.
 */
static char *shrink_urls(char *text)
{
	int *ranges;
	int rcount;
	int i;
	int inofs = 0;		/* read offset in the original text */
	int outofs = 0;		/* write offset in the rewritten text */
	const char *const shrink_args[] = {
		"bti-shrink-urls",
		NULL
	};
	int shrink_pid;
	int shrink_pipe[3];
	int inlen = strlen(text);
	int tail;

	dbg("before len=%u\n", inlen);

	shrink_pid = popenRWE(shrink_pipe, shrink_args[0], shrink_args);
	if (shrink_pid < 0)
		return text;

	rcount = find_urls(text, &ranges);
	if (!rcount) {
		/* The helper is already running at this point: close it
		 * instead of leaking the child process and its pipes. */
		(void)pcloseRWE(shrink_pid, shrink_pipe);
		/* NOTE(review): if find_urls() allocates `ranges` even when
		 * it finds nothing, it should be freed here too -- confirm
		 * against find_urls(). */
		return text;
	}

	/* ranges[] holds (start, end) offset pairs, hence the step of 2 */
	for (i = 0; i < rcount; i += 2) {
		int url_start = ranges[i];
		int url_end = ranges[i + 1];
		int long_url_len = url_end - url_start;
		char *url = strndup(text + url_start, long_url_len);
		int short_url_len;
		int not_url_len = url_start - inofs;

		/* A failed strndup() is handled like an unavailable short
		 * url: the original one is kept. */
		if (url) {
			dbg("long  url[%u]: %s\n", long_url_len, url);
			url = shrink_one_url(shrink_pipe, url);
		}
		short_url_len = url ? strlen(url) : 0;
		dbg("short url[%u]: %s\n", short_url_len,
		    url ? url : "(null)");

		if (!url || short_url_len >= long_url_len) {
			/* The short url ended up being too long
			 * or unavailable */
			if (inofs != outofs) {
				/* Source and destination overlap once an
				 * earlier url has shrunk: memmove, not
				 * strncpy. */
				memmove(text + outofs, text + inofs,
					not_url_len + long_url_len);
			}
			inofs += not_url_len + long_url_len;
			outofs += not_url_len + long_url_len;

		} else {
			/* copy the unmodified block */
			if (inofs != outofs)
				memmove(text + outofs, text + inofs,
					not_url_len);
			inofs += not_url_len;
			outofs += not_url_len;

			/* copy the new url (separate buffer, no overlap) */
			memcpy(text + outofs, url, short_url_len);
			inofs += long_url_len;
			outofs += short_url_len;
		}

		free(url);
	}

	/* copy the last block after the last match; the tail length is
	 * always added to outofs so that the terminator below never cuts
	 * off text that was not processed by the loop */
	tail = inlen - inofs;
	if (tail > 0) {
		if (inofs != outofs)
			memmove(text + outofs, text + inofs, tail);
		outofs += tail;
	}

	free(ranges);

	(void)pcloseRWE(shrink_pid, shrink_pipe);

	text[outofs] = 0;
	dbg("after len=%u\n", outofs);
	return text;
}
コード例 #4
0
/*
  Removes the first (start, length) pair from `urls` and returns it.
  Returns (-1,-1) when the list is empty, a value that no source
  position can match.
*/
static std::pair<int,int>
pop_next_url_range(std::list<std::pair<int,int> >& urls)
{
  if (urls.empty())
    return std::pair<int,int>(-1,-1);
  const std::pair<int,int> range = urls.front();
  urls.pop_front();
  return range;
}

/*
  Takes a single line as input (\n included) and returns it as an HTML
  fragment:
  - tabs are expanded with &nbsp; up to the next multiple of 8 columns
  - a space that follows a space or a tab becomes &nbsp;
  - '<', '>' and '&' are replaced by entities, '\n' by <br>
  - characters in the 0x80-0x9F range are replaced (see below)
  - quoted-printable =XY sequences are decoded if prefs.m_decode_qp is set
  - URLs are put inside href tags if prefs.m_clickable_urls is set
*/
QString
mail_displayer::expand_body_line(const QString& line,
                                 const display_prefs& prefs)
{
  static const QChar tab = QChar('\t');
  static const QChar lbracket = QChar('<');
  static const QChar rbracket = QChar('>');
  static const QChar equal_sign = QChar('=');
  static const QChar amp = QChar('&');
  static const QChar ctrl_92 = QChar((ushort)0x92);

  const int tabsize = 8;
  const int len = line.length();
  QString exp_s;                // expanded output
  int col = 0;                  // current column
  int pos = 0;                  // position in the source
  QChar last_src_char;

  // (start, length) ranges of the URLs within the original string
  std::list<std::pair<int,int> > urls;
  if (prefs.m_clickable_urls) {
    find_urls(line, &urls);
  }
  std::pair<int,int> cur_url = pop_next_url_range(urls);

  while (pos < len) {
    int incr = 1;               // default increment in the source

    // TODO: accelerate the most common case by processing first [a-zA-Z0-9] chars

    if (pos == cur_url.first) {
      /* TODO: adjust cur_url.second for the delta of sequences that expand
         or shrink between source and destination */
      exp_s.append(QString("<a href=\"%1\">").arg(line.mid(pos, cur_url.second)));
    }
    // contents
    const QChar c = line.at(pos);

    if (c == ' ') {
      // repeated spaces are ignored as in html, so we use &nbsp; instead
      // (but only in the case of repeated spaces to limit the overhead)
      if (last_src_char==QChar(' ') || last_src_char==tab) {
        exp_s.append("&nbsp;");
      }
      else {
        exp_s.append(c);
      }
      col++;
    }
    else if (c=='\n') {
      exp_s.append("<br>");
      col=0;
    }
    else if (c == tab) {
      // pad up to the next tab stop
      const int pad = tabsize - (col % tabsize);
      for (int j = 0; j < pad; j++)
        exp_s.append("&nbsp;");
      col += pad;
    }
    else if (c == lbracket) {
      exp_s.append("&lt;");
      col++;
    }
    else if (c == rbracket) {
      exp_s.append("&gt;");
      col++;
    }
    else if (c == amp) {
      exp_s.append("&amp;");
      col++;
    }
    else if (c == ctrl_92) {
      // Unicode 0x92 is not displayed by QTextBrowser but is produced
      // by Outlook so we replace it by a basic simple quote
      exp_s.append(QChar(0x27));
      col++;
    }
    else if (c.unicode()>=(ushort)0x80 && c.unicode()<=(ushort)0x9F) {
      /* Unicode characters from the "other, Control category" and
         whose codes are higher than 0x80 are expressed as HTML codes
         because QTextBrowser doesn't render them correctly */
      // NOTE(review): col is not incremented in this branch, unlike the
      // others -- confirm whether that is intended
      exp_s.append(QString("&#x%1;").arg(c.unicode(), 0, 16));
    }
    else {
      // check for quoted printable =XY sequence
      if (prefs.m_decode_qp && (c == equal_sign)) {
        QString decoded_qp = consume_qp(line, &pos, len, prefs);
        if (!decoded_qp.isEmpty()) {
          exp_s.append(decoded_qp);
          col += decoded_qp.length();
          incr = 0;             // pos has already been adjusted by consume_qp
        }
        // NOTE(review): when nothing is decoded, the '=' itself is not
        // appended to the output -- confirm against consume_qp
      }
      else {
        // default case
        exp_s.append(c);
        col++;
      }
    }

    // Close the anchor on the last character of the current URL.
    // NOTE(review): pos may have been moved by consume_qp at this point,
    // so a decoded sequence inside a URL can presumably shift or skip
    // this test -- to be confirmed
    if (pos == (cur_url.first+cur_url.second-1)) {
      exp_s.append("</a>");
      cur_url = pop_next_url_range(urls);       // next URL
    }
    last_src_char = c;
    pos += incr;
  }
  return exp_s;
}