/*
 * Walk a NUL-terminated line word by word (words are separated by single
 * spaces) and pass every word that url_check_word() classifies as WORD_URL
 * to url_add().
 *
 * buf: the line to scan; it is read up to its terminating NUL.
 * len: not consulted by this function.
 */
void
url_check_line (char *buf, int len)
{
	char *cursor = buf;
	char *word;
	int word_len;

	/* Step over a leading ':' unless it is the only character. */
	if (buf[0] == ':' && buf[1] != 0)
		cursor++;

	word = cursor;

	for (;; cursor++)
	{
		/* Keep advancing until the current word ends. */
		if (*cursor != 0 && *cursor != ' ')
			continue;

		word_len = cursor - word;
		if (word_len > 2)
		{
			/* Workaround: URLs at the very start of a message are
			   otherwise missed, so drop one leading ':' and then
			   one leading '+' or '-' before classifying. */
			if (word[0] == ':')
			{
				word++;
				word_len--;
			}
			if (word[0] == '+' || word[0] == '-')
			{
				word++;
				word_len--;
			}

			/* Stripping may have shortened the word below the minimum. */
			if (word_len > 2 && url_check_word (word, word_len) == WORD_URL)
				url_add (word, word_len);
		}

		if (*cursor == 0)
			return;

		/* The next word begins right after the separating space. */
		word = cursor + 1;
	}
}
/*
 * Scan one line for URLs and pass each one to url_add().
 *
 * The line is processed in stages:
 *   1. An optional ':'-introduced prefix (everything up to the first space)
 *      is skipped.
 *   2. The text that follows must start with one of the entries of the
 *      `commands` table (defined outside this function); otherwise the line
 *      is ignored.
 *   3. The next space-terminated token (channel name or user nick) is
 *      skipped.
 *   4. The remainder is matched against the regex returned by re_url();
 *      each match that contains "://" is passed to url_add() after trailing
 *      CR/LF characters have been trimmed from it.
 *
 * buf: NUL-terminated line to scan.
 * len: not consulted by this function.
 */
void
url_check_line (char *buf, int len)
{
	GMatchInfo *gmi;
	char *po = buf;
	size_t i;

	/* Skip over message prefix */
	if (*po == ':')
	{
		po = strchr (po, ' ');
		if (!po)
			return;
		po++;
	}

	/* Only lines whose command matches an entry of `commands` are scanned */
	for (i = 0; i < ARRAY_SIZE (commands); i++)
	{
		const char *cmd = commands[i];
		size_t cmd_len = strlen (cmd);

		if (strncmp (cmd, po, cmd_len) == 0)
		{
			po += cmd_len;
			break;
		}
	}
	/* Loop ran to completion: no command matched */
	if (i == ARRAY_SIZE (commands))
		return;

	/* Skip past the channel name or user nick */
	po = strchr (po, ' ');
	if (!po)
		return;
	po++;

	/* gmi is freed unconditionally below, whether or not anything matched */
	g_regex_match (re_url (), po, 0, &gmi);
	while (g_match_info_matches (gmi))
	{
		int start, end;

		g_match_info_fetch_pos (gmi, 0, &start, &end);

		/* Drop line terminators that the match may have swallowed */
		while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n'))
			end--;

		/* Keep only matches that carry an explicit scheme separator */
		if (g_strstr_len (po + start, end - start, "://"))
			url_add (po + start, end - start);

		g_match_info_next (gmi, NULL);
	}
	g_match_info_free (gmi);
}
/*
 * Run find_urls() over the data held in conn->memory and pass every
 * result to url_add().
 *
 * If find_urls() returns -1 the process is terminated with EXIT_FAILURE,
 * after releasing any entries already stored in global->input.ret.
 */
static void analyze_page(global_info *global, conn_info *conn)
{
    int found;
    int idx;

    /* The page URL is exposed through the input block only for the
       duration of the find_urls() call; it is cleared again below. */
    global->input.url = conn->url;

    found = find_urls(conn->memory, &global->input);
    if (found == -1) {
        if (global->input.ret_len != 0) {
            free_array_of_charptr_incl(&global->input.ret,
                                       global->input.ret_len);
        }
        exit(EXIT_FAILURE);
    }

    global->input.url = 0;

    /* Hand each result to url_add() and clear its slot afterwards.
       NOTE(review): presumably url_add() keeps the pointer - confirm. */
    for (idx = 0; idx < found; idx++) {
        url_add(global, global->input.ret[idx]);
        global->input.ret[idx] = 0;
    }
}
/*
 * Split a NUL-terminated line into space-separated words and pass every
 * word longer than two characters that url_check_word() classifies as
 * WORD_URL to url_add().
 *
 * buf: the line to scan; it is read up to its terminating NUL.
 * len: not consulted by this function.
 */
void
url_check_line (char *buf, int len)
{
	char *word = buf;
	char *word_end;
	int n;

	/* Ignore a leading ':' unless the line consists of nothing else. */
	if (buf[0] == ':' && buf[1] != 0)
		word++;

	for (;;)
	{
		/* Find the end of the current word: a space or the terminator. */
		word_end = word;
		while (*word_end != 0 && *word_end != ' ')
			word_end++;

		n = word_end - word;
		if (n > 2 && url_check_word (word, n) == WORD_URL)
			url_add (word, n);

		if (*word_end == 0)
			return;

		/* Continue with the word that follows the space. */
		word = word_end + 1;
	}
}