/**
 * Parses an HTML buffer with Gumbo and returns the text that search_text()
 * collects starting from the document root.
 *
 * @param html NUL-terminated HTML buffer; a null pointer yields an empty string.
 * @param tag  Forwarded unchanged to search_text().
 * @return The text gathered by search_text(), or an empty string when there
 *         is nothing to parse.
 */
string HtmlParser::buscarTextoInTag(char *html, Tags *tag) {
    // gumbo_parse() reads the buffer as a C string, so a null pointer must
    // never reach it.
    if (!html) {
        return string();
    }

    // Owns the parse tree for the duration of this call so that it is
    // released on every exit path, including when search_text() throws
    // (e.g. std::bad_alloc while building the result string).
    struct ParseTree {
        GumboOutput* output;
        explicit ParseTree(const char* buffer) : output(gumbo_parse(buffer)) {}
        ~ParseTree() {
            if (output) {
                gumbo_destroy_output(&kGumboDefaultOptions, output);
            }
        }
    };

    ParseTree tree(html);
    if (!tree.output) {
        return string();
    }
    return search_text(tree.output->root, tag);
}
/* Exported function documented in search.h */
void search_step(struct search_context *context, search_flags_t flags,
        const char *string)
{
    int length;
    int pos;

    if (context == NULL) {
        warn_user("SearchError", 0);
        return;
    }

    guit->search->add_recent(string, context->gui_p);

    length = strlen(string);

    /* Skip over the leading run of '#' and '*' characters. */
    pos = 0;
    while (pos < length && (string[pos] == '#' || string[pos] == '*'))
        pos++;

    if (pos < length) {
        /* At least one character is neither '#' nor '*': run the search. */
        search_text(string, length, context, flags);
        return;
    }

    /* The string is empty or consists only of '#' / '*': drop the current
     * matches, update the GUI state and broadcast a scroll message. */
    {
        union content_msg_data msg_data;

        free_matches(context);

        guit->search->status(true, context->gui_p);
        guit->search->back_state(false, context->gui_p);
        guit->search->forward_state(false, context->gui_p);

        msg_data.scroll.area = false;
        msg_data.scroll.x0 = 0;
        msg_data.scroll.y0 = 0;
        content_broadcast(context->c, CONTENT_MSG_SCROLL, msg_data);
    }
}
/**
 * Walks the Gumbo tree rooted at `node` and records every element whose tag
 * equals tag->htmlTag into tag->tagElement.
 *
 * For each matching element:
 *  - if it carries the attribute named tag->attr, only that attribute is stored;
 *  - otherwise all of the element's attributes are stored;
 *  - its text content (via search_text()) is stored when isObtainContentTag()
 *    is true, and an empty string otherwise.
 *
 * @param node Current node; non-element nodes are ignored.
 * @param tag  Search description (htmlTag, attr) and output list (tagElement).
 */
void HtmlParser::search_for_links(GumboNode* node, Tags *tag) {
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }

    if (node->v.element.tag == tag->htmlTag) {
        attrValue attrElement;

        GumboAttribute* attribute =
            gumbo_get_attribute(&node->v.element.attributes, tag->attr.c_str());
        if (attribute) {
            attrElement.attrList.insert(
                make_pair(attribute->name, attribute->value));
        } else {
            // The requested attribute is absent: collect the remaining
            // attributes of the element instead.
            for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
                attribute = static_cast<GumboAttribute*>(
                    node->v.element.attributes.data[i]);
                attrElement.attrList.insert(
                    make_pair(attribute->name, attribute->value));
            }
        }

        attrElement.content = isObtainContentTag() ? search_text(node, tag) : "";
        tag->tagElement.push_back(attrElement);
    }

    // Recurse into the children regardless of whether this element matched,
    // so nested matches are found as well.
    GumboVector* children = &node->v.element.children;
    for (unsigned int i = 0; i < children->length; ++i) {
        search_for_links(static_cast<GumboNode*>(children->data[i]), tag);
    }
}
/**
 * Returns the text found under `node`.
 *
 * A text node yields its own text. An element other than <style> yields the
 * results of its children concatenated in order, with a single space inserted
 * before every non-empty piece that does not come from the first child. Any
 * other node yields an empty string.
 *
 * @param node Node to extract text from.
 * @param tag  Only passed along to the recursive calls.
 */
string HtmlParser::search_text(GumboNode* node, Tags *tag) {
    if (node->type == GUMBO_NODE_TEXT) {
        return std::string(node->v.text.text);
    }

    const bool isElement = (node->type == GUMBO_NODE_ELEMENT);
    if (!isElement || node->v.element.tag == GUMBO_TAG_STYLE) {
        return "";
    }

    std::string joined;
    const GumboVector& kids = node->v.element.children;
    for (unsigned int idx = 0; idx < kids.length; ++idx) {
        const std::string piece =
            search_text(static_cast<GumboNode*>(kids.data[idx]), tag);
        if (idx > 0 && !piece.empty()) {
            joined += " ";
        }
        joined += piece;
    }
    return joined;
}
/**
 * Splits a raw query into the search text and a list of file-name filters,
 * then runs search_text() with them.
 *
 * Query layout:
 *  - If the query starts with a double quote, the search text is everything
 *    up to the closing quote and the filters follow it. When the closing
 *    quote is missing, the whole remainder is the search text and there are
 *    no filters.
 *  - Otherwise the first whitespace-separated word is the search text and
 *    the remaining words are filters.
 *
 * Filters are appended to `patt` (declared outside this function and not
 * cleared here):
 *  - a word starting with '*' contributes the part after its first '.'
 *    (the whole word if it has no '.'); the result "*" switches flag_case to 0;
 *  - any other word is stored as-is when it contains a '.' that is not its
 *    first character.
 *
 * @param input_text Raw query string.
 * @return Whatever search_text(search_txt, flag_case) returns.
 */
DataStore Catalog::text_process(std::string input_text){
    bool have_search_text = false;
    int flag_case = 1; // used to distinguish call from filemanager and text_process
    std::string search_txt;
    std::string remainder;

    if (!input_text.empty() && input_text[0] == '"') {
        // Search text is separated out when given in "quotes".
        const std::size_t closing = input_text.find('"', 1);
        if (closing == std::string::npos) {
            // Unterminated quote: treat everything after the opening quote
            // as the search text instead of re-parsing it as filters.
            search_txt = input_text.substr(1);
        } else {
            search_txt = input_text.substr(1, closing - 1);
            remainder = input_text.substr(closing + 1);
        }
        have_search_text = true;
    } else {
        remainder = input_text;
    }

    std::istringstream iss(remainder);
    std::string token;
    while (iss >> token) {
        if (!have_search_text) {
            // Unquoted query: the first word is the search text.
            search_txt = token;
            have_search_text = true;
            continue;
        }

        const std::size_t dot = token.find('.');
        if (token[0] == '*') {
            // Storing valid extensions: keep what follows the first '.'.
            const std::string extension =
                (dot == std::string::npos) ? token : token.substr(dot + 1);
            if (extension == "*") {
                flag_case = 0;
            }
            patt.push_back(extension);
        } else if (dot != std::string::npos && dot > 0) {
            // To store a filename that may contain text.
            patt.push_back(token);
        }
    }

    if (patt.size() == 0) {
        flag_case = 0;
    }
    return search_text(search_txt, flag_case);
}
// Runs a sequence of strref wildcard-search examples. Each of the first
// group of checks returns false as soon as a match differs from the expected
// substring; the later loops only print the matches they find. Returns true
// when every checked example matched.
bool wildcard_samples() {
    // wildcard search examples
    strref search_text("in 2 out of 5 cases the trees will outnumber the carrots, willoutnumber");

    // basic wildcard find substring (*)
    strref search("the*will");
    strref substr = search_text.find_wildcard(search);
    printf("found \"" STRREF_FMT "\" matching \"" STRREF_FMT "\"\n", STRREF_ARG(substr), STRREF_ARG(search));
    if (!substr.same_str_case("the trees will")) return false;

    // find word beginning with c
    substr = search_text.find_wildcard("<c*>");
    if (!substr.same_str_case("cases")) return false;

    // find word ending with e
    substr = search_text.find_wildcard("<*%e>");
    if (!substr.same_str_case("the")) return false;

    // find whole line
    substr = search_text.find_wildcard("@*^");
    if (!substr.same_str_case(search_text)) return false;

    // using ranges and single character number
    substr = search_text.find_wildcard(strref("[0-9] out of #"));
    if (!substr.same_str_case("2 out of 5")) return false;

    // searching any substring between two ranged characters
    substr = search_text.find_wildcard(strref("#*[0-9]"));
    if (!substr.same_str_case("2 out of 5")) return false;

    // searching for substring but spaces are not allowed
    // NOTE(review): the pattern literal below spans a physical line break in
    // this copy of the file; presumably an escape sequence was lost - verify
    // against the upstream sample before building.
    substr = search_text.find_wildcard("will*{! 
}number");
    printf("substr = " STRREF_FMT "\n", STRREF_ARG(substr));
    if (!substr.same_str_case("willoutnumber")) return false;

    // find r followed by numbers without another substring to find
    substr = strref("no k12 r13 [99]").find_wildcard("r*{0-9}");
    if (!substr.same_str_case("r13")) return false;

    // search wildcard iteratively: each call passes the previous match back
    // in, and the loop ends when the returned strref evaluates to false
    search = "radio, gorilla, zebra, monkey, human, rat, car, ocelot, conrad, butler";
    substr.clear();
    printf("Words with 'r': ");
    while ((substr = search.wildcard_after("<*$r*$>", substr))) printf("\"" STRREF_FMT "\" ", STRREF_ARG(substr));
    printf("\n");

    // search for a more complex expression
    // (aSomeFiles is defined outside this function; sizeof - 1 drops the
    // last byte, presumably a terminating NUL - confirm at its definition)
    search = strref(aSomeFiles, sizeof(aSomeFiles)-1);
    substr.clear();
    printf("Lines with two subsequent characters d-e (dd, de, ed, ee) and only alphanumeric or dot after:\n");
    while ((substr = search.wildcard_after("@*@[d-e][d-e]*{0-9A-Za-z.}^", substr))) {
        printf("\"" STRREF_FMT "\"\n", STRREF_ARG(substr));
    }

    // same input, restarted from the beginning, listing the .json entries
    substr.clear();
    printf("\n.json files:\n\n");
    while ((substr = search.wildcard_after("<*{!/}.json^", substr))) {
        printf("\"" STRREF_FMT "\"\n", STRREF_ARG(substr));
    }

    // switch to the text sample (aTextSample is defined outside this function)
    printf("\nsome words with a character a-f followed by a character that is not a-f:\n\n");
    search = strref(aTextSample, sizeof(aTextSample)-1);
    substr.clear();
    while ((substr = search.wildcard_after("<*$[a-f][!a-f\\1-\\x40]*>", substr))) {
        printf("\"" STRREF_FMT "\" ", STRREF_ARG(substr));
    }
    printf("\n");

    return true;
}
/*
 * Callback named for a click on the search button: forwards to
 * search_text() with 1 as the second argument. The widget that triggered
 * the callback is not used.
 */
static void search_button_clicked(GtkWidget *widget, viewer_t *viewer)
{
    (void)widget;

    search_text(viewer, 1);
}
/*
 * Callback named for validation of the search entry: forwards to
 * search_text() with 0 as the second argument. The widget that triggered
 * the callback is not used.
 */
static void search_entry_validated(GtkWidget *widget, viewer_t *viewer)
{
    (void)widget;

    search_text(viewer, 0);
}
/*
 * Reads newline-terminated commands from client->fd and executes them.
 *
 * Data is accumulated in a fixed 1024-byte buffer; every complete line found
 * in it is handled in order and any trailing partial line is moved to the
 * front of the buffer for the next read.
 *
 * The function returns when read() reports EAGAIN. On any other read error,
 * on end of file, on an authentication failure, or on a failed reply write,
 * it closes the descriptor, unlinks the client from its list and frees it.
 */
static void read_commands(struct client *client)
{
    char buf[1024];
    int pos = 0;

    /* A client that is not yet authenticated is accepted without a password
     * when the address family of `addr` is AF_UNIX. */
    if (!client->authenticated)
        client->authenticated = addr.sa.sa_family == AF_UNIX;

    while (1) {
        int rc, s, i;

        /* NOTE(review): if pos reaches sizeof(buf) without a newline, the
         * requested size is 0; presumably read() then returns 0 and the
         * client is closed below - confirm this is the intended handling of
         * over-long lines. */
        rc = read(client->fd, buf + pos, sizeof(buf) - pos);
        if (rc == -1) {
            if (errno == EINTR)
                continue;
            if (errno == EAGAIN)
                return;
            goto close;
        }
        if (rc == 0)
            goto close;
        pos += rc;

        /* s is the start offset of the line currently being assembled. */
        s = 0;
        for (i = 0; i < pos; i++) {
            const char *line, *msg;
            char *cmd, *arg;
            int ret;

            if (buf[i] != '\n')
                continue;

            /* Terminate the line in place and remember where the next one
             * starts. */
            buf[i] = 0;
            line = buf + s;
            s = i + 1;

            if (!client->authenticated) {
                if (!server_password) {
                    msg = "password is unset, tcp/ip disabled";
                    d_print("%s\n", msg);
                    ret = send_answer(client->fd, "%s\n\n", msg);
                    goto close;
                }
                /* The password may be sent bare or prefixed with "passwd ". */
                if (strncmp(line, "passwd ", 7) == 0)
                    line += 7;
                client->authenticated = !strcmp(line, server_password);
                if (!client->authenticated) {
                    msg = "authentication failed";
                    d_print("%s\n", msg);
                    ret = send_answer(client->fd, "%s\n\n", msg);
                    goto close;
                }
                /* The result of this write is not checked: `continue` skips
                 * the `ret < 0` test at the bottom of the loop body. */
                ret = write_all(client->fd, "\n", 1);
                continue;
            }

            while (isspace((unsigned char)*line))
                line++;

            if (*line == '/') {
                /* "/text" searches forward; a doubled "//" sets restricted. */
                int restricted = 0;
                line++;
                search_direction = SEARCH_FORWARD;
                if (*line == '/') {
                    line++;
                    restricted = 1;
                }
                search_text(line, restricted, 1);
                ret = write_all(client->fd, "\n", 1);
            } else if (*line == '?') {
                /* "?text" searches backward; a doubled "??" sets restricted. */
                int restricted = 0;
                line++;
                search_direction = SEARCH_BACKWARD;
                if (*line == '?') {
                    line++;
                    restricted = 1;
                }
                search_text(line, restricted, 1);
                ret = write_all(client->fd, "\n", 1);
            } else if (parse_command(line, &cmd, &arg)) {
                if (!strcmp(cmd, "status")) {
                    ret = cmd_status(client);
                } else {
                    /* A "passwd" command from an already authenticated
                     * client is acknowledged but not executed. */
                    if (strcmp(cmd, "passwd") != 0) {
                        set_client_fd(client->fd);
                        run_parsed_command(cmd, arg);
                        set_client_fd(-1);
                    }
                    ret = write_all(client->fd, "\n", 1);
                }
                free(cmd);
                free(arg);
            } else {
                // don't hang cmus-remote
                ret = write_all(client->fd, "\n", 1);
            }
            if (ret < 0) {
                d_print("write: %s\n", strerror(errno));
                goto close;
            }
        }

        /* Move the unprocessed tail (a partial line, if any) to the front of
         * the buffer. */
        memmove(buf, buf + s, pos - s);
        pos -= s;
    }
    return;
close:
    close(client->fd);
    list_del(&client->node);
    free(client);
}
// Entry point: passes argv[1] and a narrowed copy of argv[2] to Test().
//
// Returns 1 without calling Test() when fewer than two arguments are
// supplied, and 0 otherwise.
int wmain(int argc, wchar_t* argv[]) {
    // argv[1] and argv[2] are both read below, so at least two arguments
    // after the program name are required.
    if (argc < 3) {
        return 1;
    }

    // Each wchar_t is converted to char individually by the iterator-range
    // constructor, so characters that do not fit in a char are truncated.
    const wchar_t* wide_text = argv[2];
    std::string search_text(wide_text, wide_text + wcslen(wide_text));

    Test(argv[1], search_text);
    return 0;
}