void LinkExtractor::safeHandleStartTag(const std::string& tag_name, attr_list_t& attrs, bool empty_element_tag) { LinkExtractor::link_tag_map_t::const_iterator _i; std::string _attr; std::string name = tag_name; to_lower(name); // Base should be treat separately if ((name == "base") and (attrs.find("href") != attrs.end()) ) { this->base = attrs["href"].str(); } else if ( name == "meta" ){ this->handleMetaTag(tag_name, attrs); } // Extract Links // - Is this a tag link? // - If so, is the corresponding attribute of this tag present? _i = this->LINK_TAGS.find(name); if ( (_i != this->LINK_TAGS.end()) and ( attrs.find( _attr = _i->second ) != attrs.end() )) { this->links.insert( attrs[_attr].str() ); } }
void FindEncParser::handleProcessingInstruction(const std::string& name, attr_list_t& attrs) { if ( name == "xml" and attrs.find("encoding") != attrs.end()) { this->enc = attrs["encoding"].str(); this->enc = strip(this->enc); throw CharsetDetectedException("Found in a Processing Instruction"); } }
void FindEncParser::safeHandleStartTag(const std::string& name, attr_list_t& attrs, bool empty_element_tag) { std::string val; if ( name == "meta" and attrs.find("http-equiv") != attrs.end() ) { val = attrs["http-equiv"].str(); to_lower(val); if (val == "content-type" and attrs.find("content") != attrs.end() ) { val = attrs["content"].str(); to_lower(val); std::string charset = get_charset_from_content_type(val); if (not charset.empty()) { this->enc = strip(charset); throw CharsetDetectedException("Found in a Meta Tag"); } } } }
bool zmq::locator_t::register_endpoint (const char *name_, attr_list_t &attrs_) { // If 0MQ is used for in-process messaging, we shouldn't even get here. assert (global_locator); assert (strlen (name_) <= 255); // Send to 'create' command. unsigned char cmd = create_id; global_locator->write (&cmd, 1); unsigned char size = (unsigned char) strlen (name_); global_locator->write (&size, 1); global_locator->write (name_, size); for (attr_list_t::iterator it = attrs_.begin (); it != attrs_.end (); it ++) { const std::string &key = (*it).first; const std::string &value = (*it).second; assert (key.size () < 256); size = key.size (); global_locator->write (&size, 1); global_locator->write (key.c_str (), size); assert (value.size () < 256); size = value.size (); global_locator->write (&size, 1); global_locator->write (value.c_str (), size); } // Write terminator. size = 0; global_locator->write (&size, 1); // Read the response. global_locator->read (&cmd, 1); // Could not register global object. if (cmd != create_ok_id) return false; return true; }
void LinkExtractor::handleMetaTag( const std::string& tag_name, attr_list_t& attrs) { std::string value; std::string content; attr_list_t::const_iterator name_attr = attrs.find("name"); if (name_attr != attrs.end()){ value = name_attr->second.str(); to_lower(value); if (value == "robots"){ if ( attrs.count("content") ) { content = attrs["content"].str(); handleRobotsMetaContent(content); } } } }