Esempio n. 1
0
void LinkExtractor::safeHandleStartTag(const std::string& tag_name,
		attr_list_t& attrs, bool empty_element_tag)
{
	LinkExtractor::link_tag_map_t::const_iterator _i;
	std::string _attr;

	std::string name = tag_name;
	to_lower(name);
	// Base should be treat separately
	if ((name == "base") and (attrs.find("href") != attrs.end()) ) {
		this->base = attrs["href"].str();
	} else if ( name == "meta" ){
		this->handleMetaTag(tag_name, attrs);
	}
	// Extract Links
	//  - Is this a tag link?
	//  - If so, is the corresponding attribute of this tag present?
	_i = this->LINK_TAGS.find(name);
	if ( (_i != this->LINK_TAGS.end()) and 
	     ( attrs.find( _attr = _i->second ) != attrs.end() ))
	{
		this->links.insert( attrs[_attr].str() );
	}


}
Esempio n. 2
0
void FindEncParser::handleProcessingInstruction(const std::string& name,
		attr_list_t& attrs)
{
	if ( name == "xml" and attrs.find("encoding") != attrs.end()) {
		this->enc = attrs["encoding"].str();
		this->enc = strip(this->enc);
		throw CharsetDetectedException("Found in a Processing Instruction");
	}

}
Esempio n. 3
0
void FindEncParser::safeHandleStartTag(const std::string& name,
	attr_list_t& attrs, bool empty_element_tag)
{
	std::string val;

	if ( name == "meta" and attrs.find("http-equiv") != attrs.end() ) {
		val = attrs["http-equiv"].str();
		to_lower(val);
		if (val == "content-type" and attrs.find("content") != attrs.end() ) {
			val = attrs["content"].str();
			to_lower(val);
			std::string charset = get_charset_from_content_type(val);
			if (not charset.empty()) {
				this->enc = strip(charset);
				throw CharsetDetectedException("Found in a Meta Tag");
			}
		}
	}
}
Esempio n. 4
0
bool zmq::locator_t::register_endpoint (const char *name_, attr_list_t &attrs_)
{
    //  If 0MQ is used for in-process messaging, we shouldn't even get here.
    assert (global_locator);
    assert (strlen (name_) <= 255);

    //  Send to 'create' command.
    unsigned char cmd = create_id;
    global_locator->write (&cmd, 1);
    unsigned char size = (unsigned char) strlen (name_);
    global_locator->write (&size, 1);
    global_locator->write (name_, size);

    for (attr_list_t::iterator it = attrs_.begin ();
            it != attrs_.end (); it ++) {

        const std::string &key = (*it).first;
        const std::string &value = (*it).second;

        assert (key.size () < 256);
        size = key.size ();
        global_locator->write (&size, 1);
        global_locator->write (key.c_str (), size);

        assert (value.size () < 256);
        size = value.size ();
        global_locator->write (&size, 1);
        global_locator->write (value.c_str (), size);
    }

    //  Write terminator.
    size = 0;
    global_locator->write (&size, 1);

    //  Read the response.
    global_locator->read (&cmd, 1);

    //  Could not register global object.
    if (cmd != create_ok_id)
        return false;

    return true;
}
Esempio n. 5
0
void LinkExtractor::handleMetaTag( const std::string& tag_name,
attr_list_t& attrs)
{
	std::string value;
	std::string content;

	attr_list_t::const_iterator name_attr = attrs.find("name");
	

	if (name_attr != attrs.end()){
		value = name_attr->second.str();
		to_lower(value);
		if (value == "robots"){
			if ( attrs.count("content") ) {
				content = attrs["content"].str();
				handleRobotsMetaContent(content);
			}
		}
	}
}