Example #1
0
void CContentParser::GeneratePhonesXml(const TPhones& mpPhones, boost::property_tree::ptree& xmlResult)
{
    xmlResult.clear();
    boost::property_tree::ptree& xmlRoot = xmlResult.put(std::string(PHONES_ROOT_NAME), "");

    TPhones::const_iterator it = mpPhones.begin();
    const TPhones::const_iterator itEnd = mpPhones.end();
    for (; it != itEnd; ++it)
    {
        if (it->second < 2)
            continue;

        xmlRoot.push_back(std::make_pair(std::string(PHONES_ITEM_NAME), boost::property_tree::ptree()));
        boost::property_tree::ptree& xmlPhone = xmlRoot.back().second;
        xmlPhone.put(std::string("<xmlattr>.") + PHONES_NUMBER_NAME, it->first);
        xmlPhone.put(std::string("<xmlattr>.") + PHONES_COUNT_NAME, boost::lexical_cast< std::string > (it->second));
    }
}
Example #2
0
namespace
{
    // Number of consecutive unproductive pages (no items, or an exception)
    // tolerated before ParseSite() stops crawling.
    const std::size_t MAX_UNPRODUCTIVE_PAGES = 5;

    // Re-encodes sBuffer from UTF-8 to cp1251 in place when the page text
    // declares "charset=utf-8" (case-insensitive); otherwise leaves it alone.
    // NOTE(review): presumably the downstream parsers expect cp1251 - confirm.
    void ConvertUtf8PageToCp1251(std::string& sBuffer)
    {
        const std::string sMarker("charset=");

        const std::string::size_type nMarker = sBuffer.find(sMarker);
        if (nMarker == std::string::npos)
            return;

        std::string::size_type nValueBegin = nMarker + sMarker.size();

        // The value is either quoted itself (<meta charset="utf-8">) or sits
        // inside a quoted attribute (content="text/html; charset=utf-8").
        // In the first case skip the opening quote and stop at its twin;
        // in the second case the value runs up to the attribute's closing '"'.
        char chTerminator = '"';
        if (nValueBegin < sBuffer.size()
            && (sBuffer[nValueBegin] == '"' || sBuffer[nValueBegin] == '\''))
        {
            chTerminator = sBuffer[nValueBegin];
            ++nValueBegin;
        }

        const std::string::size_type nValueEnd = sBuffer.find(chTerminator, nValueBegin);
        if (nValueEnd == std::string::npos)
            return;

        const std::string sEncoding(sBuffer.substr(nValueBegin, nValueEnd - nValueBegin));
        if (!boost::algorithm::iequals(sEncoding, "utf-8"))
            return;

        const std::wstring wsPage = boost::locale::conv::utf_to_utf<wchar_t, char>(sBuffer);
        sBuffer = boost::locale::conv::from_utf<wchar_t>(wsPage, "cp1251");
    }
}

// Crawls the numbered pages of a site and collects every parsed item under a
// "data" root node in xmlResult (one "item" child per parsed item).
//
// sURL      - boost::format pattern; its placeholder receives the page number,
//             starting at 1 and increasing by one per iteration.
// xmlResult - cleared first, then filled with the "data" root and its items.
//
// Returns true when at least one item was found on any page.
//
// Crawling stops after more than MAX_UNPRODUCTIVE_PAGES consecutive pages that
// either yielded no items or raised a std::exception. Exceptions are reported
// on std::cout and never propagate to the caller.
bool CContentParser::ParseSite(const std::string& sURL, boost::property_tree::ptree& xmlResult)
{
    bool bNotEmpty = false;
    xmlResult.clear();
    boost::property_tree::ptree& xmlRoot = xmlResult.put("data", "");

    std::size_t nPageCount = 1;
    std::size_t nUnproductivePages = 0;
    while (nUnproductivePages <= MAX_UNPRODUCTIVE_PAGES)
    {
        // Advance before anything can throw so a failing page is never retried.
        const std::size_t nPage = nPageCount++;

        try
        {
            const std::string sPage = (boost::format(sURL) % nPage).str();

            std::cout << (boost::format("Processing page: [%s]...") % sPage).str();

            CDownloader Dwnldr;
            Dwnldr.Open(sPage);

            std::string sBuffer;
            Dwnldr.Read(sBuffer);

            ConvertUtf8PageToCp1251(sBuffer);

            std::vector< std::string > vecItems;
            ParsePage(sBuffer, vecItems);

            std::cout << (boost::format("\titems parsed: [%s]") % vecItems.size()).str() << std::endl;

            if (vecItems.empty())
            {
                ++nUnproductivePages;
                continue;
            }

            nUnproductivePages = 0;
            bNotEmpty = true;

            BOOST_FOREACH(std::string& sCurrent, vecItems)
            {
                xmlRoot.push_back(std::pair< std::string, boost::property_tree::ptree > ("item", boost::property_tree::ptree()));
                boost::property_tree::ptree& xmlItem = xmlRoot.back().second;
                ParseItem(sCurrent, xmlItem);
            }
        }
        catch (const std::exception& e)
        {
            std::cout << e.what() << std::endl;

            // A failed page counts as unproductive; otherwise a persistent
            // error (unreachable host, malformed URL pattern) would keep the
            // loop running forever.
            ++nUnproductivePages;
        }
    }
    return bNotEmpty;
}