Пример #1
0
/**
 * Reduce step of the PageRank job: folds every value received for one URL
 * into a single output record.
 *
 * Input:  <url, pagerank, linkoutlist>
 * Output: <url, newpagerank, linkoutlist>
 *
 * PageRank formula:
 *     pr(a) - (1-d) = d * (pr(b)/l(b) + pr(c)/l(c) + ...)
 * where d is the damping factor. This function accumulates only the damped
 * sum (the right-hand side).
 * NOTE(review): the (1-d) term is not added here -- confirm it is applied
 * elsewhere in the pipeline.
 *
 * @param key       key shared by all values; forwarded unchanged to the collector
 * @param values    iterator over the values for this key; each element is
 *                  read as a PageRankValue
 * @param collector receives the single merged <key, value> pair
 * @return always true
 */
bool PageRankReducer::reduce(const void* key, KeyValueIterator& values, Collector& collector) const
{
    const double damping = 0.85;

    // NOTE(review): the accumulation below assumes PageRankValue's default
    // constructor starts pagerank at zero and linkoutURLs empty -- confirm.
    PageRankValue value;
    while(values.hasMore()){
        PageRankValue* pValue = (PageRankValue*)(values.next());

        // add this contribution, scaled by the damping factor
        value.pagerank += damping * pValue->pagerank;

        // append this value's link-out URLs, keeping their order;
        // an empty list simply adds nothing
        for(size_t i = 0; i < pValue->linkoutURLs.size(); ++i){
            value.linkoutURLs.push_back(pValue->linkoutURLs[i]);
        }
    }

    collector.collect(key, &value);

    return true;
}
Пример #2
0
/**
 * Map step of the word-frequency job: runs the page body through m_parser
 * and emits <token, 1> for every space-separated keyword token.
 *
 * Input:  <char*, char*>
 * Output: <char*, int>
 *
 * Tokens whose first character is DELI_WEIGHT, DELI_LOC or DELI_PARA are
 * skipped.
 *
 * @param key       input key; not used by this function
 * @param value     points to a char* holding the page body
 * @param collector receives one <char**, int*> pair per emitted token
 * @return always true
 */
bool WordFrequencyMapper::map(const void* key, const void* value, Collector& collector) const
{
    int outValue = 1;

    //do parsing
    m_parser->init_page(*(char**)value);
    m_parser->convert_charset("skip");
    m_parser->delete_tags();
    m_parser->cut_words(1);
    m_parser->end_page();

    //processing with the keywords list
    char *p = m_parser->get_keywords();
    if(p == NULL){
        // Nothing to tokenize. Passing NULL as the first strtok() argument
        // would resume whatever string was scanned last, so stop here.
        return true;
    }

    // strtok() splits the keyword buffer in place and never returns an empty
    // token, so *p is always the first character of a non-empty token.
    for(p = strtok(p, " "); p != NULL; p = strtok(NULL, " ")){
        switch(*p){
        case DELI_WEIGHT:
        case DELI_LOC:
        case DELI_PARA:
            //skip these tokens
            break;
        default:
            // the collector is handed the address of the token pointer
            collector.collect(&p, &outValue);
            break;
        }
    }

    return true;
}
Пример #3
0
// Runs one application: unpacks the launch arguments from arg, releases arg,
// then runs the application between registering and unregistering the
// current thread with the collector. Always returns 0.
// NOTE(review): the thread_arg_t name suggests this is a thread start
// routine -- confirm against the caller.
extern "C" int startApp(thread_arg_t* arg) {
    // Copy every field out of arg first; arg is released right below.
    int argc = arg->argc;
    char** argv = arg->argv;
    create_vm_t func = arg->func;
    // NOTE(review): free() implies the caller allocated arg with malloc -- confirm.
    free(arg);
#ifndef MULTIPLE_GC
    // Single collector: everything goes through Collector's static interface.
    // The address of the local argc is handed to the collector; presumably it
    // serves as a marker for this thread's stack -- TODO confirm.
    Collector::inject_my_thread(&argc);
    // func creates the virtual machine, which then runs the application.
    VirtualMachine* VM = func();
    VM->runApplication(argc, argv);
    // Unregister the thread before the final collection.
    Collector::remove_my_thread();
    Collector::collect();
#else
    // One collector instance is allocated for this call.
    Collector* GC = Collector::allocate();
    GC->inject_my_thread(&argc);
    // NOTE(review): here func is called with (argc, argv) and its result is
    // ignored, unlike the branch above -- confirm create_vm_t's signature
    // under MULTIPLE_GC.
    func(argc, argv);
    GC->remove_my_thread();
    GC->collect();
#endif
    return 0;
}
Пример #4
0
/**
 * Reduce step of the PageRank job: folds every value received for one URL
 * into a single output record.
 *
 * Input:  <url, pagerank, linkoutlist>
 * Output: <url, newpagerank, linkoutlist>
 *
 * PageRank formula:
 *     pr(a) - (1-d) = d * (pr(b)/l(b) + pr(c)/l(c) + ...)
 * where d is the damping factor. This function accumulates only the damped
 * sum (the right-hand side).
 * NOTE(review): the (1-d) term is not added here -- confirm it is applied
 * elsewhere in the pipeline.
 *
 * The output's link-out list is a shallow merge: the URL pointers are
 * copied, the strings they point to are not.
 *
 * @param key       key shared by all values; forwarded unchanged to the collector
 * @param values    iterator over the values for this key; each element is
 *                  read as a PageRankValue
 * @param collector receives the single merged <key, value> pair
 * @return always true
 */
bool PageRankReducer::reduce(const void* key, KeyValueIterator& values, Collector& collector) const
{
    const double damping = 0.85;

    PageRankValue value;
    // True once value.linkoutURLs points at an array allocated in this
    // function. An array borrowed from an input value must never be
    // deleted here: the input value still points at it.
    bool ownsArray = false;

    while(values.hasMore()){
        PageRankValue* pValue = (PageRankValue*)(values.next());
        value.pagerank += damping * pValue->pagerank;

        if(pValue->urlNumber > 0){
            if(value.linkoutURLs != NULL && value.urlNumber > 0){
                // Concatenate the URLs gathered so far with this value's URLs.
                char** merged = new char*[value.urlNumber + pValue->urlNumber];
                memcpy(merged, value.linkoutURLs, value.urlNumber * sizeof(char*));
                memcpy(merged + value.urlNumber, pValue->linkoutURLs, pValue->urlNumber * sizeof(char*));
                value.urlNumber += pValue->urlNumber;

                // Release the previous array only if it came from an
                // earlier merge, not if it was borrowed from an input value.
                if(ownsArray){
                    delete[] value.linkoutURLs;
                }
                value.linkoutURLs = merged;
                ownsArray = true;
            }
            else{
                // First non-empty list: borrow the input's array as is.
                value.urlNumber = pValue->urlNumber;
                value.linkoutURLs = pValue->linkoutURLs;
            }
        }
    }

    // NOTE(review): a merged array is not released after collect(); confirm
    // whether PageRankValue's destructor or the collector is responsible.
    collector.collect(key, &value);

    return true;
}