bool PageRankReducer::reduce(const void* key, KeyValueIterator& values, Collector& collector) const { //<url, pagerank, linkoutlist> as reducer intput //<url, newpagerank, linkoutlist> as reducer output //the formula of pagerank: //pr(a) - (1-d) = d * (pr(b)/l(b) + pr(c)/l(c) + ...) //d is the damping factor PageRankValue value; int count = 0; const double damping = 0.85; while(values.hasMore()){ PageRankValue* pValue = (PageRankValue*)(values.next()); value.pagerank += damping * pValue->pagerank; if(pValue->linkoutURLs.size() > 0){ for(int i = 0; i < pValue->linkoutURLs.size(); i ++){ value.linkoutURLs.push_back(pValue->linkoutURLs[i]); } } count ++; } collector.collect(key, &value); return true; }
bool WordFrequencyMapper::map(const void* key, const void* value, Collector& collector) const { //<char*, char*> as mapper intput //<char*, int> as mapper output int outValue = 1; int outputCnt = 0; int bodyLen = strlen(*(char**)value); //do parsing m_parser->init_page(*(char**)value); m_parser->convert_charset("skip"); m_parser->delete_tags(); m_parser->cut_words(1); m_parser->end_page(); //processing with the keywords list char *p = m_parser->get_keywords(); //cout << p << endl; p=strtok(p," "); while(p){ switch(*p){ case DELI_WEIGHT: case DELI_LOC: case DELI_PARA: //skip these tokens break; case '\0': cout << "+"; default:{ char** pData = &p; collector.collect( pData, &outValue); outputCnt ++; } } p=strtok(NULL," "); } //cout << *(char**)key << " : " << bodyLen << " : " << outputCnt << endl; return true; }
/**
 * C-linkage entry point that runs an application on the calling thread.
 *
 * Copies argc/argv and the VM factory out of `arg`, releases `arg` with
 * free(), registers the thread with the collector, runs the application,
 * then unregisters the thread and triggers a collection.
 *
 * Without MULTIPLE_GC the static Collector interface is used and the factory
 * is called with no arguments; with MULTIPLE_GC a Collector instance is
 * allocated and the factory is called with (argc, argv).
 *
 * @param arg  argument block; released here, must not be used by the caller afterwards
 * @return always 0
 */
extern "C" int startApp(thread_arg_t* arg)
{
    // Take what is needed out of the argument block before freeing it.
    int argc = arg->argc;
    char** args = arg->argv;
    create_vm_t createVM = arg->func;
    free(arg);

#ifndef MULTIPLE_GC
    // The address of the local `argc` is handed to the collector on registration.
    Collector::inject_my_thread(&argc);

    VirtualMachine* vm = createVM();
    vm->runApplication(argc, args);

    Collector::remove_my_thread();
    Collector::collect();
#else
    Collector* gc = Collector::allocate();
    // The address of the local `argc` is handed to the collector on registration.
    gc->inject_my_thread(&argc);

    createVM(argc, args);

    gc->remove_my_thread();
    gc->collect();
#endif

    return 0;
}
bool PageRankReducer::reduce(const void* key, KeyValueIterator& values, Collector& collector) const { //<url, pagerank, linkoutlist> as reducer intput //<url, newpagerank, linkoutlist> as reducer output //the formula of pagerank: //pr(a) - (1-d) = d * (pr(b)/l(b) + pr(c)/l(c) + ...) //d is the damping factor PageRankValue value; int count = 0; const double damping = 0.85; while(values.hasMore()){ PageRankValue* pValue = (PageRankValue*)(values.next()); value.pagerank += damping * pValue->pagerank; if(pValue->urlNumber > 0){ if(value.linkoutURLs != NULL && value.urlNumber > 0){ char** temp = new char*[value.urlNumber + pValue->urlNumber]; memcpy(temp, value.linkoutURLs, value.urlNumber * sizeof(char*) ); memcpy(temp + value.urlNumber, pValue->linkoutURLs, pValue->urlNumber * sizeof(char*) ); value.urlNumber += pValue->urlNumber; delete[] value.linkoutURLs; value.linkoutURLs = temp; } else{ value.urlNumber = pValue->urlNumber; value.linkoutURLs = pValue->linkoutURLs; } } count ++; } collector.collect(key, &value); return true; }