/**
 * Gather the per-instance statistics onto instance 0.
 *
 * @param[in|out] myStats on entry holds this instance's local statistics. On instance 0 it ends up
 *                holding the merge of the statistics of all instances; on every other instance it
 *                is left untouched.
 * @param query the query context
 */
void exchangeStats(Stats& myStats, shared_ptr<Query>& query)
{
    const bool isInstanceZero = (query->getInstanceID() == 0);

    if (!isInstanceZero)
    {
        /* Any instance other than 0 ships its serialized statistics to instance 0.
         * The send is non-blocking and must be paired with a BufReceive on the recipient. */
        shared_ptr<SharedBuffer> outgoing = myStats.marshall();
        BufSend(0, outgoing, query);
        return;
    }

    /* Instance 0: receive from every other instance, in instance order, and fold the result in. */
    InstanceID peer = 1;
    while (peer < query->getInstancesCount())
    {
        /* Blocking receive. */
        shared_ptr<SharedBuffer> incoming = BufReceive(peer, query);
        Stats peerStats(incoming);
        myStats.merge(peerStats);
        ++peer;
    }

    /* Note: at the moment instance 0 IS synonymous with "coordinator". In the future we may move
     * to a more advanced multiple-coordinator scheme. */
}
/**
 * Accumulate the lower triangle of the Gamma matrix from the local chunks of the first input
 * array and, when the query runs on more than one instance, combine the partial matrices on
 * instance 0.
 *
 * For every row the vector z_i = (1, x_1, ..., x_{d+1}) is filled from the chunk iterator and
 * Gamma[k][m] += z_i[k] * z_i[m] is applied for 0 <= m <= k <= d+1.
 *
 * NOTE(review): the chunk iterator is advanced once per expected cell and getItem() is called
 * without consulting chunkIter->end(); this looks like it assumes dense chunks that hold all
 * d+1 columns of a row -- confirm against the operator's input requirements.
 *
 * @param inputArrays the operator inputs; only inputArrays[0] is read
 * @param query the query context
 * @return on instances other than 0 of a multi-instance query, the MemArray created from _schema
 *         (nothing is written to it here); otherwise the result of writeGamma(d, query)
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
{
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0];
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1;
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrEnd() - dimsD.getCurrStart();
    size_t nStart = dimsN.getCurrStart();
    size_t dStart = dimsD.getCurrStart();
    // Get chunk size of n.
    size_t nChunkSize = dimsN.getChunkInterval();

    // Gamma is a (d+2) x (d+2) matrix of which only the lower triangle is kept and exchanged.
    const size_t gammaOrder = d + 2;
    const size_t triangleCount = gammaOrder * (gammaOrder + 1) / 2;

    // Helps to accumulate the n and L.
    z_i[0] = 1.0;

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    Coordinates chunkPosition;
    size_t i, j, k, m;
    while (!inputArrayIter->end())
    {
        shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
        chunkPosition = inputArrayIter->getPosition();
        for (i = chunkPosition[0]; i < chunkPosition[0] + nChunkSize; i++)
        {
            // In case the chunk is partially filled.
            if (i == n + nStart)
            {
                break;
            }
            for (j = chunkPosition[1], m = 1; j <= chunkPosition[1] + d; j++, m++)
            {
                // In case the chunk is partially filled.
                if (j == d + 1 + dStart)
                {
                    break;
                }
                z_i[m] = chunkIter->getItem().getDouble();
                ++(*chunkIter);
            }
            // This operator is not optimized for entries with value zero.
            // TODO: rows/columns with fabs(z_i[k]) < 10e-6 could be skipped.
            for (k = 0; k < gammaOrder; ++k)
            {
                for (m = 0; m <= k; ++m)
                {
                    Gamma[k][m] += z_i[k] * z_i[m];
                }
            }
        }
        ++(*inputArrayIter);
    }

    // Instance 0 acts as the coordinator: the other instances send their partial Gamma to it.
    if (query->getInstancesCount() > 1)
    {
        if (query->getInstanceID() != 0)
        {
            // I am not the coordinator, I should send my Gamma matrix out.
            shared_ptr<SharedBuffer> buf(new MemoryBuffer(NULL, sizeof(double) * triangleCount));
            double* Gammabuf = static_cast<double*>(buf->getData());
            for (size_t row = 0; row < gammaOrder; ++row)
            {
                for (size_t col = 0; col <= row; ++col)
                {
                    *Gammabuf = Gamma[row][col];
                    ++Gammabuf;
                }
            }
            BufSend(0, buf, query);
            return outputArray;
        }
        else
        {
            // I am the coordinator, I should collect Gamma matrix from workers.
            for (InstanceID l = 1; l < query->getInstancesCount(); ++l)
            {
                shared_ptr<SharedBuffer> buf = BufReceive(l, query);
                if (!buf)
                {
                    // Nothing arrived from this instance; do not dereference an empty pointer.
                    continue;
                }
                double* Gammabuf = static_cast<double*>(buf->getData());
                for (size_t row = 0; row < gammaOrder; ++row)
                {
                    for (size_t col = 0; col <= row; ++col)
                    {
                        Gamma[row][col] += *Gammabuf;
                        ++Gammabuf;
                    }
                }
            }
        } // end if getInstanceID() != 0
    } // end if InstancesCount() > 1

    return writeGamma(d, query);
}
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query) { shared_ptr<Array> outputArray(new MemArray(_schema, query)); shared_ptr<Array> inputArray = inputArrays[0]; ArrayDesc inputSchema = inputArray->getArrayDesc(); // Get descriptor of two dimensions d and n. DimensionDesc dimsN = inputSchema.getDimensions()[0]; DimensionDesc dimsD = inputSchema.getDimensions()[1]; int64_t n = dimsN.getCurrEnd() - dimsN.getCurrStart() + 1; // Note: the input data set should have d+1 dimensions (including Y) d = dimsD.getCurrEnd() - dimsD.getCurrStart(); idY = d+1; int64_t nStart = dimsN.getCurrStart(); int64_t dStart = dimsD.getCurrStart(); // Get chunk size of n. int64_t nChunkSize = dimsN.getChunkInterval(); k = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[0])->getExpression()->evaluate().getInt64(); if (_parameters.size() == 2) { idY = ((shared_ptr<OperatorParamPhysicalExpression>&)_parameters[1])->getExpression()->evaluate().getInt64(); } #ifdef DEBUG stringstream ss; ss << getenv("HOME") << "/groupdiagdensegamma-instance-" << query->getInstanceID() << ".log"; log.open(ss.str().c_str(), ios::out); log << "n = " << n << endl << "d = " << d << endl << "k = " << k << endl; log << "nStart = " << nStart << endl << "dStart = " << dStart << endl; log << "nChunkSize = " << nChunkSize << endl; log << "idY = " << idY << endl; #endif shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0); Coordinates chunkPosition; int64_t i, j, k, m, l; double value; NLQ tmp; map<double, struct NLQ>::iterator it; while(! inputArrayIter->end() ) { shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator(); chunkPosition = inputArrayIter->getPosition(); #ifdef DEBUG log << "Getting into chunk (" << chunkPosition[0] << ", " << chunkPosition[1] << ")." 
<< endl; #endif for(i=chunkPosition[0]; i<chunkPosition[0] + nChunkSize; i++) { if(i == n + nStart) { #ifdef DEBUG log << "Reaching row " << i << ", exiting." << endl; #endif break; } for(j=chunkPosition[1], m=1; j<=chunkPosition[1]+d; j++, m++) { if(j == d + 1 + dStart) { #ifdef DEBUG log << "Reaching column " << j << ", exiting." << endl; #endif break; } value = chunkIter->getItem().getDouble(); tmp.L[m] = value; tmp.Q[m] = value * value; ++(*chunkIter); } double Y = tmp.L[idY]; it = nlq.find(Y); if (it == nlq.end()) { #ifdef DEBUG log << "Cannot find NLQ entry for class " << Y << ", creating new." << endl; #endif nlq[Y].N = 1; nlq[Y].groupId = Y; } else { nlq[Y].N++; } for (k=1, l=1; k<=d+1; k++) { if (k == idY) { continue; } nlq[Y].L[l] += tmp.L[k]; nlq[Y].Q[l] += tmp.Q[k]; l++; } nlq[Y].L[d+1] += tmp.L[idY]; nlq[Y].Q[d+1] += tmp.Q[idY]; } ++(*inputArrayIter); } /** * The "logical" instance ID of the instance responsible for coordination of query. * COORDINATOR_INSTANCE if instance execute this query itself. */ size_t localClassCount = nlq.size(); #ifdef DEBUG log << "localClassCount = " << localClassCount << endl; #endif if(query->getInstancesCount() > 1) { if(query->getInstanceID() != 0) { // I am not the coordinator, I should send my NLQ out. #ifdef DEBUG log << "I am not the coordinator, I should send my NLQ out." << endl; #endif shared_ptr <SharedBuffer> buf ( new MemoryBuffer(NULL, sizeof(struct NLQ) * localClassCount )); struct NLQ *NLQbuf = static_cast<struct NLQ*> (buf->getData()); for(it = nlq.begin(); it != nlq.end(); it++) { *NLQbuf = it->second; ++NLQbuf; } BufSend(0, buf, query); #ifdef DEBUG log << "Exiting." << endl; #endif return outputArray; } else { // I am the coordinator, I should collect NLQ from workers. #ifdef DEBUG log << "I am the coordinator, I should collect NLQ from workers." << endl; #endif for(InstanceID l = 1; l<query->getInstancesCount(); ++l) { shared_ptr<SharedBuffer> buf = BufReceive(l, query); if(! 
buf) { #ifdef DEBUG log << "Nothing from instance " << l << ", continue." << endl; #endif continue; } int64_t remoteClassCount = buf->getSize() / sizeof(struct NLQ); struct NLQ* NLQbuf = static_cast<struct NLQ*> (buf->getData()); #ifdef DEBUG log << "Received " << remoteClassCount << " entries from instance " << l << endl; #endif for(i=0; i<remoteClassCount; ++i) { it = nlq.find(NLQbuf->groupId); if( it == nlq.end() ) { #ifdef DEBUG log << "Cannot find NLQ entry for class " << NLQbuf->groupId << ", creating new." << endl; #endif nlq[NLQbuf->groupId] = *NLQbuf; } else { it->second.N += NLQbuf->N; for(j=1; j<=d+1; ++j) { it->second.L[j] += NLQbuf->L[j]; it->second.Q[j] += NLQbuf->Q[j]; } } ++NLQbuf; } #ifdef DEBUG log << "Merge complete." << endl; #endif } }// end if getInstanceID() != 0 }//end if InstancesCount() > 1 return writeGamma(query); }
/**
 * Accumulate per-column sums (nlq.L) and sums of squares (nlq.Q) over the local chunks of the
 * first input array and, when the query runs on more than one instance, add the partial sums
 * together on instance 0.
 *
 * nlq.N is set to the current length of the first dimension and nlq.d to the current length of
 * the second dimension minus one.
 *
 * NOTE(review): cells are accumulated at index cellPosition[1] (the raw coordinate in the second
 * dimension), while the exchange below transfers slots 1..d+1. This looks like it assumes the
 * second dimension starts at 1 -- confirm.
 *
 * @param inputArrays the operator inputs; only inputArrays[0] is read
 * @param query the query context
 * @return on instances other than 0 of a multi-instance query, the MemArray created from _schema
 *         (nothing is written to it here); otherwise the result of writeGamma(query)
 */
shared_ptr< Array > execute(vector< shared_ptr< Array> >& inputArrays, shared_ptr<Query> query)
{
    shared_ptr<Array> outputArray(new MemArray(_schema, query));
    shared_ptr<Array> inputArray = inputArrays[0];
    ArrayDesc inputSchema = inputArray->getArrayDesc();

    // Get descriptor of two dimensions d and n.
    DimensionDesc dimsN = inputSchema.getDimensions()[0];
    DimensionDesc dimsD = inputSchema.getDimensions()[1];
    size_t n = dimsN.getCurrLength();
    // Note: the input data set should have d+1 dimensions (including Y)
    size_t d = dimsD.getCurrLength() - 1;
    nlq.N = n;
    nlq.d = d;

    shared_ptr<ConstArrayIterator> inputArrayIter = inputArray->getConstIterator(0);
    size_t i;
    double value;
    while (!inputArrayIter->end())
    {
        shared_ptr<ConstChunkIterator> chunkIter = inputArrayIter->getChunk().getConstIterator();
        // For each cell in the current chunk.
        // This will skip the empty cells.
        while (!chunkIter->end())
        {
            // Bind the position by reference rather than copying it for every cell.
            const Coordinates& cellPosition = chunkIter->getPosition();
            value = chunkIter->getItem().getDouble();
            nlq.L[ cellPosition[1] ] += value;
            nlq.Q[ cellPosition[1] ] += value * value;
            ++(*chunkIter);
        }
        ++(*inputArrayIter);
    }

    // Instance 0 acts as the coordinator: the other instances send their partial sums to it.
    if (query->getInstancesCount() > 1)
    {
        if (query->getInstanceID() != 0)
        {
            // I am not the coordinator, I should send my sums out.
            // Payload layout: L[1..d+1] followed by Q[1..d+1], i.e. 2*(d+1) doubles.
            shared_ptr<SharedBuffer> buf(new MemoryBuffer(NULL, sizeof(double) * (d*2+2)));
            double* Gammabuf = static_cast<double*>(buf->getData());
            for (i = 1; i <= d+1; ++i)
            {
                *Gammabuf = nlq.L[i];
                ++Gammabuf;
            }
            for (i = 1; i <= d+1; ++i)
            {
                *Gammabuf = nlq.Q[i];
                ++Gammabuf;
            }
            BufSend(0, buf, query);
            return outputArray;
        }
        else
        {
            // I am the coordinator, I should collect the sums from workers.
            for (InstanceID l = 1; l < query->getInstancesCount(); ++l)
            {
                shared_ptr<SharedBuffer> buf = BufReceive(l, query);
                if (!buf)
                {
                    // Nothing arrived from this instance; do not dereference an empty pointer.
                    continue;
                }
                double* Gammabuf = static_cast<double*>(buf->getData());
                for (i = 1; i <= d+1; ++i)
                {
                    nlq.L[i] += *Gammabuf;
                    ++Gammabuf;
                }
                for (i = 1; i <= d+1; ++i)
                {
                    nlq.Q[i] += *Gammabuf;
                    ++Gammabuf;
                }
            }
        } // end if getInstanceID() != 0
    } // end if InstancesCount() > 1

    return writeGamma(query);
}