void hex_volume_internal(Ioss::ElementBlock *block, const std::vector<double> &coordinates,
                         const std::vector<T> &connectivity, size_t nelem)
{
  // Computes one value per 8-node element (reported below as its "volume"),
  // then prints the block name, the min/max of those values, the element
  // count, and the elapsed timer ticks divided by the element count.
  //
  //   block        - only its name() is used, for the summary line.
  //   coordinates  - read as interleaved triples: [3*n+0], [3*n+1], [3*n+2].
  //   connectivity - 8 entries per element; each entry is decremented by one
  //                  before being used as a node index into `coordinates`.
  //   nelem        - number of elements to process.
  const double twelfth = 1.0 / 12.0;

  // Scratch storage reused for every element.
  double xs[8];
  double ys[8];
  double zs[8];
  double grad[24];

  std::vector<double> volume(nelem);

  const size_t start_ticks = timer();

  size_t since_last_dot = 0;
  for (size_t elem = 0; elem < nelem; ++elem) {
    // Progress indicator: emit a dot once the counter reaches nelem / 100.
    if (since_last_dot++ >= nelem / 100) {
      OUTPUT << ".";
      since_last_dot = 0;
    }

    // Gather the coordinates of this element's eight nodes.
    const size_t first = elem * 8;
    for (size_t corner = 0; corner < 8; ++corner) {
      const size_t node = connectivity[first + corner] - 1;
      const size_t xyz  = node * 3;
      xs[corner]        = coordinates[xyz + 0];
      ys[corner]        = coordinates[xyz + 1];
      zs[corner]        = coordinates[xyz + 2];
    }

    comp_grad12x(&grad[0], xs, ys, zs);
    const double twelve_vol = dot8(xs, &grad[0]);
    volume[elem]            = twelve_vol * twelfth;
  }

  const size_t end_ticks = timer();

  // Nothing to summarize (and nothing to divide by) for an empty block.
  if (nelem == 0) {
    return;
  }

  const double smallest = *std::min_element(volume.begin(), volume.end());
  const double largest  = *std::max_element(volume.begin(), volume.end());
  const double per_elem = static_cast<double>(end_ticks - start_ticks) / nelem;

  OUTPUT << "\n"
         << std::setw(12) << block->name() << "\tMin volume = " << std::setw(12) << smallest
         << " Max volume = " << std::setw(12) << largest << " Elements = " << std::setw(12)
         << nelem << " Time/Elem = " << per_elem << " micro-sec\n";
}
/*
 * Release `count` vectors and the pointer table that holds them.
 * Accepts NULL for `vecs` so callers can use it unconditionally on error paths.
 */
static void free_vectors(unsigned char **vecs, int count)
{
    int i;

    if (vecs == NULL) {
        return;
    }
    for (i = 0; i < count; i++) {
        free(vecs[i]);
    }
    free(vecs);
}

/*
 * Build `count` compressed vectors of `dim` bytes each.
 *
 * Each vector is generated by filling `scratch` (caller-provided, at least
 * `dim` floats) with gauss_dev(0, 1) samples, passing it through
 * scv_normalize(), and compressing it with compress_vector().  After each
 * vector is created it is swapped with a randomly chosen slot in [0, i],
 * so the table ends up in shuffled order.
 *
 * Returns the pointer table, or NULL if any allocation fails (in which case
 * everything allocated so far has been released).
 */
static unsigned char **make_shuffled_vectors(int count, int dim, float *scratch)
{
    double drand48();
    unsigned char **vecs;
    unsigned char *swap;
    int i, j;

    vecs = calloc((size_t)count, sizeof(vecs[0]));
    if (vecs == NULL) {
        return NULL;
    }

    for (i = 0; i < count; i++) {
        vecs[i] = calloc((size_t)dim, sizeof(vecs[0][0]));
        if (vecs[i] == NULL) {
            /* Slots 0..i-1 hold valid allocations; slot i is NULL. */
            free_vectors(vecs, i);
            return NULL;
        }

        for (j = 0; j < dim; j++) {
            scratch[j] = gauss_dev(0, 1);
        }
        scv_normalize(scratch);
        compress_vector(vecs[i], scratch, dim);

        /* Swap the new vector with a random earlier-or-same slot. */
        j = drand48() * (i + 1);
        swap = vecs[i];
        vecs[i] = vecs[j];
        vecs[j] = swap;
    }

    return vecs;
}

/*
 * Benchmark driver: generates random compressed document and topic vectors,
 * then times dot8() over every (document, topic) pair in two loop orders
 * (documents outermost, then topics outermost) and prints the elapsed time
 * for each.
 *
 * Returns 0 on success, 1 if memory allocation fails.
 */
int main(int argc, char *argv[])
{
    enum { NUM_DOCS = 100, NUM_TOPICS = 1000, VEC_DIM = 280 };

    int i, j;
    unsigned char r[NUM_DOCS * NUM_TOPICS];
    unsigned char **threaded_topics, **threaded_docs;
    float t0, t1;
    float tmp[VEC_DIM];

    (void)argc;
    (void)argv;

    init_compand();

    /* Documents are generated before topics to keep the random-call order. */
    threaded_docs = make_shuffled_vectors(NUM_DOCS, VEC_DIM, tmp);
    if (threaded_docs == NULL) {
        fprintf(stderr, "out of memory allocating document vectors\n");
        return 1;
    }

    threaded_topics = make_shuffled_vectors(NUM_TOPICS, VEC_DIM, tmp);
    if (threaded_topics == NULL) {
        fprintf(stderr, "out of memory allocating topic vectors\n");
        free_vectors(threaded_docs, NUM_DOCS);
        return 1;
    }

    /* convectis style threaded matrix by vector by matrix:
     * documents in the outer loop, topics in the inner loop. */
    /* NOTE(review): the elapsed value is printed as seconds although the
     * timer helper is named millitime() -- confirm its units. */
    t0 = millitime();
    for (i = 0; i < NUM_DOCS; i++) {
        for (j = 0; j < NUM_TOPICS; j++) {
            r[i * NUM_TOPICS + j] = dot8(threaded_topics[j], threaded_docs[i], VEC_DIM);
        }
    }
    t1 = millitime();
    printf("%.3f seconds for %d convectis style categorizations\n", t1-t0, NUM_DOCS);

    /* Same computation with the loops interchanged: topics outermost. */
    t0 = millitime();
    for (j = 0; j < NUM_TOPICS; j++) {
        for (i = 0; i < NUM_DOCS; i++) {
            r[i * NUM_TOPICS + j] = dot8(threaded_topics[j], threaded_docs[i], VEC_DIM);
        }
    }
    t1 = millitime();
    printf("%.3f seconds for %d convectis matrix categorizations\n", t1-t0, NUM_DOCS);

    free_vectors(threaded_topics, NUM_TOPICS);
    free_vectors(threaded_docs, NUM_DOCS);
    return 0;
}