// Symbolic object for a strided buffer.
//
// dim_ is initialised from numgt1(shape). For every index i in [0, dim_) the
// constructor registers an "inc<i>" attribute and an index argument "x<i>",
// then emits a single access macro of the form
//   at(x0,x1,...): #pointer[#off + (x0)*#inc0 + (x1)*#inc1 + ...]
// NOTE(review): strides[0] and shape[0] are read unconditionally at the end,
// so both tuples are assumed to be non-empty -- confirm against callers.
buffer::buffer(driver::Context const & context, std::string const & scalartype, unsigned int id, const tuple &shape, tuple const & strides)
  : array(context, scalartype, id),
    dim_(numgt1(shape))
{
  // Attributes: the offset, then one increment per dimension.
  attributes_["off"] = process("#name_off");

  // Per-dimension pieces are built in a single pass: the attribute entry,
  // the macro argument name, and the matching term of the offset expression.
  std::vector<std::string> args;
  std::string off = "#off";
  for(unsigned int i = 0 ; i < dim_ ; ++i)
  {
    std::string const idx = tools::to_string(i);
    std::string const inc = "inc" + idx;

    attributes_[inc] = process("#name_" + inc);
    args.push_back("x" + idx);
    off += " + (" + args.back() + ")*#" + inc;
  }

  // Access macro over all dim_ indices.
  macros_.insert("at(" + tools::join(args, ",") + "): #pointer[" + off + "]");

  // Broadcast: when numgt1(shape) is zero, a one-index access forwards to at().
  if(numgt1(shape)==0)
    macros_.insert("at(i): at()");
  // When dim_ differs from the full shape size, add the macro produced by
  // make_broadcast(shape).
  if(dim_!=shape.size())
    macros_.insert(make_broadcast(shape));

  add_base("buffer");
  add_load(strides[0]==1 && shape[0]>1);
}
// Symbolic object for a loop-index placeholder.
//
// The leaf is created with scalar type "int" and the name
// "sforidx" + tools::to_string(level). Every access form (zero, one or two
// indices) expands to that same name, so the indices are ignored.
placeholder::placeholder(driver::Context const & context, unsigned int level)
  : leaf(context, "int", "sforidx" + tools::to_string(level))
{
  macros_.insert("at(): #name");
  macros_.insert("at(i): #name");
  macros_.insert("at(i,j): #name");

  // The base tag was previously misspelled "placebolder"; the sibling
  // constructors all register their own class name, so use "placeholder".
  add_base("placeholder");
  add_load(false);
}
// Symbolic object for a scalar supplied by the host.
//
// All three access forms (zero, one or two indices) expand to "#name_value",
// so the indices are ignored.
host_scalar::host_scalar(driver::Context const & context, std::string const & scalartype, unsigned int id)
  : leaf(context, scalartype, id)
{
  // Access macros, inserted in the listed order.
  static const char * const access_macros[] = {
    "at(): #name_value",
    "at(i): #name_value",
    "at(i,j): #name_value"
  };
  for(unsigned int m = 0 ; m < sizeof(access_macros)/sizeof(access_macros[0]) ; ++m)
    macros_.insert(access_macros[m]);

  add_base("host_scalar");
  add_load(false);
}
// Symbolic object that is both an array and an expression-tree node.
//
// The array base takes its context from tree.context(); the node base is
// built from (root, op, tree, table). No macros are registered here: the
// constructor only tags the object as "index_modifier" and calls
// add_load(false).
index_modifier::index_modifier(const std::string &scalartype,
                               unsigned int id,
                               size_t root,
                               op_element op,
                               expression_tree const & tree,
                               symbols_table const & table)
  : array(tree.context(), scalartype, id),
    node(root, op, tree, table)
{
  add_base("index_modifier");
  add_load(false);
}
int main(int argc, char *argv[]) { int load = 0; int k = 3; double epsilon = 0.001; int maxsamples = 100; int minparam = 11; int pcount = 14; int lcount = 1; int clear_cache = 0; int tod = 0; double slope, intercept; int i,p,bcount; int cpubench = 0; int compensate = 0; int sleeptime = 10; add_int_option("load", &load); add_int_option("k", &k); add_double_option("epsilon", &epsilon); add_int_option("maxsamples", &maxsamples); add_int_option("minparam", &minparam); add_int_option("clear", &clear_cache); add_int_option("pcount", &pcount); add_int_option("lcount", &lcount); add_int_option("cpubench", &cpubench); add_int_option("tod", &tod); add_int_option("compensate", &compensate); add_int_option("sleeptime", &sleeptime); parse_options(argc, argv, NULL); show_options(stdout); Mhz = mhz_full(1,sleeptime); /* Do callibrations */ find_abs_performance(&intercept, &slope, clear_cache, 1, cpubench); add_load(load, CACHE_LOAD); p = minparam; bcount = lcount; for (i = 0; i < pcount; i++) { double cycs; double expected = slope*p + intercept; double error; if (tod) { if (cpubench) cycs = fcyc_full_tod(cpufunct, p, clear_cache, k, epsilon, maxsamples, compensate); else { cycs = fcyc_full_tod(funct, p, clear_cache, k, epsilon, maxsamples, compensate); } } else { if (cpubench) cycs = fcyc_full(cpufunct, p, clear_cache, k, epsilon, maxsamples, compensate); else { cycs = fcyc_full(funct, p, clear_cache, k, epsilon, maxsamples, compensate); } } printf("Iters= %d, Err= %0.4f, Cycs= %.0f, ms= %.3f", has_converged(k, epsilon, maxsamples), err(k), cycs, cycs / (1e3 * Mhz)); error = (cycs-expected)/expected; printf(", Exp-cyc= %.1f, Exp-ms= %.3f, Actual-Err= %f\n", expected, expected / (Mhz * 1e3), error); p += minparam; bcount--; if (bcount == 0) { bcount = lcount; minparam += 2; } } kill_loads(); return 0; }