int main(void) { usart0_init(25, 8, 1, USART_PARITY_EVEN,1); //38400 usart1_init(25, 8, 1, USART_PARITY_DISABLED,0); //TODO check oi_alternative OI_ALTERNATE_BAUD_RATE _delay_ms(333); oi_switch_baud_rate(); _delay_ms(333); oi_init(); oi_full_mode(); int i; int val; for (i = 0; i < 10; i++) { val = i % 2; oi_set_leds(val, val, val, val, 0xFF * val, 0xFF); _delay_ms(50); } oi_init(); //input_capture_test(); printf0("Hello world!\r\n"); printf0(" 0x%02X 0x%02X 0x%02X", UCSR0A, UCSR0B, UCSR0C); _delay_ms(3000); doSweepLoop(); doPingLoop(); doIrLoop(); servo_test(); }
int main(int argc, char *argv[]) { int status = 1; int mu; const char *g_name; QDP_ColorMatrix *U[NDIM]; QLA_Real plaq; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 1) { printf0("ERROR: usage: %s Lx ... gauge-file\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) lattice[mu] = atoi(argv[1 + mu]); g_name = argv[1 + NDIM]; /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); /* read gauge field */ if (read_gauge(U, g_name) != 0) { printf0("ERROR: read_gauge(%s)\n", g_name); goto end; } /* Compute plaquette */ plaq = plaquette(U); /* delete the gauge field */ destroy_Mvector(U, NELEMS(U)); /* Display the value */ printf0("plaquette{%s} = %g\n", argv[1], plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); status = 0; end: /* shutdown QDP */ QDP_finalize(); return status; }
/*
 * Opens /dev/cpu/0/msr read-only.
 *
 * Returns the file descriptor, or -1 if open() failed; in that case the
 * strerror() text for errno is reported through printf0().
 */
static int openMsrFile()
{
    const int msr_fd = open("/dev/cpu/0/msr", O_RDONLY);

    if (msr_fd != -1)
        return msr_fd;

    printf0("failed opening msr file: \"%s\". no rapl measurements will be done.\n", strerror(errno));
    return msr_fd;
}
/*
 * Evaluates QDP_c_eq_D_dot_D() for fermions `a` and `b` on the QDP_all subset
 * and prints the real and imaginary parts of the result, labelled with `name`.
 */
static void show_dot(const char *name,
                     QDP_DiracFermion *a,
                     QDP_DiracFermion *b)
{
    QLA_Complex dot;

    QDP_c_eq_D_dot_D(&dot, a, b, QDP_all);

    printf0(" <%s> = %30.20e %+30.20e\n",
            name,
            QLA_real(dot),
            QLA_imag(dot));
}
/*
 * Initializes the ping sensor with a 1/1024 timer prescaler and then prints
 * the value returned by ping_cm_busy_wait() forever. Never returns.
 */
void doPingLoop()
{
    timer_prescaler_t prescaler = TIMER_ONE_1024TH;

    ping_init(prescaler);

    while (1) {
        unsigned cm = ping_cm_busy_wait(prescaler);

        /* cm is unsigned, so it must be formatted with %u rather than %d */
        printf0("%u cm\r\n", cm);
    }
}
void servo_test() { servo_data_t rservo; //fake malloc servo_data_t *servo = &rservo; servo_init(servo); servo_calibrate(servo, 8, 35); servo_set_position_deg(servo, 90); while (1) { if (isAvailable0()) { char c = getChar0(); switch (c) { case '0': servo_set_position_deg(servo, 0); break; case '4': servo_set_position_deg(servo, 45); break; case '9': servo_set_position_deg(servo, 90); break; case '3': servo_set_position_deg(servo, 135); break; case '8': servo_set_position_deg(servo, 180); break; case '+': servo_increment_degrees(servo, 1); break; case '-': servo_decrement_degrees(servo, 1); break; default: break; } double calcDeg = servo_calculate_position_deg(servo); char buff[300]; ftoa(buff, calcDeg); printf0("Cur pulse width: %u\t deg: %u\t calcDeg: %s\r\n", servo->cur_pulse_width, servo->desired_deg, buff); } } }
int main(int argc, char *argv[]) { int fpos[NDIM]; int c, d, ri; int gamma; int status = 1; int mu; QDP_DiracFermion *f; QDP_DiracFermion *g; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + 2 * NDIM + 4) { printf0("ERROR: usage: %s Lx ... x ... c d r/i gamma\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) lattice[mu] = atoi(argv[1 + mu]); for (mu = 0; mu < NDIM; mu++) fpos[mu] = atoi(argv[1 + NDIM + mu]); c = atoi(argv[1 + 2 * NDIM]); d = atoi(argv[1 + 2 * NDIM + 1]); ri = atoi(argv[1 + 2 * NDIM + 2]); gamma = atoi(argv[1 + 2 * NDIM + 3]); /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); f = QDP_create_D(); g = QDP_create_D(); point_fermion(f, fpos, c, d, ri); dump_fermion("check-gamma-f", f); QDP_D_eq_gamma_times_D(g, f, gamma, QDP_all); dump_fermion("check-gamma-g", g); QDP_destroy_D(g); QDP_destroy_D(f); status = 0; end: /* shutdown QDP */ QDP_finalize(); return status; }
/*
 * Runs QOP_wilson_invert_qdp() on packed host arrays.
 *
 *   l    layout handed to the pack/unpack helpers
 *   x    initial guess on entry, overwritten with the result on return
 *   u    gauge links; only the even entries u[0], u[2], u[4], u[6] are read
 *   mass value forwarded to the inverter
 *   y    right-hand side
 *   rsq  stored in res_arg.rsqmin
 *   sub  sub[0] == 'e' selects QOP_EVEN, 'o' selects QOP_ODD, anything else
 *        leaves QOP_EVENODD
 *
 * NOTE(review): the links object returned by QOP_wilson_create_L_from_qdp()
 * is not released anywhere in this function -- confirm whether that is
 * intentional.
 */
void qopWilsonSolve(Layout *l, real *x, real *u[8], real mass, real *y, double rsq, char *sub)
{
  QDP_ColorMatrix *links[4];
  QDP_DiracFermion *src = QDP_create_D();
  QDP_DiracFermion *dst = QDP_create_D();
  QLA_Real two = 2;

  unpackD(l, src, y);
  unpackD(l, dst, x);

  /* Load every link and multiply it by two. */
  for(int dir=0; dir<4; dir++) {
    links[dir] = QDP_create_M();
    unpackM(l, links[dir], u[2*dir]);
    QDP_M_eq_r_times_M(links[dir], &two, links[dir], QDP_all);
  }

  QOP_FermionLinksWilson *fla = QOP_wilson_create_L_from_qdp(links, NULL);

  QOP_evenodd_t parity;
  switch(sub[0]) {
  case 'e': parity = QOP_EVEN;    break;
  case 'o': parity = QOP_ODD;     break;
  default:  parity = QOP_EVENODD; break;
  }

  QOP_info_t info = QOP_INFO_ZERO;

  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = parity;
  inv_arg.mixed_rsq = 0;

  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;

  QDP_D_eq_zero(dst, QDP_even);

  //QOP_verbose(3);
  QOP_wilson_invert_qdp(&info, fla, &inv_arg, &res_arg, mass, dst, src);
  //QLA_Real n2;
  //QDP_r_eq_norm2_D(&n2, (QDP_DiracFermion*)out, QDP_all);
  printf0("QOP its: %i\n", res_arg.final_iter);

  packD(l, x, dst);

  QDP_destroy_D(src);
  QDP_destroy_D(dst);
  for(int dir=0; dir<4; dir++) {
    QDP_destroy_M(links[dir]);
  }
}
int main(int argc, char **argv) { init_mpi(&argc, &argv); if (mpi.rank == 0) { init_gwclock(); } // xtry(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)); double t = get_gwclock(); cpp_main(atoi(argv[1])); printf0("time_all=%.17g\n", get_gwclock() - t); MPI_Finalize(); return 0; }
void doIrLoop() { //adc_set_vref(ADC_INTERNAL_VREF);//apply 3.1 volts to ADC_AREF (see ) //TODO setup where to get ref voltage from //TODO create method find_good_prescaler_for_adc_based_on_f_cpu //TODO this table is for Jim's IR sensor with 3V applied to AREF //TODO this table's distance values is in inches * 10 (e.g. 36" = 360) //TODO convert to mm or cm list_t *lookup_table = create_jims_ir_sensor_lookup_table(); ir_init(ADC_ONE_64TH, ADC_AREF, 2); while (1) { unsigned voltage = ir_read_voltage_avg(5); unsigned calculatedDist = ir_lookup_distance(lookup_table, voltage); printf0("%d volts \t\t calculatedDist= %u\r\n", voltage, calculatedDist); } }
/*
 * Benchmarks QOP_wilson_invert().
 *
 * The inverter is called nit+1 times; the first call (i == 0) is excluded
 * from the accumulated statistics. Every call prints one line with the
 * iteration index, iteration count, seconds and flops.
 *
 * On return res_arg->final_iter, info->final_sec and info->final_flop hold
 * per-call averages (seconds and flops are additionally summed with
 * QMP_sum_double() and divided by the summed value of 1, i.e. averaged over
 * the participants of that sum). The return value is final_flop divided by
 * (1e6 * final_sec).
 *
 * The even-subset norm of `out` from the first invocation of this function is
 * remembered in a static and later results are compared against it.
 */
double bench_inv(QOP_info_t *info, QOP_invert_arg_t *inv_arg,
                 QOP_resid_arg_t *res_arg, QDP_DiracFermion *out, QDP_DiracFermion *in)
{
  static QLA_Real r2s=-1, r2;
  double sec=0, flop=0, mf=0;
  int i, iter=0;
  QOP_DiracFermion *qopout, *qopin;

  QDP_D_eq_zero(out, QDP_all);
  qopout = QOP_create_D_from_qdp(out);
  qopin = QOP_create_D_from_qdp(in);

  for(i=0; i<=nit; i++) {
    QMP_barrier();
    QOP_wilson_invert(info, flw, inv_arg, res_arg, kappa, qopout, qopin);
    QMP_barrier();
    /* The flop count is printed as a floating-point value: converting it to
     * int is undefined once it exceeds INT_MAX. */
    printf("%i\t%i\t%g\t%.0f\n", i, res_arg->final_iter, info->final_sec,
           (double)info->final_flop);
    if(i>0) {
      iter += res_arg->final_iter;
      sec += info->final_sec;
      flop += info->final_flop;
      //mf += info->final_flop/(1e6*info->final_sec);
    }
  }

  QOP_destroy_D(qopout);
  QOP_destroy_D(qopin);

  /* Consistency check against the norm seen on the first invocation. */
  QDP_r_eq_norm2_D(&r2, out, QDP_even);
  if(r2s<0) r2s = r2;
  if(fabs(1-r2/r2s)>1e-3) {
    printf0("first norm = %g this norm = %g\n", r2s, r2);
  }

  /* Summing the constant 1 yields the number of contributors to the sums. */
  mf = 1;
  QMP_sum_double(&mf);
  QMP_sum_double(&sec);
  QMP_sum_double(&flop);

  res_arg->final_iter = iter/nit;
  info->final_sec = sec/(mf*nit);
  info->final_flop = flop/(mf*nit);
  mf = info->final_flop/(1e6*info->final_sec);
  return mf;
}
/*
 * Applies QOP_wilson_dslash_qdp() to packed host arrays.
 *
 *   l     layout handed to the pack/unpack helpers
 *   x     unpacked into the output field before the call, overwritten after
 *   u     gauge links; only the even entries u[0], u[2], u[4], u[6] are read
 *   mass  converted to kappa = 0.5 / (4 + mass)
 *   sign  forwarded unchanged
 *   y     input field
 *   sub   sub[0] == 'e': output even / input odd; 'o': output odd / input
 *         even; anything else: both QOP_EVENODD
 *
 * The norm of the output over QDP_all is printed before packing.
 *
 * NOTE(review): the links object returned by QOP_wilson_create_L_from_qdp()
 * is not released anywhere in this function -- confirm whether that is
 * intentional.
 */
void qopWilsonDslash(Layout *l, real *x, real *u[8], real mass, int sign, real *y, char *sub)
{
  QDP_ColorMatrix *links[4];
  QDP_DiracFermion *src = QDP_create_D();
  QDP_DiracFermion *dst = QDP_create_D();
  QLA_Real two = 2;

  unpackD(l, src, y);
  unpackD(l, dst, x);

  /* Load every link and multiply it by two. */
  for(int dir=0; dir<4; dir++) {
    links[dir] = QDP_create_M();
    unpackM(l, links[dir], u[2*dir]);
    QDP_M_eq_r_times_M(links[dir], &two, links[dir], QDP_all);
  }

  QOP_FermionLinksWilson *fla = QOP_wilson_create_L_from_qdp(links, NULL);

  QOP_evenodd_t eoOut, eoIn;
  switch(sub[0]) {
  case 'e':
    eoOut = QOP_EVEN;
    eoIn = QOP_ODD;
    break;
  case 'o':
    eoOut = QOP_ODD;
    eoIn = QOP_EVEN;
    break;
  default:
    eoOut = QOP_EVENODD;
    eoIn = QOP_EVENODD;
    break;
  }

  real kappa = 0.5/(4+mass);
  QOP_wilson_dslash_qdp(NULL, fla, kappa, sign, dst, src, eoOut, eoIn);

  QLA_Real n2;
  QDP_r_eq_norm2_D(&n2, dst, QDP_all);
  printf0("out2: %g\n", n2);

  packD(l, x, dst);

  QDP_destroy_D(src);
  QDP_destroy_D(dst);
  for(int dir=0; dir<4; dir++) {
    QDP_destroy_M(links[dir]);
  }
}
/*
 * Benchmarks QOP_symanzik_1loop_gauge_action().
 *
 * The action is evaluated nit+1 times; the first evaluation is not counted.
 * The file-scope `secs` and `flops` receive the per-call averages and the
 * averaged Mflop/s figure is returned.
 *
 * With the `#if 1` section enabled, the last action values are printed and a
 * single debug-verbosity force evaluation is run with temporarily rescaled
 * coefficients, which are scaled back afterwards.
 */
double bench_action(QOP_gauge_coeffs_t *coeffs, QOP_Force *out)
{
  double total_sec = 0, total_flop = 0, total_mf = 0;
  QLA_Real acts, actt;
  QOP_info_t info = QOP_INFO_ZERO;

  for(int run=0; run<=nit; run++) {
    QOP_symanzik_1loop_gauge_action(&info, gauge, &acts, &actt, coeffs);
    if(run == 0) {
      continue;   /* warm-up evaluation is not timed */
    }
    total_sec += info.final_sec;
    total_flop += info.final_flop;
    total_mf += info.final_flop/(1e6*info.final_sec);
  }

#if 1
  printf0("action s: %g t: %g tot: %g\n", acts, actt, acts+actt);

  /* Rescale the coefficients for the force call ... */
  coeffs->plaquette /= 4;
  coeffs->rectangle /= 6;
  coeffs->parallelogram /= 6;
  coeffs->adjoint_plaquette /= 8;

  QLA_Real eps=1;
  QOP_verbose(QOP_VERB_DEBUG);
  QOP_symanzik_1loop_gauge_force(&info, gauge, out, coeffs, eps);
  QOP_verbose(QOP_VERB_OFF);

  /* ... and scale them back for the caller. */
  coeffs->plaquette *= 4;
  coeffs->rectangle *= 6;
  coeffs->parallelogram *= 6;
  coeffs->adjoint_plaquette *= 8;
#endif

  secs = total_sec/nit;
  flops = total_flop/nit;
  return total_mf/nit;
}
void doSweepLoop() { sei(); servo_data_t *servo = create_jim_servo(); ir_init(ADC_ONE_64TH, ADC_AREF, 2); timer_prescaler_t prescaler = TIMER_ONE_1024TH; ping_init(prescaler); oi_t *oiSensor = malloc(sizeof(oi_t)); oi_tare_encoders(&(oiSensor->left_encoder), &(oiSensor->right_encoder)); int velocity = 0; int radius = 0; int leftWheelVelocity = 0; int rightWheelVelocity = 0; oi_full_mode(); ir_enable_continous_mode(); sendPing(); printf0("creating stored ir sensor lookup table...\r\n"); list_t *lookup_table = create_jims_ir_sensor_lookup_table(); printf0("done...\r\n"); while (1) { char c = '\0'; char ping_available = 0; unsigned p_cm; if (volatile_ping_capture_complete) { ping_available = 1; unsigned long end_capture_count = tmr1_read_input_capture_count(); unsigned long end_time_cap = (volatile_timer1_overflows << 16) | end_capture_count; unsigned long delta = end_time_cap - volatile_ping_send_pulse_start_time; p_cm = ping_count_to_cm(prescaler, delta); //send again sendPing(); } int requestIrCalibration = 0; handleInput(servo, &leftWheelVelocity, &rightWheelVelocity, &requestIrCalibration); if (requestIrCalibration) { lfreefree(lookup_table); cli(); lookup_table = create_ir_lookup_table_from_ping(servo, prescaler); sei(); } unsigned voltage = ir_read_voltage_avg(1); unsigned calculatedDist = ir_lookup_distance(lookup_table, voltage); double ir_cm = calculatedDist * 0.254; ir_cm = calculatedDist / 10; char buff[200]; ftoa(buff, ir_cm); //unsigned p_cm; //= ping_cm_busy_wait(prescaler); //p_cm = ping_count_to_cm(prescaler, volatile_timer1_capture_count); unsigned curDeg = servo_calculate_position_deg(servo); //printf0("%d volts \t\t calculatedDist= %u \r\n", voltage, calculatedDist); printf0("%uº \t %s ir_cm [%u v]\t", curDeg, buff, voltage); printf0(" pw=%u\t", servo->cur_pulse_width); if (ping_available) printf0("%u p_cm\t", p_cm); printf0("\r\n"); } }
/*
 * Processes at most one pending keyboard character.
 *
 *   servo                 servo moved by '0' '4' '9' '3' '8' '+' '-'
 *   leftWheelVelocity     in/out, changed by 'e' (+50) 'd' (-50) ' ' 'b'
 *   rightWheelVelocity    in/out, changed by 'r' (+50) 'f' (-50) ' ' 'b'
 *   requestIrCalibration  out, cleared on entry and set to 1 by 'c'
 *
 * Other keys: 'F' full mode, 's' safe mode, 'R' reset. 'b' drives both wheels
 * at -50 for four seconds and then stops.
 */
void handleInput(servo_data_t *servo, unsigned int *leftWheelVelocity, unsigned int *rightWheelVelocity, int *requestIrCalibration)
{
    *requestIrCalibration = 0;

    if (!isAvailable0())
        return;

    const char key = getChar0();

    switch (key) {
    /* --- servo positioning --- */
    case '0': servo_set_position_deg(servo, 0);   break;
    case '4': servo_set_position_deg(servo, 45);  break;
    case '9': servo_set_position_deg(servo, 90);  break;
    case '3': servo_set_position_deg(servo, 135); break;
    case '8': servo_set_position_deg(servo, 180); break;
    case '+': servo_increment_degrees(servo, 4);  break;
    case '-': servo_decrement_degrees(servo, 4);  break;

    /* --- wheel speed adjustments --- */
    case 'e':
        *leftWheelVelocity += 50;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        break;
    case 'd':
        *leftWheelVelocity -= 50;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        break;
    case 'r':
        *rightWheelVelocity += 50;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        break;
    case 'f':
        *rightWheelVelocity -= 50;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        break;
    case ' ':
        *rightWheelVelocity = 0;
        *leftWheelVelocity = 0;
        oi_set_wheels(0, 0);
        break;
    case 'b':
        /* back up for four seconds, then stop */
        *rightWheelVelocity = -50;
        *leftWheelVelocity = -50;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        _delay_ms(4000);
        *rightWheelVelocity = 0;
        *leftWheelVelocity = 0;
        oi_set_wheels(*leftWheelVelocity, *rightWheelVelocity);
        break;

    /* --- mode / misc --- */
    case 'F':
        oi_full_mode();
        printf0("Full Mode \r\n");
        break;
    case 's':
        oi_safe_mode();
        printf0("safe mode\r\n");
        break;
    case 'R':
        printf0("reset\r\n");
        oi_reset();
        break;
    case 'c':
        *requestIrCalibration = 1;
        break;

    default:
        break;
    }
}
void start(void) { double mf, best_mf; QLA_Real plaq; QDP_ColorMatrix **u; int i, bs, bsi, best_bs; u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *)); for(i=0; i<ndim; i++) u[i] = QDP_create_M(); get_random_links(u, ndim, 0.3); plaq = get_plaq(u); if(QDP_this_node==0) printf("plaquette = %g\n", plaq); QOP_layout_t qoplayout = QOP_LAYOUT_ZERO; qoplayout.latdim = ndim; qoplayout.latsize = (int *) malloc(ndim*sizeof(int)); for(i=0; i<ndim; i++) { qoplayout.latsize[i] = lattice_size[i]; } qoplayout.machdim = -1; if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); } QOP_init(&qoplayout); gauge = QOP_create_G_from_qdp(u); QOP_Force *force; QDP_ColorMatrix *cm[4]; for(i=0; i<4; i++) { cm[i] = QDP_create_M(); QDP_M_eq_zero(cm[i], QDP_all); } QOP_gauge_coeffs_t gcoeffs = QOP_GAUGE_COEFFS_ZERO; gcoeffs.plaquette = 0.2; gcoeffs.rectangle = 0.2; gcoeffs.parallelogram = 0.2; gcoeffs.adjoint_plaquette = 0.2; force = QOP_create_F_from_qdp(cm); mf = bench_action(&gcoeffs, force); QOP_destroy_F(force); printf0("action: sec%7.4f mflops = %g\n", secs, mf); if(QDP_this_node==0) { printf("begin force\n"); fflush(stdout); } best_mf = 0; best_bs = bsa[0]; for(bsi=0; bsi<bsn; bsi++) { bs = bsa[bsi]; QDP_set_block_size(bs); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QOP_destroy_F(force); printf0("GF: bs%5i sec%7.4f mflops = %g\n", bs, secs, mf); if(mf>best_mf) { best_mf = mf; best_bs = bs; } } QDP_set_block_size(best_bs); QDP_profcontrol(1); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QDP_profcontrol(0); printf0("prof: GF: bs%5i sec%7.4f mflops = %g\n", best_bs, secs, mf); printf0("best: GF: bs%5i mflops = %g\n", best_bs, best_mf); if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); } //QOP_asqtad_invert_unload_links(); if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); } QOP_finalize(); }
/*
 * Builds an IR lookup table by backing the robot away from a flat surface and
 * pairing IR readings with ping distances in millimetres.
 *
 * Phases:
 *   1. wait for the operator to press 'c';
 *   2. back up in steps until the IR reading equals the maximum (1023);
 *   3. keep backing up while the reading stays at the maximum, tracking the
 *      ping distance; the last pair becomes the first table entry;
 *   4. keep backing up and add one entry per step until the reading is no
 *      longer above the minimum (130); a step is retried in smaller
 *      increments while the ping distance does not increase.
 *
 * Returns a newly allocated list, sorted with compare_ir_measurements.
 */
list_t *create_ir_lookup_table_from_ping(servo_data_t *servo, timer_prescaler_t prescaler)
{
    enum {
        MAX_VOLTAGE   = 1023,
        MIN_VOLTAGE   = 130,
        SAMPLES       = 5,
        SPEED         = 50,   /* 50 mm per second */
        DELAY_5MM_MS  = 100
    };

    oi_set_wheels(0, 0);
    servo_set_position_deg(servo, 90);

    /* Phase 1: operator confirmation. */
    printf0("Put roomba in front of flat surface like a wall.  Press 'c' when ready...\r\n");
    while (getChar0() != 'c')
        printf0("Put roomba in front of flat surface like a wall.  Press 'c' when ready...\r\n");

    unsigned reading = ir_read_voltage_avg(SAMPLES);
    printf0("start: %u\r\n", reading);

    /* Phase 2: retreat 1 cm at a time until the sensor saturates. */
    while (reading != MAX_VOLTAGE) {
        printf0("%u\r\n", reading);
        oi_set_wheels(-SPEED, -SPEED);
        _delay_ms(2 * DELAY_5MM_MS);
        oi_set_wheels(0, 0);
        reading = ir_read_voltage_avg(SAMPLES);
    }

    _delay_ms(100);
    unsigned peakMm = ping_mm_busy_wait(prescaler);

    /* Phase 3: retreat until the reading drops below saturation. */
    while (reading == MAX_VOLTAGE) {
        printf0("[%u v], %u p_mm \r\n", reading, peakMm);
        oi_set_wheels(-SPEED, -SPEED);
        _delay_ms(DELAY_5MM_MS * 2);
        oi_set_wheels(0, 0);
        _delay_ms(100);
        peakMm = ping_mm_busy_wait(prescaler);
        reading = ir_read_voltage_avg(SAMPLES);
    }

    list_t *table = lalloc();
    ladd(table, (void *) new_ir_measurement(MAX_VOLTAGE, peakMm));

    /* Phase 4: one table entry per 1 cm step. */
    unsigned previousMm = peakMm;
    while (reading > MIN_VOLTAGE) {
        _delay_ms(100);
        unsigned currentMm = ping_mm_busy_wait(prescaler);
        printf0("reading [%u v], %u mm >? %u oldMm \r\n", reading, currentMm, previousMm);

        /* Distances must strictly increase; nudge 5 mm and re-measure. */
        while (currentMm <= previousMm) {
            printf0("Non increasing ping value: [OLD: %u] [NEW: %u]\r\n", previousMm, currentMm);
            _delay_ms(100);
            currentMm = ping_mm_busy_wait(prescaler);
            oi_set_wheels(-SPEED, -SPEED);
            _delay_ms(DELAY_5MM_MS);
            oi_set_wheels(0, 0);
        }

        printf0(" adding [%u v], %u mm \r\n", reading, currentMm);
        ladd(table, (void *) new_ir_measurement(reading, currentMm));

        oi_set_wheels(-SPEED, -SPEED);
        _delay_ms(DELAY_5MM_MS * 2);
        oi_set_wheels(0, 0);

        reading = ir_read_voltage_avg(SAMPLES);
        previousMm = currentMm;
    }

    printf0("sorting...\r\n");
    lmergesort(table, 0, table->length - 1,
               (int (*)(const void *, const void *)) compare_ir_measurements);
    printf0("done\r\n");

    return table;
}
/* * Printing implemented parameters */ void read_init_arg(int argc, char *argv[] ) { int opt, p=0, fail=0, mu; optind = 0; while ((opt = getopt(argc, argv, options)) != -1) { switch (opt) { case 'c': conf_file = optarg; break; case 'f': conf_format = atoi(optarg); break; case 'i': init.init_file = optarg; break; case 'L': optind--; mu=0; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 3) { printf0("Error: too many arguments in -L.\n"); p++; fail++; break; } init.global_lattice[mu] = atoi(argv[optind]); mu++; } if(mu < 4) { printf0("Warning: too few arguments in -L.\n"); p++; fail++; } break; case 'p': optind--; mu=0; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 3) { printf0("Error: too many arguments in -p.\n"); p++; fail++; break; } init.procs[mu] = atoi(argv[optind]); mu++; } if(mu < 4) { printf0("Warning: too few arguments in -p.\n"); p++; } break; case 'B': optind--; mu=0; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 3) { printf0("Error: too many arguments in -B.\n"); p++; fail++; break; } init.block_lattice[mu] = atoi(argv[optind]); mu++; } if(mu < 4) { printf0("Warning: too few arguments in -B.\n"); p++; } break; case 'l': init.number_of_levels = atoi(optarg); break; case 'k': init.kappa = atof(optarg); break; case 'w': init.csw = atof(optarg); break; case 'u': init.mu = atof(optarg); break; case 't': init.number_openmp_threads = atoi(optarg); break; case '?': case 'h': p++; break; default: break; } } if(conf_file == NULL) { printf0("Error: configuration file is missing (use -c PATH).\n"); p++; fail++; } if(p) { help(argv[0]); MPI_Abort(MPI_COMM_WORLD,0); } if(fail) MPI_Abort(MPI_COMM_WORLD,0); }
void read_params_arg(int argc, char *argv[] ) { int opt, p=0, fail=0, mu; optind = 0; while ((opt = getopt(argc, argv, options)) != -1) { switch (opt) { case 'r': residual = atof(optarg); break; case 'K': params.kcycle_tolerance = atof(optarg); break; case 'C': params.coarse_tolerance = atof(optarg); break; case 'V': optind--; mu=0; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 2) { printf0("Error: too many arguments in -V.\n"); p++; fail++; break; } params.mg_basis_vectors[mu] = atoi(argv[optind]); mu++; } break; case 'm': optind--; params.mu_factor[0]=1; mu=1; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 3) { printf0("Error: too many arguments in -m.\n"); p++; fail++; break; } params.mu_factor[mu] = atof(argv[optind]); mu++; } break; case 's': optind--; mu=0; for ( ; optind < argc && *argv[optind] != '-'; optind++){ if(mu > 2) { printf0("Error: too many arguments in -s.\n"); p++; fail++; break; } params.setup_iterations[mu] = atoi(argv[optind]); mu++; } break; case 'v': params.print = 1; break; case '?': default: break; } } if(conf_file == NULL) { printf0("Error: configuration file is missing (use -c PATH).\n"); p++; fail++; } if(p) { help(argv[0]); MPI_Abort(MPI_COMM_WORLD,0); } if(fail) MPI_Abort(MPI_COMM_WORLD,0); }
// reservation server protocol: // send a MSG_RESERVE_REQUEST with the list of nodes you want to reserve // if blocking, you'll block until the nodes are all available, at which point you'll get an ok // if non-blocking, you'll immediately either get an ok or a fail, depending on whether the request is satisfiable // when you want to unreserve, send a MSG_RESERVE_RELEASE with the nodes you want to release; you'll get NOTHING BACK void reservation_server(void) { // allocate a list of blocked tasks and the requests they made BlockList bl[MAX_ENGINEERS]; // initialize all of the bl elements to invalid int blIndex; for (blIndex=0; blIndex<MAX_ENGINEERS; blIndex++) { bl[blIndex].tid = -1; // tid == -1 means invalid } // deadlock detection algorithm: // * maintain hold list (hl) in addition to bl. // * accomodateRequest also returns a list of engineers I'm waiting on // * if I do need to block, this list is necessarily non-empty // * then check the block lists for these same engineers and see if they're blocked on anything I hold // * if so, that's a deadlock. // but there's no way I can implement this tonight. RegisterAs("reservationserver"); FOREVER { // receive buffers int tid; MsgReservation request; MsgReservation reply; Receive(&tid, (char *)&request, sizeof(MsgReservation)); switch(request.type) { case MSG_RESERVE_REQUEST_BLOCKING: ; Node *rcNode = resConflict(request.numNodes, request.nodes, tid); if (rcNode == NULL) { // if the request can be accomodated, log it logReservation(request.numNodes, request.nodes, tid, request.iAmPacman, request.trainNum, request.curDir); } else { // else if the request cannot be accomodated // deadlock detection printf0("Res Serv.: Checking for deadlock...\n\r"); // the tid of the task that currently has this reservation int otherTid = (rcNode->type == NODE_SWITCH) ? ((Switch *)(rcNode))->reserverTid : ((Sensor *)(rcNode))->reserverTid; int blIndex = findTidInBlockList(otherTid, bl); printf2("Res Serv.: otherTid: %d, otherIsblocked? 
%d!\n\r", otherTid, blIndex != -1); // if this other tid is blocked and wants a node we own, that's a deadlock if (blIndex != -1 && blWantsNodeHeldBy(bl[blIndex], tid)) { // we now know we have a deadlock, so it's time to resolve it printf2("Res Serv.: DEADLOCK between tr. %d and tr. %d!\n\r", request.trainNum, bl[blIndex].trainNum); // if we're the pacman or // the other one isn't a pacman and we have the lower train number if (request.iAmPacman || (!((rcNode->type == NODE_SWITCH) ? ((Switch *)rcNode)->reserverIAmPacman : ((Sensor *)rcNode)->reserverIAmPacman) && (request.trainNum <= ((rcNode->type == NODE_SWITCH) ? ((Switch *)rcNode)->reserverTrainNum : ((Sensor *)rcNode)->reserverTrainNum) ))) { // find a node to move to Node *nodeToMoveTo = moveAway(request.trainLoc, request.curDir == 'F' ? 'B' : 'F'); printf1("Res Serv.: GTFO self (train %d) to ", request.trainNum); if (nodeToMoveTo == NULL) { printf0("NULL"); } else { printNode2(nodeToMoveTo); } printf1(", curDir %c!\n\r", (int)request.curDir); if (nodeToMoveTo == NULL) { // if we can't move away, it's the game over case printf0("res. serv.: case 1.\n\r"); int pacmanServerTid = WhoIs("pacmanserver"); char c = MSG_PACMAN_GAMEOVER; int retVal = Send(pacmanServerTid, &c, 1, NULL, 0); printf1("res. serv.: retVal from Reply to PMS: %d\n\r", retVal); reply.type = MSG_RESERVE_GAMEOVER; retVal = Reply(tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. serv.: retVal from Reply to train: %d\n\r", retVal); int i; for (i=0; i<MAX_ENGINEERS; i++) { if (bl[i].tid != -1) { reply.type = MSG_RESERVE_GAMEOVER; retVal = Reply(bl[blIndex].tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. serv.: retVal from Reply to train: %d\n\r", retVal); } } } else { // else if we can move away, send the move reply reply.type = MSG_RESERVE_MOVE; reply.numNodes = 1; reply.nodes[0] = nodeToMoveTo; printf0("res. serv.: about to reply case 2.\n\r"); int retVal = Reply(tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. 
serv.: retVal from Reply to train: %d\n\r", retVal); } } else { // else the other train needs to move out of the way // find a node to move to Node *nodeToMoveTo = moveAway(bl[blIndex].trainLoc, bl[blIndex].curDir == 'F' ? 'B' : 'F'); printf1("Res Serv.: GTFO other (train %d) to ", bl[blIndex].trainNum); if (nodeToMoveTo == NULL) { printf0("NULL"); } else { printNode2(nodeToMoveTo); } printf1(", curDir %c!\n\r", (int)bl[blIndex].curDir); if (nodeToMoveTo == NULL) { // if we can't move away, it's the game over case printf0("res. serv.: case 1.\n\r"); int pacmanServerTid = WhoIs("pacmanserver"); char c = MSG_PACMAN_GAMEOVER; int retVal = Send(pacmanServerTid, &c, 1, NULL, 0); printf1("res. serv.: retVal from Reply to PMS: %d\n\r", retVal); reply.type = MSG_RESERVE_GAMEOVER; retVal = Reply(tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. serv.: retVal from Reply to train: %d\n\r", retVal); int i; for (i=0; i<MAX_ENGINEERS; i++) { if (bl[i].tid != -1) { reply.type = MSG_RESERVE_GAMEOVER; retVal = Reply(bl[blIndex].tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. serv.: retVal from Reply to train: %d\n\r", retVal); } } } else { // else if we can move away, send the move reply reply.type = MSG_RESERVE_MOVE; reply.numNodes = 1; reply.nodes[0] = nodeToMoveTo; printf0("res. serv.: about to reply case 2.\n\r"); int retVal = Reply(bl[blIndex].tid, (char *)&reply, sizeof(MsgReservation)); printf1("res. 
serv.: retVal from Reply to train: %d\n\r", retVal); // remove the other train from the block list, since he's no longer going to be blocked bl[blIndex].tid = -1; // now, block the current train, waiting for the other one to get ouf of the way blockTask(tid, request.iAmPacman, request.trainNum, request.trainLoc, request.curDir, request.numNodes, request.nodes, bl); } } } else { // else if it's not a deadlock, just a reservation conflict, handle it normally by blocking blockTask(tid, request.iAmPacman, request.trainNum, request.trainLoc, request.curDir, request.numNodes, request.nodes, bl); } } break; case MSG_RESERVE_REQUEST_NONBLOCKING: if (resConflict(request.numNodes, request.nodes, tid) == NULL) { // if the request can be accomodated, log it and reply logReservation(request.numNodes, request.nodes, tid, request.iAmPacman, request.trainNum, request.curDir); reply.type = MSG_RESERVE_OK; Reply(tid, (char *)&reply, sizeof(MsgReservation)); } else { // else if the request cannot be accomodated, send a fail message back reply.type = MSG_RESERVE_FAIL; Reply(tid, (char *)&reply, sizeof(MsgReservation)); } break; case MSG_RESERVE_RELEASE: logRelease(request.numNodes, request.nodes, request.trainNum); // log the release Reply(tid, NULL, 0); // reply with a NULL buffer // now, check if any waiting task can now be awoken due to the release for (blIndex = 0; blIndex<MAX_ENGINEERS; blIndex++) { // if the bl entry is valid and we can now accomodate it, do so if (bl[blIndex].tid != -1 && resConflict(bl[blIndex].numNodes, bl[blIndex].nodes, bl[blIndex].tid) == NULL) { // log the reservation logReservation(bl[blIndex].numNodes, bl[blIndex].nodes, bl[blIndex].tid, bl[blIndex].iAmPacman, bl[blIndex].trainNum, bl[blIndex].curDir); // reply to the task, telling it the reserve has finally been processed reply.type = MSG_RESERVE_OK; Reply(bl[blIndex].tid, (char *)&reply, sizeof(MsgReservation)); // ...and invalidate its entry in the block list bl[blIndex].tid = -1; } } break; default: 
bwprintf(COM2, "ERROR: illegal reservation request type %d! Halt!!!", request.type); Halt(); break; } // switch } // FOREVER return; // can't happen due to above FOREVER loop }
int main( int argc, char *argv[] ) { MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); standard_init(); read_init_arg(argc, argv); printf0("Running initialization...\n"); DDalphaAMG_initialize( &init, ¶ms, &status ); printf0("Initialized %d levels in %.2f sec\n", status.success, status.time); int nlvl = status.success; read_params_arg(argc, argv); comm_cart = DDalphaAMG_get_communicator(); MPI_Comm_rank( comm_cart, &rank ); printf0("Running updating\n"); DDalphaAMG_update_parameters( ¶ms, &status ); if (status.success) printf0("Updating time %.2f sec\n", status.time); /* * Reading the configuration. In plaq, it returns the plaquette value * if provided in the configuration header. */ double *gauge_field; int vol = init.global_lattice[T] * init.global_lattice[X] * init.global_lattice[Y] * init.global_lattice[Z] / init.procs[T] / init.procs[X] / init.procs[Y] / init.procs[Z]; gauge_field = (double *) malloc(18*4*vol*sizeof(double)); printf0("Reading config.\n"); DDalphaAMG_read_configuration( gauge_field, conf_file, conf_format, &status ); printf0("Reading configuration time %.2f sec\n", status.time); printf0("Desired plaquette %.13lf\n", status.info); printf0("Setting config.\n"); DDalphaAMG_set_configuration( gauge_field, &status ); printf0("Setting configuration time %.2f sec\n", status.time); printf0("Computed plaquette %.13lf\n", status.info); printf0("Running setup\n"); DDalphaAMG_setup( &status ); printf0("Run %d setup iterations in %.2f sec (%.1f %% on coarse grid)\n", status.success, status.time, 100.*(status.coarse_time/status.time)); printf0("Total iterations on fine grid %d\n", status.iter_count); printf0("Total iterations on coarse grids %d\n", status.coarse_iter_count); /* * Defining fine and coarse vector randomly. 
*/ double *vector1[nlvl], *vector2[nlvl]; int vols[nlvl], vars[nlvl]; vols[0]=vol; vars[0]=3*4*2; for ( int i=1; i<nlvl; i++ ) { vols[i] = vols[i-1] / params.block_lattice[i-1][T] / params.block_lattice[i-1][X] / params.block_lattice[i-1][Y] / params.block_lattice[i-1][Z]; vars[i]=params.mg_basis_vectors[i-1]*2*2; // a factor of 2 is for the spin, the other for the complex } for ( int i=0; i<nlvl; i++ ) { vector1[i] = (double *) malloc(vars[i]*vols[i]*sizeof(double)); vector2[i] = (double *) malloc(vars[i]*vols[i]*sizeof(double)); } for ( int i=0; i<nlvl; i++ ) for ( int j=0; j<vars[i]*vols[i]; j++ ) vector1[i][j] = ((double)rand()/(double)RAND_MAX)-0.5; for ( int i=1; i<nlvl; i++ ) { printf0("Testing RP=1 on level %d\n",i); DDalphaAMG_prolongate(vector2[i-1], vector1[i], i-1, &status); DDalphaAMG_restrict(vector2[i], vector2[i-1], i-1, &status); double num=0, den=0; for ( int j=0; j<vars[i]*vols[i]; j++ ) { vector2[i][j] -= vector1[i][j]; num += vector2[i][j]*vector2[i][j]; den += vector1[i][j]*vector1[i][j]; } printf0("Restult (1-RP)v = %e\n\n", num/den); } for ( int i=1; i<nlvl; i++ ) { printf0("Testing coarse operator on level %d\n",i); DDalphaAMG_prolongate(vector1[i-1], vector1[i], i-1, &status); DDalphaAMG_apply_coarse_operator(vector2[i-1], vector1[i-1], i-1, &status); DDalphaAMG_restrict(vector2[i], vector2[i-1], i-1, &status); DDalphaAMG_apply_coarse_operator(vector1[i], vector1[i], i, &status); double num=0, den=0; for ( int j=0; j<vars[i]*vols[i]; j++ ) { vector2[i][j] -= vector1[i][j]; num += vector2[i][j]*vector2[i][j]; den += vector1[i][j]*vector1[i][j]; } printf0("Restult (D_c-RDP)v = %e\n\n", num/den); } // free(vector_in); // free(vector_out); //free(gauge_field); // DDalphaAMG_finalize(); MPI_Finalize(); }
/*
 * Runs QOP_wilson_invert_multi_qdp() for `nmasses` masses on packed host
 * arrays.
 *
 *   l        layout handed to the pack/unpack helpers
 *   x        one array per mass: initial guess on entry, result on return
 *   u        gauge links; only the even entries u[0], u[2], u[4], u[6] are read
 *   masses   mass values, copied into an array of `real`
 *   y        right-hand side
 *   nmasses  number of masses
 *   rsq      stored in the shared res_arg.rsqmin
 *   sub      currently unused: the selection code is disabled and QOP_EVEN
 *            is always used
 *
 * All masses share one residual-argument structure.
 *
 * NOTE(review): the links object returned by QOP_wilson_create_L_from_qdp()
 * is not released anywhere in this function -- confirm whether that is
 * intentional.
 */
void qopWilsonSolveMulti(Layout *l, real *x[], real *u[8], double masses[], real *y, int nmasses, double rsq, char *sub)
{
  QDP_ColorMatrix *links[4];
  QDP_DiracFermion *out[nmasses];
  QDP_DiracFermion **outp = out;
  QDP_DiracFermion *in = QDP_create_D();
  QLA_Real two = 2;

  unpackD(l, in, y);

  for(int m=0; m<nmasses; m++) {
    out[m] = QDP_create_D();
    unpackD(l, out[m], x[m]);
    QDP_D_eq_zero(out[m], QDP_even);
  }

  /* Load every link and multiply it by two. */
  for(int dir=0; dir<4; dir++) {
    links[dir] = QDP_create_M();
    unpackM(l, links[dir], u[2*dir]);
    QDP_M_eq_r_times_M(links[dir], &two, links[dir], QDP_all);
  }

  QOP_FermionLinksWilson *fla = QOP_wilson_create_L_from_qdp(links, NULL);

#if 0
  QOP_evenodd_t eo = QOP_EVENODD;
  if(sub[0]=='e') {
    eo = QOP_EVEN;
  }
  if(sub[0]=='o') {
    eo = QOP_ODD;
  }
#endif
  QOP_evenodd_t eo = QOP_EVEN;

  QOP_info_t info = QOP_INFO_ZERO;

  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = eo;
  inv_arg.mixed_rsq = 0;

  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;

  /* Per-mass argument arrays; every residual pointer refers to res_arg. */
  QOP_resid_arg_t *ra[nmasses];
  QOP_resid_arg_t **rap = ra;
  real mf[nmasses];
  real *mfp = mf;
  for(int m=0; m<nmasses; m++) {
    ra[m] = &res_arg;
    mf[m] = masses[m];
  }

  //QOP_verbose(3);
  QOP_wilson_invert_multi_qdp(&info, fla, &inv_arg, &rap, &mfp, &nmasses, &outp, &in, 1);
  //QLA_Real n2;
  //QDP_r_eq_norm2_D(&n2, (QDP_DiracFermion*)out, QDP_all);
  printf0("QOP its: %i\n", res_arg.final_iter);

  QDP_destroy_D(in);
  for(int m=0; m<nmasses; m++) {
    packD(l, x[m], out[m]);
    QDP_destroy_D(out[m]);
  }
  for(int dir=0; dir<4; dir++) {
    QDP_destroy_M(links[dir]);
  }
}
/* Prints the processCount field of the context `c` as a "# context" line. */
static void printContext(Context *c)
{
    printf0("# context p %3d, \n",
            c->processCount);
}
int main(int argc, char *argv[]) { int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 3) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[1], clover_state, 
f_reader, F[1])) { printf0("CLOVER_import_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); QOP_CLOVER_export_fermion(f_writer, F[2], c_f[2]); QOP_CLOVER_D_operator_conjugated(c_f[3], c_g, c_f[1]); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[2]> */ show_dot("1|D0", F[1], F[2]); /* Compute and display <f[3] f[0]> */ show_dot("X1|0", F[3], F[0]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }
void start(void) { double mf, best_mf; QLA_Real plaq; QDP_ColorMatrix **u; QDP_DiracFermion *out, *in; int i, st, ns, nm, bs, sti, nsi, nmi, bsi, best_st, best_ns, best_nm, best_bs; u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *)); for(i=0; i<ndim; i++) u[i] = QDP_create_M(); get_random_links(u, ndim, 0.2); plaq = get_plaq(u); if(QDP_this_node==0) printf("plaquette = %g\n", plaq); out = QDP_create_D(); in = QDP_create_D(); QDP_D_eq_gaussian_S(in, rs, QDP_all); QOP_layout_t qoplayout = QOP_LAYOUT_ZERO; qoplayout.latdim = ndim; qoplayout.latsize = (int *) malloc(ndim*sizeof(int)); for(i=0; i<ndim; i++) { qoplayout.latsize[i] = lattice_size[i]; } qoplayout.machdim = -1; QOP_info_t info = QOP_INFO_ZERO; QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT; QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT; res_arg.rsqmin = rsqmin; inv_arg.max_iter = 600; inv_arg.restart = 200; inv_arg.evenodd = QOP_EVEN; if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); } QOP_init(&qoplayout); if(QDP_this_node==0) { printf("begin load links\n"); fflush(stdout); } //flw = QOP_wilson_create_L_from_qdp(u, NULL); if(QDP_this_node==0) { printf("begin invert\n"); fflush(stdout); } if(cgtype>=0) { QOP_opt_t optcg; optcg.tag = "cg"; optcg.value = cgtype; QOP_wilson_invert_set_opts(&optcg, 1); } best_mf = 0; best_st = sta[0]; best_ns = nsa[0]; best_nm = nma[0]; best_bs = bsa[0]; QOP_opt_t optst; optst.tag = "st"; QOP_opt_t optns; optns.tag = "ns"; QOP_opt_t optnm; optnm.tag = "nm"; for(sti=0; sti<stn; sti++) { if((style>=0)&&(sti!=style)) continue; st = sta[sti]; optst.value = st; if(QOP_wilson_invert_set_opts(&optst, 1)==QOP_FAIL) continue; for(nsi=0; nsi<nsn; nsi++) { ns = nsa[nsi]; optns.value = ns; if(QOP_wilson_invert_set_opts(&optns, 1)==QOP_FAIL) continue; for(nmi=0; nmi<nmn; nmi++) { nm = nma[nmi]; if(nm==0) nm = ns; optnm.value = nm; if(QOP_wilson_invert_set_opts(&optnm, 1)==QOP_FAIL) continue; for(bsi=0; bsi<bsn; bsi++) { bs = bsa[bsi]; QDP_set_block_size(bs); 
flw = QOP_wilson_create_L_from_qdp(u, NULL); mf = bench_inv(&info, &inv_arg, &res_arg, out, in); QOP_wilson_destroy_L(flw); printf0("CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n", st, ns, nm, bs, res_arg.final_iter, info.final_sec, mf); if(mf>best_mf) { best_mf = mf; best_st = st; best_ns = ns; best_nm = nm; best_bs = bs; } } } } } flw = QOP_wilson_create_L_from_qdp(u, NULL); optst.value = best_st; optns.value = best_ns; optnm.value = best_nm; QOP_wilson_invert_set_opts(&optst, 1); QOP_wilson_invert_set_opts(&optns, 1); QOP_wilson_invert_set_opts(&optnm, 1); QDP_set_block_size(best_bs); QDP_profcontrol(1); mf = bench_inv(&info, &inv_arg, &res_arg, out, in); QDP_profcontrol(0); printf0("prof: CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n", best_st, best_ns, best_nm, best_bs, res_arg.final_iter, info.final_sec, mf); printf0("best: CONGRAD: st%2i ns%2i nm%2i bs%5i mflops = %g\n", best_st, best_ns, best_nm, best_bs, best_mf); if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); } //QOP_wilson_invert_unload_links(); if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); } QOP_finalize(); }
int main(int argc, char *argv[]) { const char *msg; int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; double in_eps; int in_iter; int log_flag; double out_eps; int out_iter; int cg_status; double run_time; long long flops, sent, received; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 6) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw iter eps log?\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); in_iter = atoi(argv[4 + NDIM]); in_eps = atof(argv[5 + NDIM]); log_flag = atoi(argv[6 + NDIM]) == 0? 0: QOP_CLOVER_LOG_EVERYTHING; /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); printf0("in_iter: %d\n", in_iter); printf0("in_eps: %15.2e\n", in_eps); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the 
clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[1], clover_state)) { printf0("CLOVER_allocate_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); cg_status = QOP_CLOVER_D_CG(c_f[3], &out_iter, &out_eps, c_f[2], c_g, c_f[2], in_iter, in_eps, log_flag); msg = QOP_CLOVER_error(clover_state); QOP_CLOVER_performance(&run_time, &flops, &sent, &received, clover_state); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); printf0("CG status: %d\n", cg_status); printf0("CG error message: %s\n", msg? 
msg: "<NONE>"); printf0("CG iter: %d\n", out_iter); printf0("CG eps: %20.10e\n", out_eps); printf0("CG performance: runtime %e sec\n", run_time); printf0("CG performance: flops %.3e MFlop/s (%lld)\n", flops * 1e-6 / run_time, flops); printf0("CG performance: snd %.3e MB/s (%lld)\n", sent * 1e-6 / run_time, sent); printf0("CG performance: rcv %.3e MB (%lld)/s\n", received * 1e-6 / run_time, received); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[0]> */ show_dot("1|orig", F[1], F[0]); /* Compute and display <f[1] f[3]> */ show_dot("1|solv", F[1], F[3]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }
/*
 * Print the command-line usage summary.
 *
 * The usage text itself is written only on the first call; every call,
 * including the first, ends by printing two newlines and bumping the
 * call counter.  arg0 is the program name shown in the "Usage:" line.
 */
void help( char * arg0 ) {

  /* One entry per supported option, in display order. */
  static const char *const option_lines[] = {
    " -c PATH Configuration to load\n",
    " -f # Configuration format (0 -> DDalphaAMG, 1 -> Lime)\n",
    " -i PATH Input file (optional)\n",
    " -L T Y X Z Lattice size in each direction\n",
    " -p T Y X Z Processors in each direction\n",
    " -B T Y X Z Block size in each direction on first level.\n",
    " -k # kappa for the configuration\n",
    " -w # c_sw for the configuration\n",
    " -u # mu for the configuration\n",
    " -t # Number of OpenMp threads\n",
    " -r # Relative residual\n",
    " -K # K-cycle tolerance\n",
    " -C # Tolerance on coarsest grid\n",
    " -l # Number of levels, l (from 1 to 4)\n",
    " -V 1 [2] [3] Basis vectors between each level (l-1)\n",
    " -m 2 [3] [4] Factor for mu on coarse levels\n",
    " -s 1 [2] [3] Setup iterations on each level (l-1)\n",
    " -v Verbose\n"
  };
  static int printed = 0;

  if ( printed == 0 ) {
    const unsigned line_count = sizeof option_lines / sizeof option_lines[0];

    printf0("\n\n");
    printf0("Usage: %s -c <conf> [<option(s)>]\n", arg0);
    for ( unsigned k = 0; k < line_count; k++ )
      printf0("%s", option_lines[k]);
  }

  printf0("\n\n");
  printed++;
}