void fifo_send_char(uint8_t c) { #ifdef __AVR_XMEGA__ if ((FIFO_CTL_PORT.IN & _BV(FIFO_TXE_N)) != _BV(FIFO_TXE_N)) { FIFO_DATA_PORT.DIR = 0xff; #ifdef FIFO_BIT_REVERSE REVERSE(c); #endif FIFO_DATA_PORT.OUT = c; FIFO_DATA_PORT.DIR = 0xff; FIFO_CTL_PORT.OUTCLR = _BV(FIFO_WR_N); FIFO_DATA_PORT.DIR = 0; FIFO_CTL_PORT.OUTSET = _BV(FIFO_WR_N); } #else // __AVR_XMEGA__ if ((FIFO_CTL_PORT_PIN & _BV(FIFO_TXE_N)) != _BV(FIFO_TXE_N)) { FIFO_DATA_PORT_DDR = 0xff; #ifdef FIFO_BIT_REVERSE REVERSE(c); #endif FIFO_DATA_PORT = c; FIFO_DATA_PORT_DDR = 0xff; FIFO_CTL_PORT &= ~_BV(FIFO_WR_N); FIFO_DATA_PORT_DDR = 0; FIFO_CTL_PORT |= _BV(FIFO_WR_N); } #endif // __AVR_XMEGA__ }
void __attribute__ ((always_inline)) fifo_send_char(uint8_t c) { #ifdef __AVR_XMEGA__ if ((FIFO_CTL_PORT.IN & FIFO_TXE_N_bm) != FIFO_TXE_N_bm) { FIFO_DATA_PORT.DIR = 0xff; #ifdef FIFO_BIT_REVERSE REVERSE(c); #endif FIFO_DATA_PORT.OUT = c; FIFO_DATA_PORT.DIR = 0xff; FIFO_CTL_PORT.OUTCLR = FIFO_WR_N_bm; FIFO_DATA_PORT.DIR = 0; FIFO_CTL_PORT.OUTSET = FIFO_WR_N_bm; } #else // __AVR_XMEGA__ if ((FIFO_CTL_PORT_PIN & FIFO_TXE_N_bm) != FIFO_TXE_N_bm) { FIFO_DATA_PORT_DDR = 0xff; #ifdef FIFO_BIT_REVERSE REVERSE(c); #endif FIFO_DATA_PORT = c; FIFO_DATA_PORT_DDR = 0xff; FIFO_CTL_PORT &= ~FIFO_WR_N_bm; FIFO_DATA_PORT_DDR = 0; FIFO_CTL_PORT |= FIFO_WR_N_bm; } #endif // __AVR_XMEGA__ }
static int s353xxa_rtc_set_time(struct device *dev, struct rtc_time *t) { struct i2c_client *client = to_i2c_client(dev); struct i2c_msg msgs[1]; u8 buf[7]; int ret; DEBUG_FUNC(); DEBUG_INFO("SET : %d/%d/%d(%d) %d:%d:%d\n", t->tm_year + 2000, t->tm_mon + 1, t->tm_mday, t->tm_wday, t->tm_hour, t->tm_min, t->tm_sec); t->tm_year -= 100; t->tm_mon += 1; buf[0] = REVERSE(BIN2BCD(t->tm_year)); buf[1] = REVERSE(BIN2BCD(t->tm_mon)); buf[2] = REVERSE(BIN2BCD(t->tm_mday)); buf[3] = REVERSE(BIN2BCD(t->tm_wday)); buf[4] = REVERSE(BIN2BCD(t->tm_hour)); buf[5] = REVERSE(BIN2BCD(t->tm_min)); buf[6] = REVERSE(BIN2BCD(t->tm_sec)); set_i2c_msg(&msgs[0], client->addr | 0x2, 0, 7, buf); ret = i2c_transfer(client->adapter, msgs, 1); if (ret != 1) { return -EIO; } return 0; }
static int s353xxa_rtc_read_time(struct device *dev, struct rtc_time *t) { struct i2c_client *client = to_i2c_client(dev); struct i2c_msg msgs[1]; u8 buf[7]; int ret; DEBUG_FUNC(); set_i2c_msg(&msgs[0], client->addr | 0x2, I2C_M_RD, 7, buf); ret = i2c_transfer(client->adapter, msgs, 1); if (ret != 1) return -EIO; t->tm_year = BCD2BIN(REVERSE(buf[0])); t->tm_mon = BCD2BIN(REVERSE(buf[1])); t->tm_mday = BCD2BIN(REVERSE(buf[2])); t->tm_wday = BCD2BIN(REVERSE(buf[3])); t->tm_hour = BCD2BIN(REVERSE(buf[4]) & 0x3f); t->tm_min = BCD2BIN(REVERSE(buf[5])); t->tm_sec = BCD2BIN(REVERSE(buf[6])); t->tm_year += 100; t->tm_mon -= 1; DEBUG_INFO("READ: %d/%d/%d(%d) %d:%d:%d\n", t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_wday, t->tm_hour, t->tm_min, t->tm_sec); return 0; }
uint8_t __attribute__ ((always_inline)) fifo_cur_char(void) { uint8_t ret; #ifdef __AVR_XMEGA__ FIFO_CTL_PORT.OUTCLR = FIFO_RD_N_bm; ret = FIFO_DATA_PORT.IN; #ifdef FIFO_BIT_REVERSE REVERSE(ret); #endif FIFO_CTL_PORT.OUTSET = FIFO_RD_N_bm; #else // __AVR_XMEGA__ FIFO_CTL_PORT &= ~FIFO_RD_N_bm; ret = FIFO_DATA_PORT_PIN; #ifdef FIFO_BIT_REVERSE REVERSE(ret); #endif FIFO_CTL_PORT |= FIFO_RD_N_bm; #endif // __AVR_XMEGA__ return ret; }
uint8_t fifo_cur_char(void) { uint8_t ret; #ifdef __AVR_XMEGA__ FIFO_CTL_PORT.OUTCLR = _BV(FIFO_RD_N); ret = FIFO_DATA_PORT.IN; #ifdef FIFO_BIT_REVERSE REVERSE(ret); #endif FIFO_CTL_PORT.OUTSET = _BV(FIFO_RD_N); #else // __AVR_XMEGA__ FIFO_CTL_PORT &= ~_BV(FIFO_RD_N); ret = FIFO_DATA_PORT_PIN; #ifdef FIFO_BIT_REVERSE REVERSE(ret); #endif FIFO_CTL_PORT |= _BV(FIFO_RD_N); #endif // __AVR_XMEGA__ return ret; }
/*
 * Format the FabricPath header of layer `lyr` in packet `c`.
 *
 * When REVERSE(f) is true, the header at the same layer index in `p` is
 * looked up and its source/destination addresses are copied into `c`
 * swapped (src -> dst, dst -> src). Otherwise nothing is written.
 */
static void FPath_Format (EncodeFlags f, const Packet* p, Packet* c, Layer* lyr)
{
    FPathHdr* out_hdr = (FPathHdr*)lyr->start;

    if ( REVERSE(f) )
    {
        /* position of lyr inside c's layer array; same slot is used in p */
        int layer_idx = lyr - c->layers;
        FPathHdr* in_hdr = (FPathHdr*)p->layers[layer_idx].start;

        memcpy(out_hdr->fpath_dst, in_hdr->fpath_src, sizeof(out_hdr->fpath_dst));
        memcpy(out_hdr->fpath_src, in_hdr->fpath_dst, sizeof(out_hdr->fpath_src));
    }
}
//Writes a byte of data to the LCD. void writeLcd(u08 data) { //Reverse the bit order of the data, due to LCD connections to data bus being backwards. REVERSE(data); //set the LCD's E (Enable) line high, so it can fall later sbi(PORTD, 6); //write the data to the bus PORTC = data; //delay to allow the data to fully propagate to the LCD delayUs(1); //set the LCD's E (Enable) line low to latch in the data cbi(PORTD, 6); }
/*
 * Format the Ethernet header of layer `lyr` in packet `c`.
 *
 * c->eh is always pointed at the header. When REVERSE(f) is true, the
 * header at the same layer index in `p` is looked up and its MAC
 * addresses are copied into `c` swapped (src -> dst, dst -> src).
 */
static void Eth_Format (EncodeFlags f, const Packet* p, Packet* c, Layer* lyr)
{
    EtherHdr* out_hdr = (EtherHdr*)lyr->start;

    c->eh = out_hdr;

    if ( REVERSE(f) )
    {
        /* position of lyr inside c's layer array; same slot is used in p */
        int layer_idx = lyr - c->layers;
        EtherHdr* in_hdr = (EtherHdr*)p->layers[layer_idx].start;

        memcpy(out_hdr->ether_dst, in_hdr->ether_src, sizeof(out_hdr->ether_dst));
        memcpy(out_hdr->ether_src, in_hdr->ether_dst, sizeof(out_hdr->ether_src));
    }
}
/* * Reed - Solomon Encoder. The Encoder uses a shift register algorithm, * as detailed in _Applied Modern Algebra_ by Dornhoff and Hohn (p.446). * Note that the message is reversed in the code array; this was done to * allow for (emergency) recovery of the message directly from the * data stream. */ extern void ecc_encode(uint8_t m[ECC_PAYLOAD], uint8_t c[ECC_CAPACITY]) { uint8_t r[ECC_OFFSET] = { 0x0 }; for (int i = 0; i < ECC_PAYLOAD; i++) { c[(ECC_CAPACITY - 1) - i] = m[i]; uint8_t rtmp = GF_ADD(m[i], r[5]); for (int j = ECC_OFFSET - 1; j > 0; j--) r[j] = GF_ADD(GF_MUL(rtmp, g[j]), r[j - 1]); r[0] = GF_MUL(rtmp, g[0]); } for (int i = 0; i < ECC_OFFSET; i++) c[i] = r[i]; REVERSE(c, ECC_CAPACITY); }
//Writes a byte of data to the LCD. void writeLcd(u08 data) { //Reverse the bit order of the data, due to LCD connections to data bus being backwards. //The variable has local scope, so this can be done before the interrupts are disabled. REVERSE(data); //Disable interrupts to prevent the servo ISR (which shares the same data bus) //from interrupting in the middle of the sequence and messing with the bus. cli(); //set the LCD's E (Enable) line high, so it can fall later sbi(PORTD, 6); //write the data to the bus PORTC = data; //delay to allow the data to fully propagate to the LCD delayUs(1); //set the LCD's E (Enable) line low to latch in the data cbi(PORTD, 6); //re-enable interrupts sei(); }
//! Writes a byte of data to the LCD. static void writeLcd(u08 data) { //Reverse the bit order of the data, due to LCD connections to the data bus being backwards. //This line doesn't affect the bus, so this can be done before the interrupts are disabled. REVERSE(data); //Disable interrupts in this block to prevent the servo ISR (which shares the same data bus) //from interrupting in the middle of the sequence and messing with the bus. //ATOMIC_RESTORESTATE is used so that this function can be called from //user code/ISRs without unexpected side effects. ATOMIC_BLOCK(ATOMIC_RESTORESTATE) { //set the LCD's E (Enable) line high, so it can fall later sbi(PORTD, PD6); //write the data to the bus PORTC = data; //brief delay to allow the data to fully propagate to the LCD delayUs(1); //set the LCD's E (Enable) line low to latch in the data cbi(PORTD, PD6); } }
static void IP4_Format (EncodeFlags f, const Packet* p, Packet* c, Layer* lyr) { // TBD handle nested ip layers IPHdr* ch = (IPHdr*)lyr->start; c->iph = ch; if ( REVERSE(f) ) { int i = lyr - c->layers; IPHdr* ph = (IPHdr*)p->layers[i].start; ch->ip_src.s_addr = ph->ip_dst.s_addr; ch->ip_dst.s_addr = ph->ip_src.s_addr; } if ( f & ENC_FLAG_DEF ) { int i = lyr - c->layers; if ( i + 1 == p->next_layer ) { lyr->length = sizeof(*ch); ch->ip_len = htons(lyr->length); SET_IP_HLEN(ch, lyr->length >> 2); }
/*
 * Full implementation of the three error correcting Peterson decoder. For
 * t<6, it is faster than Massey - Berlekamp. It is also somewhat more
 * intuitive.
 *
 * code:    received code word of ECC_CAPACITY bytes. REVERSE() is applied
 *          to it on entry.
 *          NOTE(review): REVERSE(code, ...) looks like it rewrites the
 *          caller's buffer and nothing here undoes it - confirm callers do
 *          not reuse `code` afterwards.
 * mesg:    output; the first ECC_PAYLOAD entries receive the message
 *          estimate, corrected where possible.
 * errcode: output; 0 when the first syndrome byte is zero, otherwise the
 *          value produced by errnum(). It is set to 4 when the block
 *          cannot be corrected (mesg then holds the uncorrected estimate).
 */
extern void ecc_decode(uint8_t code[ECC_CAPACITY], uint8_t mesg[ECC_CAPACITY], int *errcode)
{
	REVERSE(code, ECC_CAPACITY);

	uint8_t syn[ECC_OFFSET + 1], deter, z[4], e0, e1, e2, n0, n1, n2, w0, w1, w2, x0, x[3];
	int sols;

	*errcode = 0;

	/*
	 * First, get the message out of the code, so that even if we can't correct
	 * it, we return an estimate.
	 */
	for (int i = 0; i < ECC_PAYLOAD; i++)
		mesg[i] = code[(ECC_CAPACITY - 1) - i];

	syndrome(code, syn);

	/* syn[0] == 0 is treated as "no error": keep errcode 0 and the plain copy. */
	if (syn[0] == 0)
		return;

	/*
	 * We now know we have at least one error. If there are no errors detected,
	 * we assume that something funny is going on, and so return with errcode 4,
	 * else pass the number of errors back via errcode.
	 */
	errnum(syn, &deter, errcode);
	if (*errcode == 4)
		return;

	/* Having obtained the syndrome, the number of errors, and the determinant,
	 * we now proceed to correct the block. If we do not find exactly the
	 * number of solutions equal to the number of errors, we have exceeded our
	 * error capacity, and return with the block uncorrected, and errcode 4.
	 */
	switch (*errcode) {
	case 1:
		/* Single error: x0 selects the position (via v2e), w0 is the correction value. */
		x0 = GF_MUL(syn[2], GF_INV(syn[1]));
		w0 = GF_MUL(GF_EXP(syn[1], 2), GF_INV(syn[2]));
		/*
		 * NOTE(review): the literal 5 presumably stands for ECC_OFFSET - 1
		 * (i.e. "the error lies in the message part"); cases 2 and 3 express
		 * their guard as n < ECC_PAYLOAD - confirm the two agree.
		 */
		if (v2e[x0] > 5)
			mesg[(ECC_CAPACITY - 1) - v2e[x0]] = GF_ADD(mesg[(ECC_CAPACITY - 1) - v2e[x0]], w0);
		return;

	case 2:
		/* Two errors: build polynomial z (z[2] = 1, z[3] = 0) and solve it. */
		z[0] = GF_MUL(GF_ADD(GF_MUL(syn[1], syn[3]), GF_EXP(syn[2], 2)), GF_INV(deter));
		z[1] = GF_MUL(GF_ADD(GF_MUL(syn[2], syn[3]), GF_MUL(syn[1], syn[4])), GF_INV(deter));
		z[2] = 1;
		z[3] = 0;
		polysolve(z, x, &sols);
		if (sols != 2) {
			*errcode = 4;
			return;
		}
		w0 = GF_MUL(z[0], syn[1]);
		w1 = GF_ADD(GF_MUL(z[0], syn[2]), GF_MUL(z[1], syn[1]));
		/* Message indices derived from the inverses of the roots. */
		n0 = (ECC_CAPACITY - 1) - v2e[GF_INV(x[0])];
		n1 = (ECC_CAPACITY - 1) - v2e[GF_INV(x[1])];
		/* Correction values for the two positions. */
		e0 = GF_MUL(GF_ADD(w0, GF_MUL(w1, x[0])), GF_INV(z[1]));
		e1 = GF_MUL(GF_ADD(w0, GF_MUL(w1, x[1])), GF_INV(z[1]));
		/* Only positions that fall inside the payload are patched. */
		if (n0 < ECC_PAYLOAD)
			mesg[n0] = GF_ADD(mesg[n0], e0);
		if (n1 < ECC_PAYLOAD)
			mesg[n1] = GF_ADD(mesg[n1], e1);
		return;

	case 3:
		/* Three errors: z[3] = 1; z[2], z[1], z[0] are sums of syndrome products divided by deter. */
		z[3] = 1;
		z[2] = GF_MUL(syn[1], GF_MUL(syn[4], syn[6]));
		z[2] = GF_ADD(z[2], GF_MUL(syn[1], GF_MUL(syn[5], syn[5])));
		z[2] = GF_ADD(z[2], GF_MUL(syn[5], GF_MUL(syn[3], syn[3])));
		z[2] = GF_ADD(z[2], GF_MUL(syn[3], GF_MUL(syn[4], syn[4])));
		z[2] = GF_ADD(z[2], GF_MUL(syn[2], GF_MUL(syn[5], syn[4])));
		z[2] = GF_ADD(z[2], GF_MUL(syn[2], GF_MUL(syn[3], syn[6])));
		z[2] = GF_MUL(z[2], GF_INV(deter));

		z[1] = GF_MUL(syn[1], GF_MUL(syn[3], syn[6]));
		z[1] = GF_ADD(z[1], GF_MUL(syn[1], GF_MUL(syn[5], syn[4])));
		z[1] = GF_ADD(z[1], GF_MUL(syn[4], GF_MUL(syn[3], syn[3])));
		z[1] = GF_ADD(z[1], GF_MUL(syn[2], GF_MUL(syn[4], syn[4])));
		z[1] = GF_ADD(z[1], GF_MUL(syn[2], GF_MUL(syn[3], syn[5])));
		z[1] = GF_ADD(z[1], GF_MUL(syn[2], GF_MUL(syn[2], syn[6])));
		z[1] = GF_MUL(z[1], GF_INV(deter));

		z[0] = GF_MUL(syn[2], GF_MUL(syn[3], syn[4]));
		z[0] = GF_ADD(z[0], GF_MUL(syn[3], GF_MUL(syn[2], syn[4])));
		z[0] = GF_ADD(z[0], GF_MUL(syn[3], GF_MUL(syn[5], syn[1])));
		z[0] = GF_ADD(z[0], GF_MUL(syn[4], GF_MUL(syn[4], syn[1])));
		z[0] = GF_ADD(z[0], GF_MUL(syn[3], GF_MUL(syn[3], syn[3])));
		z[0] = GF_ADD(z[0], GF_MUL(syn[2], GF_MUL(syn[2], syn[5])));
		z[0] = GF_MUL(z[0], GF_INV(deter));

		polysolve (z, x, &sols);
		if (sols != 3) {
			*errcode = 4;
			return;
		}

		w0 = GF_MUL(z[0], syn[1]);
		w1 = GF_ADD(GF_MUL(z[0], syn[2]), GF_MUL(z[1], syn[1]));
		w2 = GF_ADD(GF_MUL(z[0], syn[3]), GF_ADD(GF_MUL(z[1], syn[2]), GF_MUL(z[2], syn[1])));

		/* Message indices derived from the inverses of the roots. */
		n0 = (ECC_CAPACITY - 1) - v2e[GF_INV(x[0])];
		n1 = (ECC_CAPACITY - 1) - v2e[GF_INV(x[1])];
		n2 = (ECC_CAPACITY - 1) - v2e[GF_INV(x[2])];

		/* Correction values: (w0 + w1*x + w2*x^2) / (z[1] + x^2) for each root x. */
		e0 = GF_ADD(w0, GF_ADD(GF_MUL(w1, x[0]), GF_MUL(w2, GF_EXP(x[0], 2))));
		e0 = GF_MUL(e0, GF_INV(GF_ADD(z[1], GF_EXP(x[0], 2))));
		e1 = GF_ADD(w0, GF_ADD(GF_MUL(w1, x[1]), GF_MUL(w2, GF_EXP(x[1], 2))));
		e1 = GF_MUL(e1, GF_INV(GF_ADD(z[1], GF_EXP(x[1], 2))));
		e2 = GF_ADD(w0, GF_ADD(GF_MUL(w1, x[2]), GF_MUL(w2, GF_EXP(x[2], 2))));
		e2 = GF_MUL(e2, GF_INV(GF_ADD(z[1], GF_EXP(x[2], 2))));

		/* Only positions that fall inside the payload are patched. */
		if (n0 < ECC_PAYLOAD)
			mesg[n0] = GF_ADD(mesg[n0], e0);
		if (n1 < ECC_PAYLOAD)
			mesg[n1] = GF_ADD(mesg[n1], e1);
		if (n2 < ECC_PAYLOAD)
			mesg[n2] = GF_ADD(mesg[n2], e2);
		return;
	}
}
/*
 * Build the split-order key for a dummy node: the result of REVERSE()
 * applied to `key` unchanged.
 */
static inline so_key_t so_dummykey(const key_t key)
{
    const so_key_t reversed = REVERSE(key);

    return reversed;
}
/*
 * Build the split-order key for a regular node: `key` is OR-ed with MSB
 * and the result of REVERSE() on that value is returned.
 */
static inline so_key_t so_regularkey(const key_t key)
{
    const so_key_t reversed = REVERSE(key | MSB);

    return reversed;
}
int main(int argc, char **argv){ int iter, r; /* dummies */ int lsize; /* logarithmic linear size of grid */ int lsize2; /* logarithmic size of grid */ int size; /* linear size of grid */ s64Int size2; /* matrix order (=total # points in grid) */ int radius, /* stencil parameters */ stencil_size; s64Int row, col, first, last; /* dummies */ s64Int i, j; /* dummies */ int iterations; /* number of times the multiplication is done */ s64Int elm; /* sequence number of matrix nonzero */ s64Int nent; /* number of nonzero entries */ double sparsity; /* fraction of non-zeroes in matrix */ double sparse_time,/* timing parameters */ avgtime = 0.0, maxtime = 0.0, mintime = 366.0*24.0*3600.0; /* set the minimum time to a large value; one leap year should be enough */ double * RESTRICT matrix; /* sparse matrix entries */ double * RESTRICT vector; /* vector multiplying the sparse matrix */ double * RESTRICT result; /* computed matrix-vector product */ double temp; /* temporary scalar storing reduction data */ double vector_sum; /* checksum of result */ double reference_sum; /* checksum of "rhs" */ double epsilon = 1.e-8; /* error tolerance */ s64Int * RESTRICT colIndex; /* column indices of sparse matrix entries */ int nthread_input, /* thread parameters */ nthread; int num_error=0; /* flag that signals that requested and obtained numbers of threads are the same */ size_t vector_space, /* variables used to hold malloc sizes */ matrix_space, index_space; if (argc != 5) { printf("Usage: %s <# threads> <# iterations> <2log grid size> <stencil radius>\n",*argv); exit(EXIT_FAILURE); } /* Take number of threads to request from command line */ nthread_input = atoi(*++argv); if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) { printf("ERROR: Invalid number of threads: %d\n", nthread_input); exit(EXIT_FAILURE); } omp_set_num_threads(nthread_input); iterations = atoi(*++argv); if (iterations < 1){ printf("ERROR: Iterations must be positive : %d \n", iterations); exit(EXIT_FAILURE); 
} lsize = atoi(*++argv); lsize2 = 2*lsize; size = 1<<lsize; if (lsize <0) { printf("ERROR: Log of grid size must be greater than or equal to zero: %d\n", (int) lsize); exit(EXIT_FAILURE); } /* compute number of points in the grid */ size2 = size*size; radius = atoi(*++argv); if (radius <0) { printf("ERROR: Stencil radius must be non-negative: %d\n", (int) size); exit(EXIT_FAILURE); } /* emit error if (periodic) stencil overlaps with itself */ if (size <2*radius+1) { printf("ERROR: Grid extent %d smaller than stencil diameter 2*%d+1= %d\n", size, radius, radius*2+1); exit(EXIT_FAILURE); } /* compute total size of star stencil in 2D */ stencil_size = 4*radius+1; /* sparsity follows from number of non-zeroes per row */ sparsity = (double)(4*radius+1)/(double)size2; /* compute total number of non-zeroes */ nent = size2*stencil_size; matrix_space = nent*sizeof(double); if (matrix_space/sizeof(double) != nent) { printf("ERROR: Cannot represent space for matrix: %ld\n", matrix_space); exit(EXIT_FAILURE); } matrix = (double *) malloc(matrix_space); if (!matrix) { printf("ERROR: Could not allocate space for sparse matrix: "FSTR64U"\n", nent); exit(EXIT_FAILURE); } vector_space = 2*size2*sizeof(double); if (vector_space/sizeof(double) != 2*size2) { printf("ERROR: Cannot represent space for vectors: %ld\n", vector_space); exit(EXIT_FAILURE); } vector = (double *) malloc(vector_space); if (!vector) { printf("ERROR: Could not allocate space for vectors: %d\n", (int)(2*size2)); exit(EXIT_FAILURE); } result = vector + size2; index_space = nent*sizeof(s64Int); if (index_space/sizeof(s64Int) != nent) { printf("ERROR: Cannot represent space for column indices: %ld\n", index_space); exit(EXIT_FAILURE); } colIndex = (s64Int *) malloc(index_space); if (!colIndex) { printf("ERROR: Could not allocate space for column indices: "FSTR64U"\n", nent*sizeof(s64Int)); exit(EXIT_FAILURE); } #pragma omp parallel private (row, col, elm, first, last, iter) { #pragma omp master { nthread = 
omp_get_num_threads(); printf("OpenMP Sparse matrix-vector multiplication\n"); if (nthread != nthread_input) { num_error = 1; printf("ERROR: number of requested threads %d does not equal ", nthread_input); printf("number of spawned threads %d\n", nthread); } else { printf("Number of threads = %16d\n",nthread_input); printf("Matrix order = "FSTR64U"\n", size2); printf("Stencil diameter = %16d\n", 2*radius+1); printf("Sparsity = %16.10lf\n", sparsity); #ifdef SCRAMBLE printf("Using scrambled indexing\n"); #else printf("Using canonical indexing\n"); #endif printf("Number of iterations = %16d\n", iterations); } } bail_out(num_error); /* initialize the input and result vectors */ #pragma omp for for (row=0; row<size2; row++) result[row] = vector[row] = 0.0; /* fill matrix with nonzeroes corresponding to difference stencil. We use the scrambling for reordering the points in the grid. */ #pragma omp for private (i,j,r) for (row=0; row<size2; row++) { j = row/size; i=row%size; elm = row*stencil_size; colIndex[elm] = REVERSE(LIN(i,j),lsize2); for (r=1; r<=radius; r++, elm+=4) { colIndex[elm+1] = REVERSE(LIN((i+r)%size,j),lsize2); colIndex[elm+2] = REVERSE(LIN((i-r+size)%size,j),lsize2); colIndex[elm+3] = REVERSE(LIN(i,(j+r)%size),lsize2); colIndex[elm+4] = REVERSE(LIN(i,(j-r+size)%size),lsize2); } // sort colIndex to make sure the compressed row accesses // vector elements in increasing order qsort(&(colIndex[row*stencil_size]), stencil_size, sizeof(s64Int), compare); for (elm=row*stencil_size; elm<(row+1)*stencil_size; elm++) matrix[elm] = 1.0/(double)(colIndex[elm]+1); } for (iter=0; iter<iterations; iter++) { #pragma omp barrier #pragma omp master { sparse_time = wtime(); } /* fill vector */ #pragma omp for for (row=0; row<size2; row++) vector[row] += (double) (row+1); /* do the actual matrix-vector multiplication */ #pragma omp for for (row=0; row<size2; row++) { temp = 0.0; first = stencil_size*row; last = first+stencil_size-1; #pragma simd reduction(+:temp) for 
(col=first; col<=last; col++) { temp += matrix[col]*vector[colIndex[col]]; } result[row] += temp; } #pragma omp master { sparse_time = wtime() - sparse_time; if (iter>0 || iterations==1) { /* skip the first iteration */ avgtime = avgtime + sparse_time; mintime = MIN(mintime, sparse_time); maxtime = MAX(maxtime, sparse_time); } } } } /* end of parallel region */ /* verification test */ reference_sum = 0.5 * (double) nent * (double) iterations * (double) (iterations +1); vector_sum = 0.0; for (row=0; row<size2; row++) vector_sum += result[row]; if (ABS(vector_sum-reference_sum) > epsilon) { printf("ERROR: Vector sum = %lf, Reference vector sum = %lf\n", vector_sum, reference_sum); exit(EXIT_FAILURE); } else { printf("Solution validates\n"); #ifdef VERBOSE printf("Reference sum = %lf, vector sum = %lf\n", reference_sum, vector_sum); #endif } avgtime = avgtime/(double)(MAX(iterations-1,1)); printf("Rate (MFlops/s): %lf, Avg time (s): %lf, Min time (s): %lf", 1.0E-06 * (2.0*nent)/mintime, avgtime, mintime); printf(", Max time (s): %lf\n", maxtime); exit(EXIT_SUCCESS); }
/*
 * Dialog procedure of the "About" dialog.
 *
 * The dialog hosts a tab control (IDC_ATABS) with three pages - general,
 * license and history - each shown as a child dialog stored in tab_hwnd.
 *
 *  - WM_INITDIALOG inserts the three tabs, selects the first one and then
 *    deliberately continues into the update code.
 *  - WM_SYSCOLORCHANGE / WM_UPDATE destroy the current page (if any) and
 *    create the page for the currently selected tab.
 *  - WM_NOTIFY with TCN_SELCHANGE posts WM_UPDATE to rebuild the page.
 *  - WM_COMMAND with IDCANCEL ends the dialog.
 *
 * Every handled path returns FALSE (0); unhandled messages also return
 * FALSE.
 */
BOOL APIENTRY GuiDlgAbout::DlgProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam)
{
#ifdef DEBUG
	//printf("GuiDlgAbout::DlgProc(): Message 0x%08X received.\n",message);
#endif

	// resources used by the individual tab pages
	BYTE *logo = (BYTE *)LockResource(LoadResource(myInstance,FindResource(myInstance,MAKEINTRESOURCE(IDB_LOGO),RT_BITMAP)));
	char *license = (char *)LockResource(LoadResource(myInstance,FindResource(myInstance,MAKEINTRESOURCE(IDR_TEXT_LICENSE),"TEXT")));
	char *history = (char *)LockResource(LoadResource(myInstance,FindResource(myInstance,MAKEINTRESOURCE(IDR_TEXT_HISTORY),"TEXT")));

	TCITEM tci;

	switch (message)
	{
		case WM_INITDIALOG:

			tab_hwnd = NULL;

			// init tab control
			tci.mask = TCIF_TEXT;

			tci.pszText = " General ";
			SendDlgItemMessage(hwndDlg,IDC_ATABS,TCM_INSERTITEM,0,(LPARAM)&tci);

			tci.pszText = " License ";
			SendDlgItemMessage(hwndDlg,IDC_ATABS,TCM_INSERTITEM,1,(LPARAM)&tci);

			tci.pszText = " What's New ";
			SendDlgItemMessage(hwndDlg,IDC_ATABS,TCM_INSERTITEM,2,(LPARAM)&tci);

			// set default tab index
			SendDlgItemMessage(hwndDlg,IDC_ATABS,TCM_SETCURSEL,0,0);

			// intentional fall through: show the initial tab page

		case WM_SYSCOLORCHANGE:
		case WM_UPDATE:

			// delete old tab window
			if (tab_hwnd)
			{
				DestroyWindow(tab_hwnd);

				tab_hwnd = NULL;
			}

			// display new tab window
			tab_index = (int)SendDlgItemMessage(hwndDlg,IDC_ATABS,TCM_GETCURSEL,0,0);

			switch (tab_index)
			{
				case 0:

					// fix logo: store the REVERSE()d button-face color in the
					// bitmap data at an offset derived from byte 0x428
					*(DWORD *)&logo[0x28 + (logo[0x428] << 2)] = REVERSE(GetSysColor(COLOR_BTNFACE));

					tab_hwnd = CreateDialogParam(myInstance,MAKEINTRESOURCE(IDD_ABT_ADPLUG),GetDlgItem(hwndDlg,IDC_ATABWND),(DLGPROC)TabDlgProc_Wrapper,(LPARAM)this);

					// plugin
					SetDlgItemText(tab_hwnd,IDC_PLUGIN_VER,PLUGIN_VER " (" __DATE__ /*" " __TIME__ */")");

					break;

				case 1:

					tab_hwnd = CreateDialogParam(myInstance,MAKEINTRESOURCE(IDD_ABT_LICENSE),GetDlgItem(hwndDlg,IDC_ATABWND),(DLGPROC)TabDlgProc_Wrapper,(LPARAM)this);

					// license
					SetDlgItemText(tab_hwnd,IDC_LICENSE,license);

					break;

				case 2:

					tab_hwnd = CreateDialogParam(myInstance,MAKEINTRESOURCE(IDD_ABT_HISTORY),GetDlgItem(hwndDlg,IDC_ATABWND),(DLGPROC)TabDlgProc_Wrapper,(LPARAM)this);

					// history
					SetDlgItemText(tab_hwnd,IDC_HISTORY,history);

					break;
			}

			return FALSE;

		case WM_NOTIFY:

			switch (((NMHDR *)lParam)->code)
			{
				case TCN_SELCHANGE:

					// rebuild the tab page asynchronously
					PostMessage(hwndDlg,WM_UPDATE,0,0);

					return FALSE;
			}

			// other notifications are not handled here; do NOT fall into
			// WM_COMMAND, where wParam would be misread as a command id
			break;

		case WM_COMMAND:

			switch (LOWORD(wParam))
			{
				case IDCANCEL:

					EndDialog(hwndDlg,wParam);

					return 0;
			}

			break;
	}

	return FALSE;
}
/*
 * Perform a "down" move from board `src` into board `dst`.
 *
 * The whole body is a single movefunc() invocation. It is handed three
 * cell expressions: the source cell and two destination cells, each with
 * the FIRST board index passed through REVERSE() and the second index
 * being `i`.
 *
 * NOTE(review): i, j and j2 are not declared in this function, and there
 * is no visible return statement - presumably movefunc() is a macro that
 * declares them, supplies the loops and produces the int result. Confirm
 * against its definition.
 */
int move_down(board_t src, board_t dst)
{
	movefunc(src[REVERSE(j)][i], dst[REVERSE(j2 - 1)][i], dst[REVERSE(j2)][i]);
}
/*
 * Perform a "right" move from board `src` into board `dst`.
 *
 * The whole body is a single movefunc() invocation. It is handed three
 * cell expressions: the source cell and two destination cells, each with
 * the first board index being `i` and the SECOND index passed through
 * REVERSE().
 *
 * NOTE(review): i, j and j2 are not declared in this function, and there
 * is no visible return statement - presumably movefunc() is a macro that
 * declares them, supplies the loops and produces the int result. Confirm
 * against its definition.
 */
int move_right(board_t src, board_t dst)
{
	movefunc(src[i][REVERSE(j)], dst[i][REVERSE(j2 - 1)], dst[i][REVERSE(j2)]);
}
int main(int argc, char **argv){ int Num_procs; /* Number of ranks */ int my_ID; /* rank */ int root=0; int iter, r; /* dummies */ int lsize; /* logarithmic linear size of grid */ int lsize2; /* logarithmic size of grid */ int size; /* linear size of grid */ s64Int size2; /* matrix order (=total # points in grid) */ int radius, /* stencil parameters */ stencil_size; s64Int row, col, first, last; /* dummies */ u64Int i, j; /* dummies */ int iterations; /* number of times the multiplication is done */ s64Int elm; /* sequence number of matrix nonzero */ s64Int elm_start; /* auxiliary variable */ int jstart, /* active grid rows parameters */ jend, nrows, row_offset; s64Int nent; /* number of nonzero entries */ double sparsity; /* fraction of non-zeroes in matrix */ double local_sparse_time,/* timing parameters */ sparse_time, avgtime; double * RESTRICT matrix; /* sparse matrix entries */ double * RESTRICT vector; /* vector multiplying the sparse matrix */ double * RESTRICT result; /* computed matrix-vector product */ double temp; /* temporary scalar storing reduction data */ #ifdef TESTDENSE double * RESTRICT rhs; /* known matrix-vector product */ double * RESTRICT dense; /* dense matrix equivalent of "matrix" */ #endif double vector_sum; /* checksum of result */ double reference_sum, /* local checksum of "rhs" */ check_sum; /* aggregate checksum of "rhs" */ double epsilon = 1.e-8; /* error tolerance */ s64Int * RESTRICT colIndex; /* column indices of sparse matrix entries */ int error=0; /* error flag */ size_t vector_space, /* variables used to hold malloc sizes */ matrix_space, index_space; int procsize; /* number of ranks per OS process */ /********************************************************************* ** Initialize the MPI environment *********************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); 
/********************************************************************* ** process, test and broadcast input parameters *********************************************************************/ if (my_ID == root){ if (argc != 4){ printf("Usage: %s <# iterations> <2log grid size> <stencil radius>\n",*argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if (iterations < 1){ printf("ERROR: Iterations must be positive : %d \n", iterations); error = 1; goto ENDOFTESTS; } lsize = atoi(*++argv); if (lsize <0) { printf("ERROR: Log of grid size must be non-negative: %d\n", (int) lsize); error = 1; goto ENDOFTESTS; } lsize2 = 2*lsize; size = 1<<lsize; if (size < Num_procs) { printf("ERROR: Grid size %d must be at least equal to # procs %d\n", (int) size, Num_procs); error = 1; goto ENDOFTESTS; } if ((int)(size%Num_procs)) { printf("ERROR: Grid size %d must be multiple of # procs %d\n", (int) size, Num_procs); error = 1; goto ENDOFTESTS; } /* compute number of points in the grid */ size2 = size*size; radius = atoi(*++argv); if (radius <0) { printf("ERROR: Stencil radius must be non-negative: %d\n", radius); error = 1; goto ENDOFTESTS; } /* emit error if (periodic) stencil overlaps with itself */ if (size <2*radius+1) { printf("ERROR: Grid extent %d smaller than stencil diameter 2*%d+1= %d\n", size, radius, radius*2+1); error = 1; goto ENDOFTESTS; } /* sparsity follows from number of non-zeroes per row */ sparsity = (double)(4*radius+1)/(double)size2; MPIX_Get_collocated_size(&procsize); printf("FG_MPI Sparse matrix-vector multiplication\n"); printf("Number of ranks = "FSTR64U"\n", Num_procs); printf("Number of ranks/process = "FSTR64U"\n", procsize); printf("Matrix order = "FSTR64U"\n", size2); printf("Stencil diameter = %16d\n", 2*radius+1); printf("Sparsity = %16.10lf\n", sparsity); printf("Number of iterations = %16d\n", iterations); #ifdef SCRAMBLE printf("Using scrambled indexing\n"); #else printf("Using canonical indexing\n"); #endif ENDOFTESTS:; } 
bail_out(error); /* Broadcast benchmark data to all ranks */ MPI_Bcast(&lsize, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&lsize2, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&size, 1, MPI_LONG_LONG_INT, root, MPI_COMM_WORLD); MPI_Bcast(&size2, 1, MPI_LONG_LONG_INT, root, MPI_COMM_WORLD); MPI_Bcast(&radius, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast(&iterations, 1, MPI_INT, root, MPI_COMM_WORLD); /* compute total size of star stencil in 2D */ stencil_size = 4*radius+1; /* compute number of rows owned by each rank */ nrows = size2/Num_procs; /* compute total number of non-zeroes for this rank */ nent = nrows*stencil_size; matrix_space = nent*sizeof(double); if (matrix_space/sizeof(double) != nent) { printf("ERROR: rank %d cannot represent space for matrix: %ul\n", my_ID, matrix_space); error = 1; } bail_out(error); matrix = (double *) malloc(matrix_space); if (!matrix) { printf("ERROR: rank %d could not allocate space for sparse matrix: "FSTR64U"\n", my_ID, matrix_space); error = 1; } bail_out(error); vector_space = (size2 + nrows)*sizeof(double); if (vector_space/sizeof(double) != (size2+nrows)) { printf("ERROR: rank %d Cannot represent space for vectors: %ul\n", my_ID, vector_space); error = 1; } bail_out(error); vector = (double *) malloc(vector_space); if (!vector) { printf("ERROR: rank %d could not allocate space for vectors: %d\n", my_ID, (int)(2*nrows)); error = 1; } bail_out(error); result = vector + size2; index_space = nent*sizeof(s64Int); if (index_space/sizeof(s64Int) != nent) { printf("ERROR: rank %d cannot represent space for column indices: %ul\n", my_ID, index_space); error = 1; } bail_out(error); colIndex = (s64Int *) malloc(index_space); if (!colIndex) { printf("ERROR: rank %d Could not allocate space for column indices: "FSTR64U"\n", my_ID, nent*sizeof(s64Int)); error = 1; } bail_out(error); /* fill matrix with nonzeroes corresponding to difference stencil. We use the scrambling for reordering the points in the grid. 
*/ jstart = (size/Num_procs)*my_ID; jend = (size/Num_procs)*(my_ID+1); for (j=jstart; j<jend; j++) for (i=0; i<size; i++) { elm_start = (i+(j-jstart)*size)*stencil_size; elm = elm_start; colIndex[elm] = REVERSE(LIN(i,j),lsize2); for (r=1; r<=radius; r++, elm+=4) { colIndex[elm+1] = REVERSE(LIN((i+r)%size,j),lsize2); colIndex[elm+2] = REVERSE(LIN((i-r+size)%size,j),lsize2); colIndex[elm+3] = REVERSE(LIN(i,(j+r)%size),lsize2); colIndex[elm+4] = REVERSE(LIN(i,(j-r+size)%size),lsize2); } /* sort colIndex to make sure the compressed row accesses vector elements in increasing order */ qsort(&(colIndex[elm_start]), stencil_size, sizeof(s64Int), compare); for (elm=elm_start; elm<elm_start+stencil_size; elm++) matrix[elm] = 1.0/(double)(colIndex[elm]+1); } #if defined(TESTDENSE) && defined(VERBOSE) /* fill dense matrix to test */ matrix_space = size2*size2/Num_procs*sizeof(double); if (matrix_space/sizeof(double) != size2*size2/Num_procs) { printf("ERROR: Cannot represent space for matrix: %ul\n", matrix_space); exit(EXIT_FAILURE); } dense = (double *) malloc(matrix_space); if (!dense) { printf("ERROR: Could not allocate space for dense matrix of order: %d\n", (int) size2); exit(EXIT_FAILURE); } rhs = (double *) malloc(vector_space); if (!rhs) { printf("ERROR: Could not allocate space for rhs: %d\n", (int) size2); exit(EXIT_FAILURE); } for (row=0; row<nrows; row++) { for (col=0; col<size2; col++) DENSE(col,row) = 0.0; first = row*stencil_size; last = first+stencil_size-1; rhs[row] = (double) (last-first+1) * (double) iterations; for (elm=first; elm<=last; elm++) DENSE(colIndex[elm],row) = matrix[elm]; } #endif /* initialize the input and result vectors */ for (row=0; row<nrows; row++) result[row] = vector[row] = 0.0; for (iter=0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_sparse_time = wtime(); } /* fill vector */ row_offset = nrows*my_ID; for (row=row_offset; row<nrows+row_offset; row++) 
vector[row] += (double) (row+1); /* replicate vector on all rankors */ MPI_Allgather(MPI_IN_PLACE, nrows, MPI_DOUBLE, vector, nrows, MPI_DOUBLE, MPI_COMM_WORLD); /* do the actual matrix multiplication */ for (row=0; row<nrows; row++) { first = stencil_size*row; last = first+stencil_size-1; #pragma simd reduction(+:temp) for (temp=0.0,col=first; col<=last; col++) { temp += matrix[col]*vector[colIndex[col]]; } result[row] += temp; } } /* end of iterations */ local_sparse_time = wtime() - local_sparse_time; MPI_Reduce(&local_sparse_time, &sparse_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); #if defined(TESTDENSE) && defined(VERBOSE) /* print matrix, vector, rhs, plus computed solution */ for (row=0; row<nrows; row++) { printf("( "); for (col=0; col<size2; col++) printf("%1.3lf ", DENSE(col,row)); printf(" ) ( %1.3lf ) = ( %1.3lf ) | ( %1.3lf )\n", vector[row], result[row], rhs[row]); } #endif /* verification test */ reference_sum = 0.5 * (double) size2 * (double) stencil_size * (double) (iterations+1) * (double) (iterations + 2); vector_sum = 0.0; for (row=0; row<nrows; row++) vector_sum += result[row]; MPI_Reduce(&vector_sum, &check_sum, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD); if (my_ID == root) { if (ABS(check_sum-reference_sum) > epsilon) { printf("ERROR: Vector sum = %lf, Reference vector sum = %lf, my_ID = %d\n", check_sum, reference_sum, my_ID); error = 1; } else { printf("Solution validates\n"); #ifdef VERBOSE printf("Reference sum = %lf, check sum = %lf\n", reference_sum, check_sum); #endif } avgtime = sparse_time/iterations; printf("Rate (MFlops/s): %lf Avg time (s): %lf\n", 1.0E-06 * (2.0*nent*Num_procs)/avgtime, avgtime); } bail_out(error); MPI_Finalize(); exit(EXIT_SUCCESS); }