void barrier() { if (rank==0) { printf("Rank 0 in barrier\n"); // Rank zero waits for the others while (barrier_count != xcount*ycount-1) {} barrier_count = 0; // Reset the count for (int i=1;i<xcount*ycount;i++) { // Send message to all ranks uint32_t buffer = 0; // Assemble header set_bits(&buffer,i,OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); set_bits(&buffer,0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); set_bits(&buffer,0,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); set_bits(&buffer,MSG_TYPE_BARRIER,1,0); optimsoc_mp_simple_send(1,&buffer); } } else { // Send message to rank 0 uint32_t buffer = 0; // Assemble header set_bits(&buffer,0,OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); set_bits(&buffer,0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); set_bits(&buffer,rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); set_bits(&buffer,MSG_TYPE_BARRIER,1,0); // is a barrier optimsoc_mp_simple_send(1,&buffer); // Wait until we received the message of rank 0 while (barrier_continue==0) {} // And reset signal barrier_continue = 0; } }
uint32_t control_msg_alloc(struct endpoint_handle *to_ep, uint32_t size) { trace_msg_alloc_begin(to_ep, size); // Wait until receive_endpoint is ready to receive (allocate) do { // Try to retrieve from remote // We do this as long as we do not get a valid handle back (-1) ctrl_request.buffer[0] = (to_ep->domain << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_MSG_ALLOC_REQ << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = (unsigned int) to_ep->ep; ctrl_request.buffer[2] = (unsigned int) size; ctrl_request.done = 0; trace_msg_alloc_req_send(to_ep, size); optimsoc_mp_simple_send(3,ctrl_request.buffer); while (ctrl_request.done == 0) {} if (ctrl_request.buffer[1]==CTRL_REQUEST_NACK) { #ifdef RUNTIME thread_yield(); #endif for (int t=0;t<timeout_insns;t++) { asm __volatile__("l.nop 0x0"); } timeout_insns = timeout_insns * 10; // somewhat arbitrary.. } } while (ctrl_request.buffer[1]==CTRL_REQUEST_NACK); trace_msg_alloc_end(to_ep, ctrl_request.buffer[2]); return ctrl_request.buffer[2]; }
void control_channel_send(struct endpoint_handle *ep, uint8_t *data, uint32_t size) { unsigned int words = (size+3)>>2; unsigned int wordsperpacket = optimsoc_noc_maxpacketsize()-4; for (int i=0;i<words;i=i+wordsperpacket) { ctrl_request.buffer[0] = (ep->domain << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_CHAN_DATA << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = (unsigned int) ep->ep; ctrl_request.buffer[2] = i; if (((i+wordsperpacket) >= words)) { ctrl_request.buffer[3] = size - i * wordsperpacket; } else { ctrl_request.buffer[3] = 0; } int sz = words - i; if (sz>wordsperpacket) sz = wordsperpacket; for (int d=0;d<sz;d++) { ctrl_request.buffer[4+d] = ((unsigned int *)data)[i+d]; } optimsoc_mp_simple_send(4+sz,ctrl_request.buffer); } }
void control_msg_data(struct endpoint_handle *ep, uint32_t address, void* buffer, uint32_t size) { // TODO: what if size%4!=0? assert(size % 4 == 0); trace_msg_data_begin(ep, address, size); unsigned int words = (size+3)>>2; unsigned int wordsperpacket = optimsoc_noc_maxpacketsize()-4; for (int i=0;i<words;i=i+wordsperpacket) { ctrl_request.buffer[0] = (ep->domain << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_MSG_DATA << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = (unsigned int) ep->ep; ctrl_request.buffer[2] = (uint32_t) address; ctrl_request.buffer[3] = i; int sz = words - i; if (sz>wordsperpacket) sz = wordsperpacket; for (int d=0;d<sz;d++) { ctrl_request.buffer[2+d] = ((unsigned int *)buffer)[i+d]; } trace_msg_data_send(ep, ctrl_request.buffer[2], sz); optimsoc_mp_simple_send(2+sz,ctrl_request.buffer); } ctrl_request.buffer[0] = (ep->domain << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_MSG_COMPLETE << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = (unsigned int) ep->ep; ctrl_request.buffer[2] = (uint32_t) address; ctrl_request.buffer[3] = size; trace_msg_complete_send(ep, address, size); optimsoc_mp_simple_send(4, ctrl_request.buffer); trace_msg_data_end(ep); }
void control_channel_sendcredit(struct endpoint_handle *ep, int32_t credit) { ctrl_request.buffer[0] = (ep->ep->remotedomain << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_CHAN_CREDIT << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = (unsigned int) ep->ep->remote; ctrl_request.buffer[2] = credit; optimsoc_mp_simple_send(3, ctrl_request.buffer); }
int lcd_set(unsigned int row,unsigned int col,char c) { uint32_t buffer = 0; if (optimsoc_has_uart() && optimsoc_uart_lcd_enable()) { set_bits(&buffer, optimsoc_uarttile(), OPTIMSOC_DEST_MSB, OPTIMSOC_DEST_LSB); set_bits(&buffer, 0, OPTIMSOC_CLASS_MSB, OPTIMSOC_CLASS_LSB); set_bits(&buffer, optimsoc_get_tileid(), OPTIMSOC_SRC_MSB, OPTIMSOC_SRC_LSB); set_bits(&buffer, 1, 13, 13); set_bits(&buffer, row, 12, 12); set_bits(&buffer, col, 11, 8); set_bits(&buffer, (uint32_t) c, 7, 0); optimsoc_mp_simple_send(1,&buffer); } return 0; }
// Connect the local endpoint `from` to the remote endpoint `to`.
//
// Sends a four-word CTRL_REQUEST_CHAN_CONNECT_REQ to to->domain carrying the
// remote endpoint, the local domain and the local endpoint, then spins until
// ctrl_request.done is set.
//
// Returns word 1 of the reply (the CHAN_CONNECT_REQ branch of
// control_msg_handler puts the result of endpoint_channel_get_credit there).
uint32_t control_channel_connect(struct endpoint_handle *from, struct endpoint_handle *to) {
    // Header
    ctrl_request.buffer[0] = (to->domain << OPTIMSOC_DEST_LSB) |
            (1 << OPTIMSOC_CLASS_LSB) |
            (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) |
            (CTRL_REQUEST_CHAN_CONNECT_REQ << CTRL_REQUEST_LSB);

    // Who is asked, and who is asking
    ctrl_request.buffer[1] = (unsigned int) to->ep;
    ctrl_request.buffer[2] = (unsigned int) from->domain;
    ctrl_request.buffer[3] = (unsigned int) from->ep;

    ctrl_request.done = 0;
    optimsoc_mp_simple_send(4, ctrl_request.buffer);

    // Spin until the reply has been stored in ctrl_request
    while (ctrl_request.done == 0) {
    }

    return ctrl_request.buffer[1];
}
// Clear the LCD by writing a blank to every position.
//
// One word per position is sent to the UART tile for rows 0..1 and columns
// 0..15, with bit 13 set, the row in bit 12, the column in bits 11..8 and
// ' ' in bits 7..0.
//
// Always returns 0.
int lcd_init() {
    uint32_t word = 0;

    // Fields that stay the same for every position
    set_bits(&word, optimsoc_uarttile(), OPTIMSOC_DEST_MSB, OPTIMSOC_DEST_LSB);
    set_bits(&word, 0, OPTIMSOC_CLASS_MSB, OPTIMSOC_CLASS_LSB);
    set_bits(&word, optimsoc_get_tileid(), OPTIMSOC_SRC_MSB, OPTIMSOC_SRC_LSB);
    set_bits(&word, 1, 13, 13);
    set_bits(&word, ' ', 7, 0);

    int row = 0;
    while (row <= 1) {
        set_bits(&word, row, 12, 12);

        int col = 0;
        while (col < 16) {
            set_bits(&word, col, 11, 8);
            optimsoc_mp_simple_send(1, &word);
            col++;
        }

        row++;
    }

    return 0;
}
struct endpoint *control_get_endpoint(uint32_t domain, uint32_t node, uint32_t port) { struct endpoint *ep; while (!optimsoc_mp_simple_ctready(domain)); trace_ep_get_req_begin(domain, node, port); do { // Try to retrieve from remote // We do this as long as we do not get a valid handle back (-1) ctrl_request.buffer[0] = (domain << OPTIMSOC_DEST_LSB) | (NOC_CLASS_FIFO << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_GETEP_REQ << CTRL_REQUEST_LSB); ctrl_request.buffer[1] = node; ctrl_request.buffer[2] = port; ctrl_request.done = 0; trace_ep_get_req_send(domain, node, port); optimsoc_mp_simple_send(3,ctrl_request.buffer); control_wait_response(); ep = (struct endpoint*) ctrl_request.buffer[1]; trace_ep_get_resp_recv(domain, ep); if ((int)ep==-1) { #ifdef RUNTIME assert(0); // TODO: Reactivate //optimsoc_thread_yield(); #endif for (int t=0;t<timeout_insns;t++) { asm __volatile__("l.nop 0x0"); } timeout_insns = timeout_insns * 10; // somewhat arbitrary.. } } while ((int)ep==-1); trace_ep_get_req_end(ep); return ep; }
void uart_printf(const char *fmt, ...) { if (!optimsoc_has_uart()) { return; } char buffer[128]; va_list ap; va_start(ap, fmt); /* Initialize the va_list */ vsnprintf(buffer,128,fmt, ap); /* Call vprintf */ va_end(ap); /* Cleanup the va_list */ int size = strnlen(buffer,128); uint32_t msg = 0; set_bits(&msg, optimsoc_uarttile(), OPTIMSOC_DEST_MSB, OPTIMSOC_DEST_LSB); set_bits(&msg, 0, OPTIMSOC_CLASS_MSB, OPTIMSOC_CLASS_LSB); set_bits(&msg, optimsoc_get_tileid(), OPTIMSOC_SRC_MSB, OPTIMSOC_SRC_LSB); for (unsigned i=0;i<size;i++) { set_bits(&msg, buffer[i],7,0); optimsoc_mp_simple_send(1,&msg); } }
// The heat calculation void heat() { // Verify curmatrix is set correctly curmatrix = 1; // This points to the other matrix from the previous step int other = 0; for (int n=1;n<=ITERATIONS;n++) { #if 0 // Enable if you want an updated matrix output every iteration for (int x=0;x<xdim+2;x++) { printf("(%.2f) ",matrix[other][POS(x,0)]); } printf("\n"); for (int y=1;y<ydim+1;y++) { printf("(%.2f) ",matrix[other][POS(0,y)]); for (int x=1;x<xdim+1;x++) { printf(" %.2f ",matrix[other][POS(x,y)]); } printf("(%.2f) ",matrix[other][POS(xdim+1,y)]); printf("\n"); } for (int x=0;x<xdim+2;x++) { printf("(%.2f) ",matrix[other][POS(x,ydim+1)]); } printf("\n"); #endif optimsoc_trace_section(1); printf("Start iteration %d\n",n); // Calculate all new elements based on the previous values for (int x=1;x<xdim+1;x++) for (int y=1;y<ydim+1;y++) matrix[curmatrix][POS(x,y)] = 0.25 * (matrix[other][POS(x-1,y)] + matrix[other][POS(x+1,y)] + matrix[other][POS(x,y-1)] + matrix[other][POS(x,y+1)]); printf("Finished iteration %d\n", n); optimsoc_trace_section(2); // Now we send the results to the other ranks if (!topbound) { // If one is above us // Message buffer uint32_t buffer[3]; // Assemble the header // Find tile id set_bits(&buffer[0],optimsoc_ranktile(rank-xcount),OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); // Set class set_bits(&buffer[0],0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); // Sender is this rank set_bits(&buffer[0],rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); // Type is boundary data set_bits(&buffer[0],MSG_TYPE_BOUNDARY,1,0); // Boundary is other ranks bottom set_bits(&buffer[0],BOUNDARY_BOTTOM,3,2); // Only the payload varies, send them successively for (int x=1;x<xdim+1;x++) { // Set position in boundary buffer[1] = x; // Copy element to payload in buffer memcpy(&buffer[2],&matrix[curmatrix][POS(x,1)],4); // Call library to send the element optimsoc_mp_simple_send(3,buffer); } } if (!bottombound) { // Same as above to the rank below this one uint32_t buffer[3]; 
set_bits(&buffer[0],optimsoc_ranktile(rank+xcount),OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); set_bits(&buffer[0],0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); set_bits(&buffer[0],rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); set_bits(&buffer[0],MSG_TYPE_BOUNDARY,1,0); set_bits(&buffer[0],BOUNDARY_TOP,3,2); for (int x=1;x<xdim+1;x++) { buffer[1] = x; memcpy(&buffer[2],&matrix[curmatrix][POS(x,ydim)],4); optimsoc_mp_simple_send(3,buffer); } } if (!leftbound) { // Same as above to the rank left of this one uint32_t buffer[3]; set_bits(&buffer[0],optimsoc_ranktile(rank-1),OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); set_bits(&buffer[0],0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); set_bits(&buffer[0],rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); set_bits(&buffer[0],MSG_TYPE_BOUNDARY,1,0); set_bits(&buffer[0],BOUNDARY_RIGHT,3,2); for (int y=1;y<ydim+1;y++) { buffer[1] = y; memcpy(&buffer[2],&matrix[curmatrix][POS(1,y)],4); optimsoc_mp_simple_send(3,buffer); } } if (!rightbound) { // Same as above to the rank right of this one uint32_t buffer[3]; set_bits(&buffer[0],optimsoc_ranktile(rank+1),OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); set_bits(&buffer[0],0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); set_bits(&buffer[0],rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); set_bits(&buffer[0],MSG_TYPE_BOUNDARY,1,0); set_bits(&buffer[0],BOUNDARY_LEFT,3,2); for (int y=1;y<ydim+1;y++) { buffer[1] = y; memcpy(&buffer[2],&matrix[curmatrix][POS(xdim,y)],4); optimsoc_mp_simple_send(3,buffer); } } // Wait for all other ranks to reach this point optimsoc_trace_section(3); barrier(); // Change matrices // (1-0=1, 1-1=0) curmatrix = 1 - curmatrix; other = 1 - other; } // When all iterations are done, assemble the result if (rank==0) { // rank 0 sets it directly for (int x=1;x<xdim+1;x++) { for (int y=1;y<ydim+1;y++) { result[x-1+(y-1)*XSIZE] = matrix[other][POS(x,y)]; } } } else { // the other ranks send their results to rank 0 // The buffer contains the message described above uint32_t buffer[4]; // Assemble header // Destination is 
rank 0 set_bits(&buffer[0],0,OPTIMSOC_DEST_MSB,OPTIMSOC_DEST_LSB); // Class is 0 set_bits(&buffer[0],0,OPTIMSOC_CLASS_MSB,OPTIMSOC_CLASS_LSB); // Sender is this rank set_bits(&buffer[0],rank,OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); // This is a result element set_bits(&buffer[0],MSG_TYPE_RESULT,1,0); // Send each element now for (int x=1;x<xdim+1;x++) { for (int y=1;y<ydim+1;y++) { // Set x and y position of element on complete grid buffer[1] = xbase+x-1; buffer[2] = ybase+y-1; // Copy value memcpy(&buffer[3],&matrix[other][POS(x,y)],4); // Send this element optimsoc_mp_simple_send(4,buffer); } } } // Final barrier to ensure the result is complete barrier(); }
// The following handler is called by the message interrupt service routine void control_msg_handler(unsigned int* buffer,int len) { // Extract sender information unsigned int src = EXTRACT(buffer[0],OPTIMSOC_SRC_MSB,OPTIMSOC_SRC_LSB); // Extract request type int req = EXTRACT(buffer[0],CTRL_REQUEST_MSB,CTRL_REQUEST_LSB); // Reply buffer uint32_t rbuffer[5]; // Handle the respective request switch (req) { case CTRL_REQUEST_GETEP_REQ: { trace_ep_get_req_recv(src, buffer[1], buffer[2]); // This is the request to get an endpoint handle // Flit 1: node number // Flit 2: port number // Return the get endpoint response to sender rbuffer[0] = (src << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_GETEP_RESP << CTRL_REQUEST_LSB); // Get endpoint handle for <thisdomain,node,port> where // this domain is the tile id struct endpoint_handle *eph = endpoint_get(optimsoc_get_tileid(), buffer[1], buffer[2]); // If valid numbers and endpoint handle found if ( //buffer[1] < MCA_MAX_NODES && //buffer[2] < MCAPI_MAX_ENDPOINTS && (eph!=0)) { // Return endpoint rbuffer[1] = (unsigned int) eph->ep; } else { // Signal this is an invalid endpoint rbuffer[1] = (int) -1; } trace_ep_get_resp_send(src, (struct endpoint*) rbuffer[1]); optimsoc_mp_simple_send(2,rbuffer); break; } case CTRL_REQUEST_MSG_ALLOC_REQ: { rbuffer[0] = (src << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_MSG_ALLOC_RESP << CTRL_REQUEST_LSB); struct endpoint *ep = (struct endpoint*) buffer[1]; unsigned int size = buffer[2]; trace_msg_alloc_req_recv(src, ep, size); uint32_t ptr; int rv = endpoint_alloc(ep, size, &ptr); if (rv == 0) { rbuffer[1] = CTRL_REQUEST_ACK; rbuffer[2] = ptr; trace_msg_alloc_resp_send(src, ep, ptr); optimsoc_mp_simple_send(3, rbuffer); } else { rbuffer[1] = CTRL_REQUEST_NACK; trace_msg_alloc_resp_send(src, ep, -1); optimsoc_mp_simple_send(2,rbuffer); } break; } case 
CTRL_REQUEST_MSG_DATA: { struct endpoint *ep = (struct endpoint*) buffer[1]; endpoint_write(ep, buffer[2], buffer[3], (uint32_t*) &buffer[4], len-4); break; } case CTRL_REQUEST_MSG_COMPLETE: { struct endpoint *ep = (struct endpoint*) buffer[1]; endpoint_write_complete(ep, buffer[2], buffer[3]); #ifdef RUNTIME if (ep->waiting) { thread_resume(ep->waiting_thread); ep->waiting = 0; } #endif break; } case CTRL_REQUEST_CHAN_CONNECT_REQ: { struct endpoint *ep = (struct endpoint *) buffer[1]; ep->remotedomain = (uint32_t) buffer[2]; ep->remote = (struct endpoint *) buffer[3]; rbuffer[0] = (src << OPTIMSOC_DEST_LSB) | (1 << OPTIMSOC_CLASS_LSB) | (optimsoc_get_tileid() << OPTIMSOC_SRC_LSB) | (CTRL_REQUEST_CHAN_CONNECT_RESP << CTRL_REQUEST_LSB); rbuffer[1] = endpoint_channel_get_credit(ep); optimsoc_mp_simple_send(2, rbuffer); break; } case CTRL_REQUEST_CHAN_DATA: { struct endpoint *ep = (struct endpoint *) buffer[1]; uint32_t offset = buffer[2]; uint32_t eom = buffer[3]; endpoint_write(ep, ep->buffer->write_ptr, offset, (uint32_t*) &buffer[4], len-4); if (eom) { ep->buffer->data_size[ep->buffer->write_ptr] = offset + len - 4; ep->buffer->write_ptr = _endpoint_addptrwrap(ep, ep->buffer->write_ptr, 1); trace_ep_bufferstate(ep, endpoint_channel_get_fillstate(ep)); } break; } case CTRL_REQUEST_CHAN_CREDIT: { struct endpoint *ep = (struct endpoint *) buffer[1]; uint32_t credit = buffer[2]; if (credit == 0) { ep->remotecredit = 0; } else { ep->remotecredit += credit; } break; } case CTRL_REQUEST_GETEP_RESP: case CTRL_REQUEST_MSG_ALLOC_RESP: case CTRL_REQUEST_CHAN_CONNECT_RESP: // Forward the responses to the handler ctrl_request.buffer[0] = buffer[0]; ctrl_request.buffer[1] = buffer[1]; ctrl_request.buffer[2] = buffer[2]; ctrl_request.buffer[3] = buffer[3]; ctrl_request.buffer[4] = buffer[4]; ctrl_request.done = 1; break; default: printf("Unknown request: %d\n",req); break; } }