/**
 * Wait on the platform barrier selected by barrier_num.
 *
 * The barrier waited on is the one at offset barrier_num from ssmp_barrier.
 * An index outside [0, SSMP_NUM_BARRIERS) is ignored: the function returns
 * immediately without waiting.
 */
void ssmp_barrier_wait_platf(int barrier_num)
{
  /* Reject negative indices as well as too-large ones; either would make
     the pointer arithmetic below step outside the barrier storage. */
  if (barrier_num < 0 || barrier_num >= SSMP_NUM_BARRIERS)
    {
      return;
    }

  tmc_sync_barrier_wait(ssmp_barrier + barrier_num);
}
/** Main function. */ int main(int argc, char** argv) { // Number of instances of this program to run // (including the initial parent process). int instances = 4; // Detect whether we're the parent or an exec'd child, int is_parent = is_parent_process(); // Get the application's affinity set. // We'll use the first N available cpus from this set. // NOTE: this means parent should _not_ call any functions // that shrink the affinity set prior to go_parellel(); cpu_set_t cpus; int status = tmc_cpus_get_my_affinity(&cpus); check_tmc_status(status, "tmc_cpus_get_my_affinity()"); // Define UDN cpu set as first N available cpus status = udn_init(instances, &cpus); check_tmc_status(status, "udn_init()"); // Initialize "common" shared memory with default size. status = tmc_cmem_init(0); check_tmc_status(status, "tmc_cmem_init()"); // Allocate barrier data structure in shared memory. tmc_sync_barrier_t* barrier = NULL; if (is_parent) { // Allocate/initialize barrier data structure in common memory. barrier = (tmc_sync_barrier_t*) tmc_cmem_malloc(sizeof(*barrier)); if (barrier == NULL) tmc_task_die("barrier_init(): " "Failed to allocate barrier data structure."); tmc_sync_barrier_init(barrier, instances); } // Pass the barrier pointer to any exec'd children. share_pointer("SHARED_BARRIER_POINTER", (void**) &barrier); // Fork/exec any additional child processes, // each locked to its own tile, // and get index [0 -- instances-1] of current process. int index = go_parallel(instances, &cpus, argc, argv); pid_t pid = getpid(); printf("Process(pid=%i), index=%i: started.\n", pid, index); // Enable UDN access for this process (parent or child). // Note: this needs to be done after we're locked to a tile. status = tmc_udn_activate(); check_tmc_status(status, "tmc_udn_activate()"); // Wait here until all other processes have caught up. tmc_sync_barrier_wait(barrier); // Send/receive a value over the UDN. 
int from = 0; int to = instances - 1; if (index == from) { int value = 42; printf("Process(pid=%i), index=%i: sending value %i to cpu %i...\n", pid, index, value, to); udn_send_to_nth_cpu(to, &cpus, value); printf("Process(pid=%i), index=%i: sent value %i to cpu %i.\n", pid, index, value, to); } else if (index == to) { int received = 0; printf("Process(pid=%i), index=%i: receiving value...\n", pid, index); received = udn_receive(); printf("Process(pid=%i), index=%i: received value %i...\n", pid, index, received); } // Wait here until all other processes have caught up. tmc_sync_barrier_wait(barrier); printf("Process(pid=%i), index=%i: finished.\n", pid, index); // We're done. return 0; }
void* net_thread(void* arg) { int iix = (uintptr_t)arg; /*Ingress interface index*/ int eix; /*Egress interface index*/ int i, n; /*Index, Number*/ gxio_mpipe_iqueue_t *iqueue = iqueues[iix]; /*Ingress queue*/ gxio_mpipe_equeue_t *equeue; /*Egress queue*/ gxio_mpipe_idesc_t *idescs; /*Ingress packet descriptors*/ gxio_mpipe_edesc_t edescs[MAXBATCH]; /*Egress descriptors.*/ long slot; /*Setup egress queue.*/ switch (iix) { case 0: eix = 1; break; case 1: eix = 0; break; case 2: eix = 3; break; case 3: eix = 2; break; default: tmc_task_die("Invalid interface index, %d", iix); break; } equeue = &equeues[eix]; /*Egress queue*/ /*Bind to a single CPU.*/ if (tmc_cpus_set_my_cpu(tmc_cpus_find_nth_cpu(&cpus, DTILEBASE + iix)) < 0) { tmc_task_die("Failed to setup CPU affinity\n"); } if (set_dataplane(0) < 0) { tmc_task_die("Failed to setup dataplane\n"); } /*Line up all network threads.*/ tmc_sync_barrier_wait(&syncbar); tmc_spin_barrier_wait(&spinbar); if (iix == 0) { /*Pause briefly, to let everyone finish passing the barrier.*/ for (i = 0; i < 10000; i++) __insn_mfspr(SPR_PASS); /*Allow packets to flow (on all links).*/ sim_enable_mpipe_links(mpipei, -1); } /*-------------------------------------------------------------------------*/ /* Process(forward) packets. */ /*-------------------------------------------------------------------------*/ while (1) { /*Receive packet(s).*/ n = gxio_mpipe_iqueue_peek(iqueue, &idescs); if (n <= 0) continue; else if (n > 16) n = 16; //TODO: Experiment with this number. #if 0 printf("[%d] Get packet(s), n=%d\n", iix, n); #endif /*Prefetch packet descriptors from L3 to L1.*/ tmc_mem_prefetch(idescs, n * sizeof(*idescs)); /*Reserve slots. NOTE: This might spin.*/ slot = gxio_mpipe_equeue_reserve_fast(equeue, n); /*Process packet(s).*/ for (i = 0; i < n; i++) { /*Detect Call(s), clone the packet and pass it to antother Tile, if necessary.*/ //TODO: For now, inspect and record the packet using this Tile. 
if (ccap_detect_call(&idescs[i])) { ccap_trace_add(0, &idescs[i]); //TODO: Use actual link number. } /*Send the packets out on the peer port.*/ gxio_mpipe_edesc_copy_idesc(&edescs[i], &idescs[i]); #if 1 /*Drop "error" packets (but ignore "checksum" problems).*/ if (idescs[i].be || idescs[i].me || idescs[i].tr || idescs[i].ce) { edescs[i].ns = 1; } #endif gxio_mpipe_equeue_put_at(equeue, edescs[i], slot + i); gxio_mpipe_iqueue_consume(iqueue, &idescs[i]); } } /*Make compiler happy.*/ return (void *)NULL; }