RTDECL(bool) RTThreadYield(void) { #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) uint64_t u64TS = ASMReadTSC(); #endif #ifdef RT_OS_DARWIN pthread_yield_np(); #elif defined(RT_OS_SOLARIS) || defined(RT_OS_HAIKU) sched_yield(); #else pthread_yield(); #endif #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) u64TS = ASMReadTSC() - u64TS; bool fRc = u64TS > 1500; LogFlow(("RTThreadYield: returning %d (%llu ticks)\n", fRc, u64TS)); #else bool fRc = true; /* PORTME: Add heuristics for determining whether the cpus was yielded. */ #endif return fRc; }
/*
 * Producer thread entry point: pushes N_ITERS fixed-size records through a
 * ring-buffer producer.
 *
 * ptr      pointer to the RingBufferProducer to write through.
 * returns  NULL, once every record has been accepted.
 *
 * Each record is 64 bytes.  The leading sizeof(unsigned long) bytes carry the
 * 1-based iteration number in host byte order; the rest of the record is
 * zero.  Whenever write() does not accept the whole record the thread yields
 * and retries.
 */
void* produce_function1 ( void *ptr )
{
    RingBufferProducer* p_producer = (RingBufferProducer *) ptr;

    /* Zero-filled so that no indeterminate bytes are ever handed to write(). */
    unsigned char v[64] = {0};

    for (unsigned long i = 1 ; i <= N_ITERS; i++)
    {
        /*
         * Copy the counter byte by byte: storing through an unsigned long*
         * aimed at a char array is not guaranteed to be aligned and breaks
         * the aliasing rules.
         */
        const unsigned char *src = (const unsigned char *) &i;
        for (unsigned k = 0; k < sizeof i; k++)
            v[k] = src[k];

        while (p_producer->write(v, 64) != 64)
            pthread_yield();
    }

    fprintf(stderr, "producer is done\n");

    /* A thread start routine must return a value; falling off the end is UB. */
    return NULL;
}
void* func_noise(void* arg) { Thread* pthr = (Thread*)arg; int rc, i, j, policy, tid = gettid(); struct sched_param schedp; cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(0, &mask); rc = sched_setaffinity(0, sizeof(mask), &mask); if (rc < 0) { printf("Thread %d: Can't set affinity: %d %s\n", tid, rc, strerror(rc)); exit(-1); } rc = sched_getaffinity(0, sizeof(mask), &mask); printf("Noise Thread started %d on CPU %ld\n", pthr->priority, (long)mask.__bits[0]); pthread_getschedparam(pthr->pthread, &policy, &schedp); while (1) { sleep(1); printf("Noise Thread running %d\n", pthr->priority); for (i = 0; i < 10000; i++) { if ((i % 100) == 0) { sched_getparam(tid, &schedp); policy = sched_getscheduler(tid); printf("Noise Thread %d(%d) loop %d pthread pol %d pri %d\n", tid, pthr->priority, i, policy, schedp.sched_priority); fflush(NULL); } pthr->id++; for (j = 0; j < 5000; j++) { pthread_mutex_lock(&(pthr->mutex)); pthread_mutex_unlock(&(pthr->mutex)); } } pthread_yield(); } return NULL; }
void do_send_err(int len) { int ret; struct fi_cq_tagged_entry s_cqe; struct fi_cq_err_entry err_cqe; ssize_t sz; uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; rdm_sr_init_data(source, len, 0xab); rdm_sr_init_data(target, len, 0); sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[0], &s_cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, -FI_EAVAIL); ret = fi_cq_readerr(msg_cq[0], &err_cqe, 0); cr_assert_eq(ret, 1); cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, "Bad error context"); cr_assert(err_cqe.flags == (FI_MSG | FI_SEND)); cr_assert(err_cqe.len == 0, "Bad error len"); cr_assert(err_cqe.buf == 0, "Bad error buf"); cr_assert(err_cqe.data == 0, "Bad error data"); cr_assert(err_cqe.tag == 0, "Bad error tag"); cr_assert(err_cqe.olen == 0, "Bad error olen"); cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); cr_assert(err_cqe.prov_errno == GNI_RC_TRANSACTION_ERROR, "Bad prov errno"); cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); s_e[0] = 1; rdm_sr_check_cntrs(s, r, s_e, r_e); }
/*
  Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
  lf_hash_put_pins().

  DESCRIPTION
    empty the purgatory (XXX deadlock warning below!),
    push LF_PINS structure to a stack

  PARAMETERS
    pins   the LF_PINS structure being returned; its pinbox is taken from
           pins->pinbox

  NOTES
    In debug builds (DBUG_OFF not defined) every pins->pin[] slot is
    asserted to be 0 on entry.
*/
void _lf_pinbox_put_pins(LF_PINS *pins)
{
  LF_PINBOX *pinbox= pins->pinbox;
  uint32 top_ver, nr;
  /* Remember the current link value; it is folded into the new top below. */
  nr= pins->link;
#ifndef DBUG_OFF
  {
    /* This thread should not hold any pin. */
    int i;
    for (i= 0; i < LF_PINBOX_PINS; i++)
      DBUG_ASSERT(pins->pin[i] == 0);
  }
#endif /* DBUG_OFF */
  /*
    XXX this will deadlock if other threads will wait for
    the caller to do something after _lf_pinbox_put_pins(),
    and they would have pinned addresses that the caller wants to free.
    Thus: only free pins when all work is done and nobody can wait for you!!!
  */
  while (pins->purgatory_count)
  {
    _lf_pinbox_real_free(pins);
    if (pins->purgatory_count)
    {
      /*
        Something is still left in the purgatory: drop the pinarray write
        lock around the yield and retake it before the next attempt.
      */
      my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock);
      pthread_yield();
      my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock);
    }
  }
  /*
    Push this LF_PINS onto the pinbox stack.  pins->link receives the low
    part of the current top word (top_ver % LF_PINBOX_MAX_PINS); the CAS then
    installs a word whose low part is nr and whose remainder is advanced by
    LF_PINBOX_MAX_PINS.
    NOTE(review): the loop relies on my_atomic_cas32() refreshing top_ver
    with the observed value when it fails, and the LF_PINBOX_MAX_PINS
    increment presumably acts as a version counter -- confirm both.
  */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    pins->link= top_ver % LF_PINBOX_MAX_PINS;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
  return;
}
/*
 * Mutex test worker.
 *
 * parameter carries the 1-based thread id as an integer smuggled through the
 * pointer value.  The worker performs NLOOPS iterations (progress is kept in
 * nloops[id - 1]); in each one it takes `mut`, verifies that the shared flag
 * my_mutex is clear, sets it, yields ten times while holding the lock, clears
 * it and releases the lock.  Finding the flag already set is counted in
 * nerrors[id - 1].
 *
 * Terminates through pthread_exit(), so the final return is never reached.
 */
static void *thread_func(void *parameter)
{
  int id  = (int)parameter;
  int ndx = id - 1;
  int yields;
  int status;

  nloops[ndx] = 0;
  while (nloops[ndx] < NLOOPS)
    {
      status = pthread_mutex_lock(&mut);
      if (status != 0)
        {
          printf("ERROR thread %d: pthread_mutex_lock failed, status=%d\n",
                 id, status);
        }

      /* Nobody else may be inside the critical section. */
      if (my_mutex == 1)
        {
          printf("ERROR thread=%d: "
                 "my_mutex should be zero, instead my_mutex=%d\n",
                 id, my_mutex);
          nerrors[ndx]++;
        }

      /* Hold the flag across several yields to invite interference. */
      my_mutex = 1;
      for (yields = 10; yields > 0; yields--)
        {
          pthread_yield();
        }
      my_mutex = 0;

      status = pthread_mutex_unlock(&mut);
      if (status != 0)
        {
          printf("ERROR thread %d: pthread_mutex_unlock failed, status=%d\n",
                 id, status);
        }

      nloops[ndx]++;
    }

  pthread_exit(NULL);
  return NULL; /* Non-reachable -- needed for some compilers */
}
static void do_read(int len) { ssize_t sz; uint64_t old_w_cnt, new_w_cnt; uint64_t old_r_cnt, new_r_cnt; #define READ_CTX 0x4e3dda1aULL init_data(source, len, 0); init_data(target, len, 0xad); old_w_cnt = fi_cntr_read(write_cntr); cr_assert(old_w_cnt >= 0); old_r_cnt = fi_cntr_read(read_cntr); cr_assert(old_r_cnt >= 0); sz = fi_read(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, (void *)READ_CTX); cr_assert_eq(sz, 0); do { new_r_cnt = fi_cntr_read(read_cntr); cr_assert(new_r_cnt >= 0); if (new_r_cnt == (old_r_cnt + 1)) break; pthread_yield(); } while (1); cr_assert(check_data(source, target, len), "Data mismatch"); new_w_cnt = fi_cntr_read(write_cntr); cr_assert(new_w_cnt >= 0); /* * no fi_read called so old and new read cnts should be equal */ cr_assert(new_w_cnt == old_w_cnt); }
// Gives up the remainder of the calling thread's time slice.
//
// The mechanism is selected at compile time:
//   - Windows threads:  Sleep(0)
//   - POSIX threads:    sched_yield() if available, else pthread_yield(),
//                       else a sleep() until the current time
//   - MP tasks:         MPYield()
// For the two POSIX yield calls a non-zero result trips an assert.
void thread::yield()
{
#if defined(BOOST_HAS_WINTHREADS)
    Sleep(0);
#elif defined(BOOST_HAS_PTHREADS)
#   if defined(BOOST_HAS_SCHED_YIELD)
    const int res = sched_yield();
    assert(res == 0);
#   elif defined(BOOST_HAS_PTHREAD_YIELD)
    const int res = pthread_yield();
    assert(res == 0);
#   else
    // No yield primitive: sleep until "now", which returns almost at once.
    xtime now;
    xtime_get(&now, TIME_UTC);
    sleep(now);
#   endif
#elif defined(BOOST_HAS_MPTASKS)
    MPYield();
#endif
}
/**
 * SortedList_insert ... insert an element into a sorted list
 *
 *	The element is linked in so that the list stays in ascending key
 *	order (keys compared with strcmp).  It is placed in front of the
 *	first node whose key is not smaller than the new key, or at the end
 *	if there is no such node.
 *
 * @param SortedList_t *list ... header for the list
 * @param SortedListElement_t *element ... element to be added to the list
 *
 * Note: if (opt_yield & INSERT_YIELD)
 *		pthread_yield is called between locating the position and
 *		linking the element (middle of the critical section)
 */
void SortedList_insert(SortedList_t *list, SortedListElement_t *element){
	SortedList_t *before = list;
	SortedList_t *after;

	/* Walk forward while the new key is strictly greater than the node's. */
	for (after = list->next;
	     after != NULL && strcmp(element->key, after->key) > 0;
	     after = after->next){
		before = after;
	}

	if(opt_yield & INSERT_YIELD){
		pthread_yield();
	}

	/* Splice the element in between `before` and `after`. */
	element->prev = before;
	element->next = after;
	before->next = element;
	if(after != NULL){
		after->prev = element;
	}
}
void execute_thread(void *c) { command_t command = (command_t) c; pthread_mutex_lock(&d_mutex); while(!is_runnable(pthread_self())) { pthread_mutex_unlock(&d_mutex); pthread_yield(); pthread_mutex_lock(&d_mutex); } pthread_mutex_unlock(&d_mutex); exec_command(c); pthread_mutex_lock(&d_mutex); pthread_mutex_lock(&tc_mutex); thread_count--; remove_dependencies(pthread_self()); pthread_mutex_unlock(&tc_mutex); pthread_mutex_unlock(&d_mutex); free(command); pthread_exit(0); }
// Opens the animation, fetches its first frame, starts file loading, waits
// until the screen reports that min_num_frames_load frames are buffered, and
// then starts the playback loop.
//
// Returns false as soon as any of the start-up steps fails, true otherwise.
bool Skitt::start_animation()
{
    // The steps run strictly in this order; the first failure aborts.
    // (The first frame has to be fetched before loading starts.)
    if (!animation->open()
        || !animation->next_frame()
        || !start_loading_files())
    {
        return false;
    }

    printf("Waiting for buffers to get a few frames...\n");
    for (;;)
    {
        if (screen->are_buffers_full(min_num_frames_load))
        {
            break;
        }
        pthread_yield();
    }

    printf("Buffers have 100 frames.  Starting the playback timer.\n");

    return start_playback_loop() ? true : false;
}
void do_read_error(int len) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe; struct fi_cq_err_entry err_cqe; init_data(source, len, 0); init_data(target, len, 0xad); sz = fi_read(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, (void *)READ_CTX); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, -FI_EAVAIL); ret = fi_cq_readerr(send_cq, &err_cqe, 0); cr_assert_eq(ret, 1); cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX, "Bad error context"); cr_assert(err_cqe.flags == (FI_RMA | FI_READ)); cr_assert(err_cqe.len == 0, "Bad error len"); cr_assert(err_cqe.buf == 0, "Bad error buf"); cr_assert(err_cqe.data == 0, "Bad error data"); cr_assert(err_cqe.tag == 0, "Bad error tag"); cr_assert(err_cqe.olen == 0, "Bad error olen"); cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); cr_assert(err_cqe.prov_errno == GNI_RC_TRANSACTION_ERROR, "Bad prov errno"); cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); rdm_rma_check_cntrs(0, 0, 0, 1); }
/*
 * Insert element into the list headed by list, keeping ascending key order.
 *
 * The new element goes after every existing node whose key compares less
 * than or equal to its own (strcmp), i.e. behind any equal keys.  When
 * opt_yield has INSERT_YIELD set, the thread yields between finding the
 * position and linking the element.
 */
void SortedList_insert(SortedList_t *list, SortedListElement_t *element){
    SortedListElement_t *cursor = list;
    SortedListElement_t *following;

    /* Advance past every node whose key is <= the new key. */
    while (cursor->next != NULL
           && strcmp(cursor->next->key, element->key) <= 0) {
        cursor = cursor->next;
    }

    if (opt_yield & INSERT_YIELD) {
        pthread_yield();
    }

    /* Link in right after cursor; `following` is NULL at the tail. */
    following = cursor->next;
    cursor->next = element;
    element->next = following;
    element->prev = cursor;
    if (following != NULL) {
        following->prev = element;
    }
}
/*
 * Producer thread body: repeatedly sends bursts and samples at the currently
 * configured sample rate.
 *
 * handle   the producer_thread_data_pt describing this producer.
 * returns  NULL when the loop ends.
 *
 * The loop runs while th_data->running stays true and the last send reported
 * CELIX_SUCCESS.  On every pass the sample rate is snapshotted under the
 * throughput read lock, and that snapshot alone is used for the rest of the
 * pass.  If sending the bursts fails, the samples are not sent and the loop
 * ends with that status instead of overwriting it.
 */
void *producer_generate(void *handle) {
    producer_thread_data_pt th_data = (producer_thread_data_pt) handle;
    celix_status_t status = CELIX_SUCCESS;
    int sampleRate;

    th_data->running = true;

    while (th_data->running && status == CELIX_SUCCESS) {
        pthread_rwlock_rdlock(&th_data->throughputLock);
        sampleRate = th_data->sampleRate;
        pthread_rwlock_unlock(&th_data->throughputLock);

        /* Decide on the locked snapshot, not on a second unlocked read. */
        if (sampleRate > 0) {
            status = producer_sendBursts(th_data, sampleRate);
            if (status == CELIX_SUCCESS) {
                status = producer_sendSamples(th_data, sampleRate);
            }
        }

        pthread_yield();
    }

    return NULL;
}
/* Peek into the circular buffer without consuming anything.

   Copies up to count bytes, starting at buffer->head, into bytes.  The
   head index stored in the buffer is never modified, so a later remove
   sees the same data again.

   While the read position has caught up with the data available (position
   equals (tail + 1) % bufsize), the call yields and retries as long as
   buffer->refcnt is non-zero; once refcnt is zero it gives up and returns
   what it has so far.  The read lock is held for the whole call, including
   while yielding.

   Returns the number of bytes actually copied.

   CAVEAT: the size of the lookahead is assumed to be a small fraction of
   the buffer size.

   There is no "unget"; in a multithreaded setting it would be potentially
   disastrous.  Lookahead is meant to be the alternative. */
int lookahead_cbuf(cbuf * buffer, buf_t * bytes, int count)
{
  int copied = 0;
  int pos;

  pthread_mutex_lock(buffer->readlock);
  pos = buffer->head;

  while (copied < count) {
    /* Nothing to read at pos yet: wait, or bail out if refcnt is zero. */
    while (pos == (buffer->tail + 1) % (buffer->bufsize)) {
      if (!buffer->refcnt)
        goto done;
      pthread_yield();
    }

    bytes[copied] = buffer->buf[pos];
    copied++;
    pos = (pos + 1) % (buffer->bufsize);
  }

done:
  pthread_mutex_unlock(buffer->readlock);
  return (copied);
}
// thread listening function void *listen_event_file(void *arg) { int id_file = (int) *((int *) arg); char * path_file = get_event_file_path(id_file); printf("listen to %s\n",path_file); int fd = open(path_file, O_RDONLY | O_NONBLOCK); struct input_event ev; while (found < 0) { int count = read(fd, &ev, sizeof(struct input_event)); if (ev.type == 1) { pthread_mutex_lock(&mutex_device); found = id_file; pthread_mutex_unlock(&mutex_device); found_path_file = path_file; } pthread_yield(); } pthread_exit(NULL); }
/*
 * Give other work a chance to run on this processor.
 *
 * The mechanism is selected at compile time:
 *   - __APPLE__ / __FreeBSD__ / __VXWORKS__ : sched_yield()
 *   - __MIC__                               : _mm_delay_32(1024), no yield
 *   - ANDROID                               : sched_yield()
 *   - everything else                       : pthread_yield()
 */
COMMON_SYSDEP void __cilkrts_yield(void)
{
#if __APPLE__ || __FreeBSD__ || __VXWORKS__
    // On MacOS, call sched_yield to yield quantum.  I'm not sure why we
    // don't do this on Linux also.
    sched_yield();
#elif defined(__MIC__)
    // On MIC, pthread_yield() really trashes things.  Arch's measurements
    // showed that calling _mm_delay_32() (or doing nothing) was a better
    // option.  Delaying 1024 clock cycles is a reasonable compromise between
    // giving up the processor and latency starting up when work becomes
    // available
    _mm_delay_32(1024);
#elif defined(ANDROID)
    // On Android, call sched_yield to yield quantum.  I'm not sure why we
    // don't do this on Linux also.
    sched_yield();
#else
    // On Linux, call pthread_yield (which in turn will call sched_yield)
    // to yield quantum.
    pthread_yield();
#endif
}
/**
 * SortedList_delete ... remove an element from a sorted list
 *
 *	The specified element is unlinked from whatever list it is
 *	currently in and then freed.
 *
 *	Before unlinking, the neighbours are checked: prev->next must point
 *	to this node, and, when there is a successor, next->prev must too.
 *	A NULL successor is accepted (tail of a NULL-terminated list); a
 *	NULL predecessor is reported as corruption, because a linked
 *	element always has at least the list header in front of it.
 *
 * @param SortedListElement_t *element ... element to be removed
 *
 * @return 0: element deleted successfully,
 *         1: element is NULL or has corrupted prev/next pointers
 *            (the element is left untouched and is not freed)
 *
 * Note: if (opt_yield & DELETE_YIELD)
 *		pthread_yield is called in the middle of the critical
 *		section, between reading next and reading prev
 */
int SortedList_delete( SortedListElement_t *element){
	SortedListElement_t *q;
	SortedListElement_t *p;

	if (element == NULL)
		return 1;

	q = element->next;

	// middle of critical section
	if (opt_yield & DELETE_YIELD)
		pthread_yield();

	p = element->prev;

	// Check for races/corruption without dereferencing NULL neighbours.
	if (p == NULL || p->next != element)
		return 1;
	if (q != NULL && q->prev != element)
		return 1;

	// Unlink; q is NULL when the element was the last node.
	if (q != NULL)
		q->prev = p;
	p->next = q;

	element->next = NULL;
	element->prev = NULL;

	// NOTE(review): the element is freed here, as before -- confirm that
	// callers do not own (or reuse) the element's storage.
	free(element);

	return 0;
}
void sep_atomic_compwrite(int index) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, (void *) -1, UINT_MAX, UINT_MAX }; uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; /* u64 */ *((uint64_t *)source) = FETCH_SOURCE_DATA; *((uint64_t *)target) = TARGET_DATA; sz = fi_compare_atomic(tx_ep[0][index], &operand, 1, NULL, &op2, NULL, source, loc_mr[0], rx_addr[index], (uint64_t)target, mr_key[1], FI_UINT64, FI_CSWAP, target); cr_assert_eq(sz, 0, "fi_compare_atomic returned %ld (%s)", sz, fi_strerror(-sz)); /* reset cqe */ cqe.op_context = cqe.buf = (void *) -1; cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, 1); sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); r[0] = 1; sep_check_cntrs(w, r, w_e, r_e); ret = *((uint64_t *)target) == SOURCE_DATA; cr_assert(ret, "Data mismatch"); ret = *((uint64_t *)source) == TARGET_DATA; cr_assert(ret, "Fetch data mismatch"); }
void sep_inject_write(int index, int len) { ssize_t sz; int ret, i; struct fi_cq_tagged_entry cqe; sep_init_data(source, len, 0x33); sep_init_data(target, len, 0); sz = fi_inject_write(tx_ep[0][index], source, len, rx_addr[index], (uint64_t)target, mr_key[1]); cr_assert_eq(sz, 0, "fi_inject_write returned %ld (%s)", sz, fi_strerror(-sz)); for (i = 0; i < len; i++) { while (source[i] != target[i]) { /* for progress */ ret = fi_cq_read(tx_cq[0][index], &cqe, 1); cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, "Received unexpected event\n"); pthread_yield(); } } }
static void usdf_dom_rdc_free_data(struct usdf_domain *udp) { struct usdf_rdm_connection *rdc; int i; if (udp->dom_rdc_hashtab != NULL) { pthread_spin_lock(&udp->dom_progress_lock); for (i = 0; i < USDF_RDM_HASH_SIZE; ++i) { rdc = udp->dom_rdc_hashtab[i]; while (rdc != NULL) { usdf_timer_reset(udp->dom_fabric, rdc->dc_timer, 0); rdc = rdc->dc_hash_next; } } pthread_spin_unlock(&udp->dom_progress_lock); /* XXX probably want a timeout here... */ while (ofi_atomic_get32(&udp->dom_rdc_free_cnt) < (int)udp->dom_rdc_total) { pthread_yield(); } free(udp->dom_rdc_hashtab); udp->dom_rdc_hashtab = NULL; } while (!SLIST_EMPTY(&udp->dom_rdc_free)) { rdc = SLIST_FIRST(&udp->dom_rdc_free); SLIST_REMOVE_HEAD(&udp->dom_rdc_free, dc_addr_link); usdf_timer_free(udp->dom_fabric, rdc->dc_timer); free(rdc); } }
// Performs platform-specific daemon shutdown work and then calls
// shutdownSig().
//
// daemonName  only referenced (and discarded) in the OW_NETWARE build.
// env         used in the non-NetWare, non-WIN32 build to look up the
//             PID-file path (ConfigOpts::PIDFILE_opt, falling back to
//             OW_DEFAULT_PIDFILE).
//
// Always returns 0.
int
daemonShutdown(const String& daemonName, const ServiceEnvironmentIFCRef& env)
{
#ifndef WIN32
#if defined(OW_NETWARE)
	(void)daemonName;
	{
		// Set the shutdown flag under its guard, wake every waiter on
		// the condition, then yield while the lock is still held.
		NonRecursiveMutexLock l(g_shutdownGuard);
		g_shutDown = true;
		g_shutdownCond.notifyAll();
		pthread_yield();
	}
	// Unregister only when not invoked from the event handler itself.
	if(!FromEventHandler)
	{
		UnRegisterEventNotification(DownEvent);
	}
#else
	// Remove the PID file named by the configuration.
	String pidFile(env->getConfigItem(ConfigOpts::PIDFILE_opt, OW_DEFAULT_PIDFILE));
	PidFile::removePid(pidFile.c_str());
#endif
#endif
	// Done on every platform, including WIN32.
	shutdownSig();
	return 0;
}
void do_inject_write(int len) { ssize_t sz; int ret, i, loops = 0; struct fi_cq_tagged_entry cqe; init_data(source, len, 0x23); init_data(target, len, 0); sz = fi_inject_write(ep[0], source, len, gni_addr[1], (uint64_t)target, mr_key); cr_assert_eq(sz, 0); for (i = 0; i < len; i++) { loops = 0; while (source[i] != target[i]) { ret = fi_cq_read(send_cq, &cqe, 1); /* for progress */ cr_assert(ret == -EAGAIN, "Received unexpected event\n"); pthread_yield(); cr_assert(++loops < 10000, "Data mismatch"); } } }
void sep_atomic_v(int index) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, (void *) -1, UINT_MAX, UINT_MAX }; uint64_t min; struct fi_ioc iov; uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; iov.addr = source; iov.count = 1; /* i64 */ *((int64_t *)source) = SOURCE_DATA; *((int64_t *)target) = TARGET_DATA; sz = fi_atomicv(tx_ep[0][index], &iov, (void **)loc_mr, 1, rx_addr[index], (uint64_t)target, mr_key[1], FI_INT64, FI_MIN, target); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, 1); sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); w[0] = 1; sep_check_cntrs(w, r, w_e, r_e); min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? SOURCE_DATA : TARGET_DATA; ret = *((int64_t *)target) == min; cr_assert(ret, "Data mismatch"); }
void do_write_buf(void *s, void *t, int len) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe; init_data(s, len, 0xab); init_data(t, len, 0); sz = fi_write(ep[0], s, len, NULL, gni_addr[1], (uint64_t)t, mr_key, t); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, 1); rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_WRITE, 0); rdm_rma_check_cntrs(1, 0, 0, 0); dbg_printf("got write context event!\n"); cr_assert(check_data(s, t, len), "Data mismatch"); }
/*
 * GPUReader_mapcudaMemcpy: C-linkage hook that models a cudaMemcpy between
 * addr0 and addr1.
 *
 * Parameters
 *   addr0, addr1  the two addresses involved. For kind == 1 the code treats
 *                 addr0 as the device address and addr1 as the host address;
 *                 for kind == 2 the roles are swapped.
 *   size          number of bytes being copied.
 *   kind          copy direction; only 1 (host -> device) and 2
 *                 (device -> host) are handled, following the cudaMemcpyKind
 *                 values quoted in the body.
 *   env           opaque pointer forwarded to the samplers' queue() calls.
 *   cpufid        in/out flow id of the calling CPU thread; overwritten with
 *                 the id returned by resumeThread() on paths that pause and
 *                 resume the CPU.
 *
 * Behaviour
 *   - On the very first call (firstevertime) and only with SPECRATE_SYNC, the
 *     CPU thread is paused, the function waits (yield + sleep(10) per round)
 *     until cuda_go_ahead becomes true, and the CPU thread is resumed.
 *     firstevertime is then cleared.
 *   - With unifiedCPUGPUmem set, nothing is simulated: the Addrmap entry
 *     whose device range contains dev_addr gets its host range updated.
 *   - Otherwise, with DO_MEMCPY: if CPU_DOES_MEMCPY, a fixed five-instruction
 *     sequence (with 8 loads and 8 stores) is queued on the CPU sampler per
 *     32-byte chunk; if not, the CPU is paused and one load plus one store
 *     per 64-byte chunk is queued on gsampler, rotating over the SMs.
 *
 * NOTE(review): in both "Pause the GPUs" loops pauseThread() is called with
 * glofid on every iteration rather than with an id derived from smid --
 * confirm that is intended.
 * NOTE(review): in the GPU path for kind == 2 the load still uses host_addr
 * and the store dev_addr, exactly as for kind == 1 -- confirm.
 */
extern "C" void GPUReader_mapcudaMemcpy(uint32_t addr0, uint32_t addr1, uint32_t size, uint32_t kind,void *env, uint32_t* cpufid){ uint32_t dev_addr = 0; uint32_t host_addr = 0; //avgMemcpy.sample(size); if (firstevertime == true){ #if SPECRATE_SYNC //Pause the CPU qsamplerlist[*cpufid]->pauseThread(*cpufid); oldqemuid = *cpufid; gpuTM->putinfile(qsamplerlist[*cpufid]->totalnInst); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //MSG("BYEEEEEEEEEEEEEEEE!!!!!"); //exit(-1); //Wait till the other threads reach the nInstSkipThreads mark. while (cuda_go_ahead == false){ fprintf(stderr,"."); pthread_yield(); sleep(10); } //Resume the CPU newqemuid = qsamplerlist[oldqemuid]->getFid(oldqemuid); newqemuid = qsamplerlist[newqemuid]->resumeThread(newqemuid, newqemuid); *cpufid = newqemuid; #endif //totalThreadCount.setIgnoreSampler(); //totalTimingThreadCount.setIgnoreSampler(); firstevertime = false; MSG("\n\n\n\n\n\n\n************************************************************** GO AHEAD ****************************************************************\n\n\n\n\n\n\n"); } if (unifiedCPUGPUmem){ /***************************************************/ // enum cudaMemcpyKind // { // cudaMemcpyHostToHost = 0, /* *< Host -> Host */ // cudaMemcpyHostToDevice = 1, /* *< Host -> Device */ // cudaMemcpyDeviceToHost = 2, /* *< Device -> Host */ // cudaMemcpyDeviceToDevice = 3 /**< Device -> Device */ // }; /***************************************************/ if (kind == 1){ //cudaMemcpyHostToDevice = 1, /* *< Host -> Device */ dev_addr = addr0; host_addr = addr1; memcpy2device.add(size); } else if (kind == 2){ //cudaMemcpyDeviceToHost = 2, /* *< Device -> Host */ dev_addr = addr1; host_addr = addr0; memcpy2host.add(size); } IS(MSG("Map %d bytes of memory between CPU address %x, and GPU address %x", (int)size, host_addr, dev_addr)); bool notfound = true; for 
(uint32_t i = 0; i < Addrmap.size(); i++){ if ((dev_addr >= Addrmap[i].dev_start) && (dev_addr <= Addrmap[i].dev_end)){ notfound = false; Addrmap[i].host_start = host_addr; Addrmap[i].host_end = host_addr+size; I(size <= Addrmap[i].size); i = Addrmap.size()+1; } } if (notfound){ I(0); IS(MSG("ERROR!!!")); } } else { #if DO_MEMCPY #if CPU_DOES_MEMCPY /////////////////////////////////////////////////////////////////// // RAWInstType loadinsn = 0xe4917004; //DO NOT CHANGE // RAWInstType storeinsn = 0xe4804004; //DO NOT CHANGE // uint32_t loadpc = 0xdeaddead; // Dummy PC // uint32_t storepc = 0xdeaddeb1; // deaddead+4 /////////////////////////////////////////////////////////////////// // RAWInstType loadinsn1 = 0xe5912000; //DO NOT CHANGE // RAWInstType loadinsn2 = 0xe2811010; //DO NOT CHANGE // // RAWInstType storeinsn1 = 0xe5834000; //DO NOT CHANGE // RAWInstType storeinsn2 = 0xe2833010; //DO NOT CHANGE // // DataType data = 0; // uint32_t loadpc1 = 0xdeaddead; // Dummy PC // uint32_t loadpc2 = 0xdeaddeb1; // Dummy PC // uint32_t storepc1 = 0xdeaddeb5; // deaddead+4 // uint32_t storepc2 = 0xdeaddeb9; // deaddead+4 /////////////////////////////////////////////////////////////////// //op = 4 (alu+br) fcf4: f5d1f07c pld [r1, #124]; 0x7c │~ //op = 1 (8ld+9alu) fcf8: e8b151f8 ldm r1!, {r3, r4, r5, r6, r7, r8, ip, lr} │~ //op = 4 (2alu) fcfc: e2522020 subs r2, r2, #32 │~ //op = 2 (8st+9alu) fd00: e8a051f8 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} │~ //op =3 (1br) fd04: aafffffa bge fcf4 <memcpy+0x44> RAWInstType insn1 = 0xf5d1f07c; // DO NOT CHANGE RAWInstType insn2 = 0xe8b151f8; // DO NOT CHANGE RAWInstType insn3 = 0xe2522020; // DO NOT CHANGE RAWInstType insn4 = 0xe8a051f8; // DO NOT CHANGE RAWInstType insn5 = 0xaafffffa; // DO NOT CHANGE uint32_t pc1 = 0xf00dfcf4; // DO NOT CHANGE uint32_t pc2 = 0xf00dfcf8; // DO NOT CHANGE uint32_t pc3 = 0xf00dfcfc; // DO NOT CHANGE uint32_t pc4 = 0xf00dfd00; // DO NOT CHANGE uint32_t pc5 = 0xf00dfd04; // DO NOT CHANGE char op1 = 4 | 
0xc0; // Thumb32 char op2 = 1; // ARM32 char op3 = 4; // ARM32 char op4 = 2; // ARM32 char op5 = 3; // ARM32 //DataType data = 0; uint64_t icount = 1; int32_t bytecount = size; uint32_t datachunk = 32; // Bytes fetched at a time if (kind == 1){ //cudaMemcpyHostToDevice = 1, /* *< Host -> Device */ dev_addr = addr0; host_addr = addr1; memcpy2device.add(bytecount); // qsamplerlist[*cpufid]->setyesStats(*cpufid); do { //loadSampler(cpufid); qsamplerlist[*cpufid]->queue(insn1,pc1, 0xdeadf00d ,*cpufid,op1,icount,env); for(int i=0;i<8;i++) // 8 LD qsamplerlist[*cpufid]->queue(insn2,pc2, host_addr + i*4,*cpufid,op2,icount,env); qsamplerlist[*cpufid]->queue(insn3,pc3, 0,*cpufid,op3,icount,env); for(int i=0;i<8;i++) // 8 ST qsamplerlist[*cpufid]->queue(insn4,pc4, dev_addr + i*4,*cpufid,op4,icount,env); qsamplerlist[*cpufid]->queue(insn5,pc5, pc1 ,*cpufid,op5,icount,env); //op = 1; //Load from host //qsamplerlist[*cpufid]->queue(loadinsn1,loadpc1,host_addr,*cpufid,op,icount,env); //qsamplerlist[*cpufid]->queue(loadinsn2,loadpc1,host_addr,*cpufid,op,icount,env); //gsampler->queue(insn,pc,addr,data,*cpufid,op,icount,env); //op = 2; // Store to device //qsamplerlist[*cpufid]->queue(storeinsn1,storepc1,dev_addr,*cpufid,op,icount,env); //qsamplerlist[*cpufid]->queue(storeinsn2,storepc2,dev_addr,*cpufid,op,icount,env); //gsampler->queue(insn,pc,addr,data,*cpufid,op,icount,env); host_addr += datachunk; dev_addr += datachunk; bytecount = bytecount - datachunk; } while (bytecount > 0); // qsamplerlist[*cpufid]->setnoStats(*cpufid); } else if (kind == 2){ //cudaMemcpyDeviceToHost = 2, /* *< Device -> Host */ memcpy2host.add(bytecount); dev_addr = addr1; host_addr = addr0; // qsamplerlist[*cpufid]->setyesStats(*cpufid); do { //loadSampler(cpufid); qsamplerlist[*cpufid]->queue(insn1,pc1, 0xdeadf00d ,*cpufid,op1,icount,env); for(int i=0;i<8;i++) // 8 LD qsamplerlist[*cpufid]->queue(insn2,pc2, dev_addr + i*4,*cpufid,op2,icount,env); qsamplerlist[*cpufid]->queue(insn3,pc3, 
0,*cpufid,op3,icount,env); for(int i=0;i<8;i++) // 8 LD qsamplerlist[*cpufid]->queue(insn4,pc4, host_addr + i*4,*cpufid,op4,icount,env); qsamplerlist[*cpufid]->queue(insn5,pc5, pc1 ,*cpufid,op5,icount,env); //op = 1; // Load from device //qsamplerlist[*cpufid]->queue(loadinsn1,loadpc1,dev_addr,*cpufid,op,icount,env); //qsamplerlist[*cpufid]->queue(loadinsn2,loadpc2,dev_addr,*cpufid,op,icount,env); //gsampler->queue(insn,pc,addr,*cpufid,op,icount,env); //op = 2; //Store to host //qsamplerlist[*cpufid]->queue(storeinsn1,storepc1,host_addr,*cpufid,op,icount,env); //qsamplerlist[*cpufid]->queue(storeinsn2,storepc2,host_addr,*cpufid,op,icount,env); //gsampler->queue(insn,pc,addr,*cpufid,op,icount,env); dev_addr += datachunk; host_addr += datachunk; bytecount = bytecount - datachunk; } while (bytecount > 0); // qsamplerlist[*cpufid]->setnoStats(*cpufid); } #else #if 1 //IF CPU DOES NOT DO MEMCPY, instead GPU fetched the memory address. uint32_t smid = 0; uint32_t glofid = 0; RAWInstType loadinsn = 0xFFFF; RAWInstType storeinsn= 0xFFFB; uint32_t loadpc = 0xf00ddead; uint32_t storepc = 0xf00ddeb1; AddrType addr = 0; char op = 0; uint64_t icount = 1; int32_t bytecount = size; uint32_t datachunk = 64; // Bytes fetched at a time, equal to the cache linesize of DL1G gsampler->startTiming(glofid); if (kind == 1) { //cudaMemcpyHostToDevice = 1, /* *< Host -> Device */ memcpy2device.add(bytecount); dev_addr = addr0; host_addr = addr1; //Pause the CPU oldqemuid = *cpufid; qsamplerlist[*cpufid]->pauseThread(*cpufid); //Memcpy do{ glofid = gpuTM->mapLocalID(smid); //Load gsampler->queue(loadinsn,loadpc,host_addr,glofid,op,1,env); if (istsfifoBlocked){ gsampler->resumeThread(glofid); } else { //Store gsampler->queue(storeinsn,storepc,dev_addr,glofid,op,1,env); host_addr += datachunk; dev_addr += datachunk; bytecount -= datachunk; } smid++; if (smid == gpuTM->ret_numSM()){ smid = 0; } } while (bytecount > 0); //Resume the CPU newqemuid = qsamplerlist[oldqemuid]->getFid(oldqemuid); 
newqemuid = qsamplerlist[newqemuid]->resumeThread(newqemuid, newqemuid); *cpufid = newqemuid; //Pause the GPUs for (smid = 0; ((smid < gpuTM->ret_numSM())); smid++){ gsampler->pauseThread(glofid); } } else if (kind == 2){ //cudaMemcpyDeviceToHost = 2, /* *< Device -> Host */ memcpy2host.add(bytecount); dev_addr = addr1; host_addr = addr0; //Pause the CPU oldqemuid = *cpufid; qsamplerlist[*cpufid]->pauseThread(*cpufid); //Memcpy do{ glofid = gpuTM->mapLocalID(smid); //Load gsampler->queue(loadinsn,loadpc,host_addr,glofid,op,1,env); if (istsfifoBlocked){ gsampler->resumeThread(glofid); } else { //Store gsampler->queue(storeinsn,storepc,dev_addr,glofid,op,1,env); host_addr += datachunk; dev_addr += datachunk; bytecount -= datachunk; } smid++; if (smid == gpuTM->ret_numSM()){ smid = 0; } } while (bytecount > 0); //Resume the CPU newqemuid = qsamplerlist[oldqemuid]->getFid(oldqemuid); newqemuid = qsamplerlist[newqemuid]->resumeThread(newqemuid, newqemuid); *cpufid = newqemuid; //Pause the GPUs for (smid = 0; ((smid < gpuTM->ret_numSM())); smid++){ gsampler->pauseThread(glofid); } } gsampler->stop(); #endif #endif #endif } }
/*
 * Add value to the long long that pointer refers to.
 *
 * The update is deliberately not atomic: the new total is computed first,
 * and when the global opt_yield is non-zero the thread yields before the
 * total is stored back.
 */
void add(long long *pointer, long long value)
{
    const long long total = *pointer + value;

    if (opt_yield) {
        pthread_yield();
    }

    *pointer = total;
}
/* Yields the processor on behalf of the calling task.
   The result of pthread_yield() is handed to AbortIfError() together with
   the name of this function. */
void YieldTask() noexcept
 {
  int ret=pthread_yield();

  AbortIfError(ret,"CCore::Sys::YieldTask()");
 }
// Begin control algorithm main method int main() { printf("Start of Control\r\n"); mot_Init(); // Declare variables for navdata int rc; nav_struct nav; // Initialize navdata rc = nav_Init(&nav); // Check if navdata initializes int nav_fail = 0; if (rc==0) { printf("navdata failed to initialize\n"); nav_fail = 1; } // Calibrate navdata rc=nav_FlatTrim(); if(rc) { printf("Failed navdata: retcode=%d\r\n",rc); nav_fail = 1; } // Kick off value getting thing in a separate thread! pthread_create(&image_processing_thread, NULL, process_images, NULL); int angle = getAngle(); printf("Angle: %i\r\n",angle); int dir = angle != 0 ? angle/abs(angle) : 0; /* //first pulse pulse(dir,pulseDuration); printf("Wait: %f\r\n",wait(angle)*1000000); for(int i=0; i<100; i++){ usleep(wait(angle)/100*1000000); checkKeypress(); } pulse(-dir,pulseDuration -.04); */ prevAngle = angle; // start timer gettimeofday(&t1, NULL); // PID Loop float s = .01; dir= 1; while(1) { checkKeypress(); if(waitToStart) continue; if(stopLoop) break; // Check navdata if navdata initiates if (nav_fail == 0) { checkNavdata(&nav); } pid_controller(); /* smallPulse(dir,.9); usleep(s * 1000000); smallPulse(dir,.9); usleep(1.5 * 1000000); printf("%f\n",s); if(dir > 0) dir = -1; else dir = 1; s+=.01; smallPulse(dir,.9); usleep(s * 1000000); smallPulse(dir,.9); usleep(1.5 * 1000000); printf("%f\n",s); s+=.01; */ //yield to other threads pthread_yield(); } // Cleanup // Delete the mutex pthread_mutex_destroy(&video_results_mutex); //close(sockfd); //close(newsockfd); // Close TCP socket //video_Close(&vid); // Close video thread mot_Close(); // Close motor thread printf("\nDone!\n"); return 0; }
void * process_images(void * param) { // Initialize used variables int n, sum; // Buffer for image data unsigned char * buf1; // Buffer for message passing info char buffer[4]; // Initialize getting a picture vid_struct vid; // Device location // Video0 is front camera, video1 is bottom camera vid.device = (char*)"/dev/video0"; // Other params for video vid.w = VIDEO_WIDTH; vid.h = VIDEO_HEIGHT; vid.n_buffers = NUM_BUFFERS; // Initialize video thread for streaming video_Init(&vid); // Create blank image img_struct * img = video_CreateImage(&vid); // INITIALIZE TCP CONNECTION // Note: AR.Drone is server and listens for connection opening // IP:192.168.1.1 Port: 7777 // Socket file descriptor, new socket file descriptor, port number int sockfd, newsockfd, portno; socklen_t clilen; struct sockaddr_in serv_addr, cli_addr; // Open tcp socket sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd < 0) { error("ERROR opening socket"); } //zero-initialize serv_addr bzero((char *) &serv_addr, sizeof(serv_addr)); //set port number portno = PORT_NUM; //set parameters for serv_addr serv_addr.sin_family = AF_INET; serv_addr.sin_addr.s_addr = INADDR_ANY; serv_addr.sin_port = htons(portno); //bind socket if (bind(sockfd, (struct sockaddr *) &serv_addr,sizeof(serv_addr)) < 0) { error("ERROR on binding"); } //listen for client to open connection listen(sockfd,5); clilen = sizeof(cli_addr); //open new socket newsockfd = accept(sockfd,(struct sockaddr *) &cli_addr,&clilen); if (newsockfd < 0) { error("ERROR on accept"); } // Now constantly fetch this and update the global variables while(1) { // Get picture into image buffer from video thread video_GrabImage(&vid, img); buf1 = img->buf; // Set image buffer unsigned char image[chopped_size]; // Copy over data from buf1 to image memcpy(image, buf1 + y_upper, y_lower - y_upper + 1); // Copy Y values memcpy(image + (y_lower - y_upper + 1), buf1 + cr_upper, cr_lower - cr_upper + 1); // Copy Cr values memcpy(image + (y_lower - y_upper + 1)+ 
(cr_lower - cr_upper + 1), buf1 + cb_upper, cb_lower - cb_upper + 1); // Copy Cb values // Send packet to client n = 0; sum = 0; while (sum < chopped_size) { n = write(newsockfd, image + sum, chopped_size - sum); if (n < 0) { error("ERROR reading image data from socket!"); } sum += n; } // Read 4 character return message from client bzero(buffer,4); n = 0; sum = 0; while (sum < 4) { n = read(newsockfd, buffer + sum, 4 - sum); if (n < 0) { error("ERROR reading client message!"); } sum += n; } // Convert buffer to integer or NULL char none[] = "None"; int equality = 0; //Check that buffer is "None" int check; for (check=0;check<4;check++) { if (buffer[check]==none[check]) { equality = 1; } else { equality = 0; break; } } // Message received is integer string if not equal if (equality==0) { // Lock the position value and update it + timestamp pthread_mutex_lock(&video_results_mutex); position_value = atoi(buffer); time(&img_recv_timestamp); pthread_mutex_unlock(&video_results_mutex); } else { // Lock the position value and update it pthread_mutex_lock(&video_results_mutex); position_value = 9999; time(&img_recv_timestamp); pthread_mutex_unlock(&video_results_mutex); } // Relinquish CPU before starting again pthread_yield(); } }