/* * Internal function used for waiting a specific amount of ms * in Curl_socket_check() and Curl_poll() when no file descriptor * is provided to wait on, just being used to delay execution. * WinSock select() and poll() timeout mechanisms need a valid * socket descriptor in a not null file descriptor set to work. * Waiting indefinitely with this function is not allowed, a * zero or negative timeout value will return immediately. * Timeout resolution, accuracy, as well as maximum supported * value is system dependent, neither factor is a citical issue * for the intended use of this function in the library. * * Return values: * -1 = system call error, invalid timeout value, or interrupted * 0 = specified timeout has elapsed */ int Curl_wait_ms(int timeout_ms) { #if !defined(MSDOS) && !defined(USE_WINSOCK) #ifndef HAVE_POLL_FINE struct timeval pending_tv; #endif struct curltime initial_tv; int pending_ms; int error; #endif int r = 0; if(!timeout_ms) return 0; if(timeout_ms < 0) { SET_SOCKERRNO(EINVAL); return -1; } #if defined(MSDOS) delay(timeout_ms); #elif defined(USE_WINSOCK) Sleep(timeout_ms); #else pending_ms = timeout_ms; initial_tv = Curl_now(); do { #if defined(HAVE_POLL_FINE) r = poll(NULL, 0, pending_ms); #else pending_tv.tv_sec = pending_ms / 1000; pending_tv.tv_usec = (pending_ms % 1000) * 1000; r = select(0, NULL, NULL, NULL, &pending_tv); #endif /* HAVE_POLL_FINE */ if(r != -1) break; error = SOCKERRNO; if(error && ERROR_NOT_EINTR(error)) break; pending_ms = timeout_ms - ELAPSED_MS(); if(pending_ms <= 0) { r = 0; /* Simulate a "call timed out" case */ break; } } while(r == -1); #endif /* USE_WINSOCK */ if(r) r = -1; return r; }
int main(int argc, char** argv) { int opt; int i; int device = 0; unsigned int seed = 1; int count = 1; int syncOp = SYNCOP_DUMP; int finalSyncOp = SYNCOP_DUMP; int verbose = 0; int hostAlloc = 0; FLAGRTBInfo flagrtb_info; unsigned int npol, nstation, nfrequency; int flagrtb_error = 0; Complex *omp_matrix_h = NULL; struct timespec start, stop; double total, per_call, max_bw; #ifdef RUNTIME_STATS struct timespec tic, toc; #endif while ((opt = getopt(argc, argv, "c:d:f:ho:rs:v:")) != -1) { switch (opt) { case 'c': // Set number of time to call flagrtbCudaXengine count = strtoul(optarg, NULL, 0); if(count < 1) { fprintf(stderr, "count must be positive\n"); return 1; } break; case 'd': // Set CUDA device number device = strtoul(optarg, NULL, 0); break; case 'f': // Set syncOp for final call finalSyncOp = strtoul(optarg, NULL, 0); break; case 'o': // Set syncOp syncOp = strtoul(optarg, NULL, 0); break; case 'r': // Register host allocated memory hostAlloc = 1; break; case 's': // Set seed for random data seed = strtoul(optarg, NULL, 0); break; case 'v': // Set verbosity level verbose = strtoul(optarg, NULL, 0); break; default: /* '?' 
*/ fprintf(stderr, "Usage: %s [options]\n" "Options:\n" " -c COUNT How many times to call flagrtbCudaXengine [1]\n" " -d DEVNUM GPU device to use [0]\n" " -f FINAL_SYNCOP Sync operation for final call [1]\n" " -o SYNCOP Sync operation for all but final call [1]\n" " Sync operation values are:\n" " 0 (no sync)\n" " 1 (sync and dump)\n" " 2 (sync host to device transfer)\n" " 3 (sync kernel computations)\n" " -r Register host allocated memory [false]\n" " (otherwise use CUDA allocated memory)\n" " -s SEED Random number seed [1]\n" " -v {0|1|2|3} Verbosity level (debug only) [0]\n" " -h Show this message\n", argv[0]); exit(EXIT_FAILURE); } } srand(seed); // Get sizing info from library flagrtbInfo(&flagrtb_info); npol = flagrtb_info.npol; nstation = flagrtb_info.nstation; nfrequency = flagrtb_info.nfrequency; printf("Correlating %u stations with %u channels and integration length %u\n", flagrtb_info.nstation, flagrtb_info.nfrequency, flagrtb_info.ntime); #ifndef FIXED_POINT printf("Sending floating point data to GPU.\n"); #else printf("Sending fixed point data to GPU.\n"); #endif // perform host memory allocation // allocate the GPU X-engine memory FLAGRTBContext context; if(hostAlloc) { context.array_len = flagrtb_info.vecLength; context.matrix_len = flagrtb_info.matLength; context.array_h = malloc(context.array_len*sizeof(ComplexInput)); context.matrix_h = malloc(context.matrix_len*sizeof(Complex)); } else { context.array_h = NULL; context.matrix_h = NULL; } flagrtb_error = flagrtbInit(&context, device); if(flagrtb_error) { fprintf(stderr, "flagrtbInit returned error code %d\n", flagrtb_error); goto cleanup; } ComplexInput *array_h = context.array_h; // this is pinned memory Complex *cuda_matrix_h = context.matrix_h; // create an array of complex noise flagrtbRandomComplex(array_h, flagrtb_info.vecLength); // ompXengine always uses TRIANGULAR_ORDER unsigned int ompMatLength = nfrequency * ((nstation+1)*(nstation/2)*npol*npol); omp_matrix_h = (Complex *) 
malloc(ompMatLength*sizeof(Complex)); if(!omp_matrix_h) { fprintf(stderr, "error allocating output buffer for flagrtbOmpXengine\n"); goto cleanup; } #if (CUBE_MODE == CUBE_DEFAULT && !defined(POWER_LOOP) ) // Only call CPU X engine if dumping GPU X engine printf("Calling CPU X-Engine\n"); flagrtbOmpXengine(omp_matrix_h, array_h); #endif #define ELAPSED_MS(start,stop) \ ((((int64_t)stop.tv_sec-start.tv_sec)*1000*1000*1000+(stop.tv_nsec-start.tv_nsec))/1e6) printf("Calling GPU X-Engine\n"); clock_gettime(CLOCK_MONOTONIC, &start); for(i=0; i<count; i++) { #ifdef RUNTIME_STATS clock_gettime(CLOCK_MONOTONIC, &tic); #endif flagrtb_error = flagrtbCudaXengine(&context, i==count-1 ? finalSyncOp : syncOp); #ifdef RUNTIME_STATS clock_gettime(CLOCK_MONOTONIC, &toc); #endif if(flagrtb_error) { fprintf(stderr, "flagrtbCudaXengine returned error code %d\n", flagrtb_error); goto cleanup; } #ifdef RUNTIME_STATS fprintf(stderr, "%11.6f %11.6f ms%s\n", ELAPSED_MS(start,tic), ELAPSED_MS(tic,toc), i==count-1 ? 
" final" : ""); #endif } clock_gettime(CLOCK_MONOTONIC, &stop); total = ELAPSED_MS(start,stop); per_call = total/count; // per_spectrum = per_call / NTIME // per_channel = per_spectrum / NFREQUENCY // = per_call / (NTIME * NFREQUENCY) // max_bw (kHz) = 1 / per_channel = (NTIME * NFREQUENCY) / per_call max_bw = flagrtb_info.ntime*flagrtb_info.nfrequency/per_call/1000; // MHz printf("Elapsed time %.6f ms total, %.6f ms/call average, theoretical max BW %.3f MHz\n", total, per_call, max_bw); #if (CUBE_MODE == CUBE_DEFAULT) if(count > 1) { for(i=0; i<context.matrix_len; i++) { cuda_matrix_h[i].real /= count; cuda_matrix_h[i].imag /= count; } } flagrtbReorderMatrix(cuda_matrix_h); flagrtbCheckResult(cuda_matrix_h, omp_matrix_h, verbose, array_h); #if 0 int fullMatLength = nfrequency * nstation*nstation*npol*npol; Complex *full_matrix_h = (Complex *) malloc(fullMatLength*sizeof(Complex)); // convert from packed triangular to full matrix flagrtbExtractMatrix(full_matrix_h, cuda_matrix_h); free(full_matrix_h); #endif #endif cleanup: //free host memory free(omp_matrix_h); // free gpu memory flagrtbFree(&context); if(hostAlloc) { free(context.array_h); free(context.matrix_h); } return flagrtb_error; }
/* * This is a wrapper around poll(). If poll() does not exist, then * select() is used instead. An error is returned if select() is * being used and a file descriptor is too large for FD_SETSIZE. * A negative timeout value makes this function wait indefinitely, * unless no valid file descriptor is given, when this happens the * negative timeout is ignored and the function times out immediately. * * Return values: * -1 = system call error or fd >= FD_SETSIZE * 0 = timeout * N = number of structures with non zero revent fields */ int Curl_poll(struct pollfd ufds[], unsigned int nfds, int timeout_ms) { #ifndef HAVE_POLL_FINE struct timeval pending_tv; struct timeval *ptimeout; fd_set fds_read; fd_set fds_write; fd_set fds_err; curl_socket_t maxfd; #endif struct curltime initial_tv = {0, 0}; bool fds_none = TRUE; unsigned int i; int pending_ms = 0; int error; int r; if(ufds) { for(i = 0; i < nfds; i++) { if(ufds[i].fd != CURL_SOCKET_BAD) { fds_none = FALSE; break; } } } if(fds_none) { r = Curl_wait_ms(timeout_ms); return r; } /* Avoid initial timestamp, avoid Curl_now() call, when elapsed time in this function does not need to be measured. This happens when function is called with a zero timeout or a negative timeout value indicating a blocking call should be performed. 
*/ if(timeout_ms > 0) { pending_ms = timeout_ms; initial_tv = Curl_now(); } #ifdef HAVE_POLL_FINE do { if(timeout_ms < 0) pending_ms = -1; else if(!timeout_ms) pending_ms = 0; r = poll(ufds, nfds, pending_ms); if(r != -1) break; error = SOCKERRNO; if(error && ERROR_NOT_EINTR(error)) break; if(timeout_ms > 0) { pending_ms = (int)(timeout_ms - ELAPSED_MS()); if(pending_ms <= 0) { r = 0; /* Simulate a "call timed out" case */ break; } } } while(r == -1); if(r < 0) return -1; if(r == 0) return 0; for(i = 0; i < nfds; i++) { if(ufds[i].fd == CURL_SOCKET_BAD) continue; if(ufds[i].revents & POLLHUP) ufds[i].revents |= POLLIN; if(ufds[i].revents & POLLERR) ufds[i].revents |= (POLLIN|POLLOUT); } #else /* HAVE_POLL_FINE */ FD_ZERO(&fds_read); FD_ZERO(&fds_write); FD_ZERO(&fds_err); maxfd = (curl_socket_t)-1; for(i = 0; i < nfds; i++) { ufds[i].revents = 0; if(ufds[i].fd == CURL_SOCKET_BAD) continue; VERIFY_SOCK(ufds[i].fd); if(ufds[i].events & (POLLIN|POLLOUT|POLLPRI| POLLRDNORM|POLLWRNORM|POLLRDBAND)) { if(ufds[i].fd > maxfd) maxfd = ufds[i].fd; if(ufds[i].events & (POLLRDNORM|POLLIN)) FD_SET(ufds[i].fd, &fds_read); if(ufds[i].events & (POLLWRNORM|POLLOUT)) FD_SET(ufds[i].fd, &fds_write); if(ufds[i].events & (POLLRDBAND|POLLPRI)) FD_SET(ufds[i].fd, &fds_err); } } #ifdef USE_WINSOCK /* WinSock select() can't handle zero events. See the comment about this in Curl_check_socket(). */ if(fds_read.fd_count == 0 && fds_write.fd_count == 0 && fds_err.fd_count == 0) { r = Curl_wait_ms(timeout_ms); return r; } #endif ptimeout = (timeout_ms < 0) ? NULL : &pending_tv; do { if(timeout_ms > 0) { pending_tv.tv_sec = pending_ms / 1000; pending_tv.tv_usec = (pending_ms % 1000) * 1000; } else if(!timeout_ms) { pending_tv.tv_sec = 0; pending_tv.tv_usec = 0; } #ifdef USE_WINSOCK r = select((int)maxfd + 1, /* WinSock select() can't handle fd_sets with zero bits set, so don't give it such arguments. See the comment about this in Curl_check_socket(). */ fds_read.fd_count ? 
&fds_read : NULL, fds_write.fd_count ? &fds_write : NULL, fds_err.fd_count ? &fds_err : NULL, ptimeout); #else r = select((int)maxfd + 1, &fds_read, &fds_write, &fds_err, ptimeout); #endif if(r != -1) break; error = SOCKERRNO; if(error && ERROR_NOT_EINTR(error)) break; if(timeout_ms > 0) { pending_ms = timeout_ms - ELAPSED_MS(); if(pending_ms <= 0) { r = 0; /* Simulate a "call timed out" case */ break; } } } while(r == -1); if(r < 0) return -1; if(r == 0) return 0; r = 0; for(i = 0; i < nfds; i++) { ufds[i].revents = 0; if(ufds[i].fd == CURL_SOCKET_BAD) continue; if(FD_ISSET(ufds[i].fd, &fds_read)) ufds[i].revents |= POLLIN; if(FD_ISSET(ufds[i].fd, &fds_write)) ufds[i].revents |= POLLOUT; if(FD_ISSET(ufds[i].fd, &fds_err)) ufds[i].revents |= POLLPRI; if(ufds[i].revents != 0) r++; } #endif /* HAVE_POLL_FINE */ return r; }
/*
 * Wait until up to two read sockets and one write socket show activity.
 * poll() is used when a usable one exists, which avoids the FD_SETSIZE
 * limit; otherwise select() is used and a descriptor that does not pass
 * VERIFY_SOCK() (too large for FD_SETSIZE) makes the call fail.
 *
 * A negative timeout means "wait indefinitely". The exception is when all
 * three sockets are CURL_SOCKET_BAD: the call is then handed over to
 * Curl_wait_ms(), which never blocks indefinitely and returns -1 for a
 * negative timeout.
 *
 * Return values:
 *   -1 = system call error or fd >= FD_SETSIZE
 *    0 = timeout
 *    [bitmask] = action as described below
 *
 * CURL_CSELECT_IN - first socket is readable
 * CURL_CSELECT_IN2 - second socket is readable
 * CURL_CSELECT_OUT - write socket is writable
 * CURL_CSELECT_ERR - an error condition occurred
 */
int Curl_socket_check(curl_socket_t readfd0, /* two sockets to read from */
                      curl_socket_t readfd1,
                      curl_socket_t writefd, /* socket to write to */
                      time_t timeout_ms)     /* milliseconds to wait */
{
#ifdef HAVE_POLL_FINE
  struct pollfd pfd[3];
  int slot;
#else
  struct timeval pending_tv;
  struct timeval *ptimeout;
  fd_set fds_read;
  fd_set fds_write;
  fd_set fds_err;
  curl_socket_t maxfd;
#endif
  /* NOTE(review): ELAPSED_MS() is defined outside this block and is assumed
     to measure from 'initial_tv' - keep that name. */
  struct curltime initial_tv = {0, 0};
  int remaining_ms = 0;
  int err;
  int rc;
  int result;

#if SIZEOF_TIME_T != SIZEOF_INT
  /* wrap-around precaution: the timeout is narrowed to int further down */
  if(timeout_ms >= INT_MAX)
    timeout_ms = INT_MAX;
#endif

  if(!(readfd0 != CURL_SOCKET_BAD || readfd1 != CURL_SOCKET_BAD ||
       writefd != CURL_SOCKET_BAD)) {
    /* no sockets, just wait */
    return Curl_wait_ms((int)timeout_ms);
  }

  /* Only a positive timeout needs elapsed-time bookkeeping; zero (do not
     wait) and negative (block) timeouts skip the Curl_now() call. */
  if(timeout_ms > 0) {
    remaining_ms = (int)timeout_ms;
    initial_tv = Curl_now();
  }

#ifdef HAVE_POLL_FINE

  /* Pack the valid sockets into consecutive pollfd slots. */
  slot = 0;
  if(readfd0 != CURL_SOCKET_BAD) {
    pfd[slot].fd = readfd0;
    pfd[slot].events = POLLRDNORM|POLLIN|POLLRDBAND|POLLPRI;
    pfd[slot].revents = 0;
    slot++;
  }
  if(readfd1 != CURL_SOCKET_BAD) {
    pfd[slot].fd = readfd1;
    pfd[slot].events = POLLRDNORM|POLLIN|POLLRDBAND|POLLPRI;
    pfd[slot].revents = 0;
    slot++;
  }
  if(writefd != CURL_SOCKET_BAD) {
    pfd[slot].fd = writefd;
    pfd[slot].events = POLLWRNORM|POLLOUT;
    pfd[slot].revents = 0;
    slot++;
  }

  /* poll() itself takes -1 as "no timeout"; a zero timeout is already
     covered by the initial value of remaining_ms. */
  if(timeout_ms < 0)
    remaining_ms = -1;

  for(;;) {
    rc = poll(pfd, slot, remaining_ms);
    if(rc != -1)
      break;
    err = SOCKERRNO;
    if(err && ERROR_NOT_EINTR(err))
      break;
    /* Interrupted: retry, with the time that is left when it is bounded. */
    if(timeout_ms > 0) {
      remaining_ms = (int)(timeout_ms - ELAPSED_MS());
      if(remaining_ms <= 0) {
        rc = 0;  /* Simulate a "call timed out" case */
        break;
      }
    }
  }

  if(rc <= 0)
    return rc ? -1 : 0;

  /* Walk the slots in the order they were filled and build the bitmask. */
  result = 0;
  slot = 0;
  if(readfd0 != CURL_SOCKET_BAD) {
    if(pfd[slot].revents & (POLLRDNORM|POLLIN|POLLERR|POLLHUP))
      result |= CURL_CSELECT_IN;
    if(pfd[slot].revents & (POLLRDBAND|POLLPRI|POLLNVAL))
      result |= CURL_CSELECT_ERR;
    slot++;
  }
  if(readfd1 != CURL_SOCKET_BAD) {
    if(pfd[slot].revents & (POLLRDNORM|POLLIN|POLLERR|POLLHUP))
      result |= CURL_CSELECT_IN2;
    if(pfd[slot].revents & (POLLRDBAND|POLLPRI|POLLNVAL))
      result |= CURL_CSELECT_ERR;
    slot++;
  }
  if(writefd != CURL_SOCKET_BAD) {
    if(pfd[slot].revents & (POLLWRNORM|POLLOUT))
      result |= CURL_CSELECT_OUT;
    if(pfd[slot].revents & (POLLERR|POLLHUP|POLLNVAL))
      result |= CURL_CSELECT_ERR;
  }

  return result;

#else  /* HAVE_POLL_FINE */

  FD_ZERO(&fds_read);
  FD_ZERO(&fds_write);
  FD_ZERO(&fds_err);
  maxfd = (curl_socket_t)-1;

  /* Every valid socket also goes into the exception set. */
  if(readfd0 != CURL_SOCKET_BAD) {
    VERIFY_SOCK(readfd0);
    FD_SET(readfd0, &fds_read);
    FD_SET(readfd0, &fds_err);
    maxfd = readfd0;
  }
  if(readfd1 != CURL_SOCKET_BAD) {
    VERIFY_SOCK(readfd1);
    FD_SET(readfd1, &fds_read);
    FD_SET(readfd1, &fds_err);
    if(readfd1 > maxfd)
      maxfd = readfd1;
  }
  if(writefd != CURL_SOCKET_BAD) {
    VERIFY_SOCK(writefd);
    FD_SET(writefd, &fds_write);
    FD_SET(writefd, &fds_err);
    if(writefd > maxfd)
      maxfd = writefd;
  }

  /* A NULL timeout makes select() block. */
  ptimeout = (timeout_ms < 0) ? NULL : &pending_tv;

  for(;;) {
    /* Reload the timeval on every pass. With a zero timeout remaining_ms
       stays 0, which produces the all-zero "do not wait" timeval. */
    if(timeout_ms >= 0) {
      pending_tv.tv_sec = remaining_ms / 1000;
      pending_tv.tv_usec = (remaining_ms % 1000) * 1000;
    }

    /* WinSock select() must not be called with an fd_set that contains zero
       fd flags, or it will return WSAEINVAL.  But, it also can't be called
       with no fd_sets at all!  From the documentation:

         Any two of the parameters, readfds, writefds, or exceptfds, can be
         given as null. At least one must be non-null, and any non-null
         descriptor set must contain at least one handle to a socket.

       The exception set always has a bit set here, but the read or the
       write set may be empty, so those two are checked through their
       fd_count and replaced by NULL when empty.

       Note also that WinSock ignores the first argument, so we don't worry
       about the fact that maxfd is computed incorrectly with WinSock (since
       curl_socket_t is unsigned in such cases and thus -1 is the largest
       value).
    */
#ifdef USE_WINSOCK
    rc = select((int)maxfd + 1,
                fds_read.fd_count ? &fds_read : NULL,
                fds_write.fd_count ? &fds_write : NULL,
                &fds_err, ptimeout);
#else
    rc = select((int)maxfd + 1, &fds_read, &fds_write, &fds_err, ptimeout);
#endif

    if(rc != -1)
      break;
    err = SOCKERRNO;
    if(err && ERROR_NOT_EINTR(err))
      break;
    if(timeout_ms > 0) {
      remaining_ms = (int)(timeout_ms - ELAPSED_MS());
      if(remaining_ms <= 0) {
        rc = 0;  /* Simulate a "call timed out" case */
        break;
      }
    }
  }

  if(rc <= 0)
    return rc ? -1 : 0;

  /* Translate the descriptor sets into the CURL_CSELECT_* bitmask. */
  result = 0;
  if(readfd0 != CURL_SOCKET_BAD) {
    if(FD_ISSET(readfd0, &fds_read))
      result |= CURL_CSELECT_IN;
    if(FD_ISSET(readfd0, &fds_err))
      result |= CURL_CSELECT_ERR;
  }
  if(readfd1 != CURL_SOCKET_BAD) {
    if(FD_ISSET(readfd1, &fds_read))
      result |= CURL_CSELECT_IN2;
    if(FD_ISSET(readfd1, &fds_err))
      result |= CURL_CSELECT_ERR;
  }
  if(writefd != CURL_SOCKET_BAD) {
    if(FD_ISSET(writefd, &fds_write))
      result |= CURL_CSELECT_OUT;
    if(FD_ISSET(writefd, &fds_err))
      result |= CURL_CSELECT_ERR;
  }

  return result;

#endif  /* HAVE_POLL_FINE */

}