/* ------------------------------------------------------------------------- */
/*
 * Unsigned front end for ippsSet_32s().
 *
 * Fills len elements at pDst with val by reinterpreting the 32-bit pattern
 * of val as a signed value and forwarding to the signed routine.  The
 * status returned by ippsSet_32s() is passed straight back to the caller.
 */
static pstatus_t ipp_wrapper_set_32u(
	UINT32 val, UINT32* pDst, INT32 len)
{
	/* Same bits, signed view: no value conversion takes place. */
	union
	{
		UINT32 u;
		INT32 s;
	} bits;

	bits.u = val;

	return ippsSet_32s(bits.s, (INT32*) pDst, len);
}
/* START FUNC DECL */ int find_conns( char *t1, char *flb, char *fub, char *t2, char *f2, char *t3, char *f3 ) /* STOP FUNC DECL */ { int status = 0; char *flb_X = NULL; size_t flb_nX = 0; char *fub_X = NULL; size_t fub_nX = 0; char *nn_flb_X = NULL; size_t nn_flb_nX = 0; char *nn_fub_X = NULL; size_t nn_fub_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *f3_X = NULL; size_t f3_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN, t3_id = INT_MIN; int flb_id = INT_MIN, fub_id = INT_MIN, f2_id = INT_MIN, f3_id = INT_MIN; int nn_flb_id = INT_MIN, nn_fub_id = INT_MIN, nn_f2_id = INT_MIN, nn_f3_id = INT_MIN; TBL_REC_TYPE t1_rec, t2_rec, t3_rec; FLD_REC_TYPE flb_rec, nn_flb_rec; FLD_REC_TYPE fub_rec, nn_fub_rec; FLD_REC_TYPE f2_rec, nn_f2_rec; FLD_REC_TYPE f3_rec, nn_f3_rec; long long nR3; long long nR1 = INT_MIN, nR2 = INT_MIN; char opfile[MAX_LEN_FILE_NAME+1]; char nn_opfile[MAX_LEN_FILE_NAME+1]; int ddir_id = INT_MAX; /*----------------------------------------------------------------*/ if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( flb == NULL ) || ( *flb == '\0' ) ) { go_BYE(-1); } if ( ( fub == NULL ) || ( *fub == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( t3 == NULL ) || ( *t3 == '\0' ) ) { go_BYE(-1); } if ( ( f3 == NULL ) || ( *f3 == '\0' ) ) { go_BYE(-1); } if ( strcmp(flb, fub) == 0 ) { go_BYE(-1); } if ( strcmp(t1, t3) == 0 ) { go_BYE(-1); } if ( strcmp(t2, t3) == 0 ) { go_BYE(-1); } zero_string(opfile, (MAX_LEN_FILE_NAME+1)); zero_string(nn_opfile, (MAX_LEN_FILE_NAME+1)); /*--------------------------------------------------------*/ status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); chk_range(t1_id, 0, g_n_tbl); nR1 = g_tbls[t1_id].nR; /*--------------------------------------------------------*/ status=is_fld(NULL, t1_id, flb, &flb_id, &flb_rec, &nn_flb_id, &nn_flb_rec); cBYE(status); chk_range(flb_id, 0, g_n_fld); status = get_data(flb_rec, 
&flb_X, &flb_nX, false); cBYE(status); if ( nn_flb_id >= 0 ) { status = get_data(nn_flb_rec, &nn_flb_X, &nn_flb_nX, false); cBYE(status); } if ( flb_rec.fldtype != I8 ) { go_BYE(-1); } long long *flbI8 = (long long *)flb_X; /*--------------------------------------------------------*/ status=is_fld(NULL, t1_id, fub, &fub_id, &fub_rec, &nn_fub_id, &nn_fub_rec); cBYE(status); chk_range(fub_id, 0, g_n_fld); status = get_data(fub_rec, &fub_X, &fub_nX, false); cBYE(status); if ( nn_fub_id >= 0 ) { status = get_data(nn_fub_rec, &nn_fub_X, &nn_fub_nX, false); cBYE(status); } if ( fub_rec.fldtype != I8 ) { go_BYE(-1); } long long *fubI8 = (long long *)fub_X; /*--------------------------------------------------------*/ status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); chk_range(t2_id, 0, g_n_tbl); nR2 = g_tbls[t2_id].nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t2_id, f2, &f2_id, &f2_rec, &nn_f2_id, &nn_f2_rec); cBYE(status); chk_range(f2_id, 0, g_n_fld); status = get_data(f2_rec, &f2_X, &f2_nX, false); cBYE(status); if ( nn_f2_id >= 0 ) { go_BYE(-1); } if ( f2_rec.fldtype != I4 ) { go_BYE(-1); } int *f2I4 = (int *)f2_X; /*--------------------------------------------------------*/ status = is_tbl(t3, &t3_id, &t3_rec); cBYE(status); chk_range(t3_id, 0, g_n_tbl); nR3 = g_tbls[t3_id].nR; /*--------------------------------------------------------*/ /* Make space for output if necessary */ int *f3I4 = NULL; status = is_fld(NULL, t3_id, f3, &f3_id, &f3_rec, &nn_f3_id, &nn_f3_rec); cBYE(status); if ( f3_id >= 0 ) { status = get_data(f3_rec, &f3_X, &f3_nX, true); cBYE(status); if ( nn_f3_id >= 0 ) { go_BYE(-1); } f3I4 = (int *)f3_X; } else { status = mk_temp_file(opfile, nR3 * sizeof(int), &ddir_id); cBYE(status); status = q_mmap(ddir_id, opfile, &f3_X, &f3_nX, true); cBYE(status); f3I4 = (int *)f3_X; #ifdef IPP ippsSet_32s(0, f3I4, nR3); // TODO: P3: cilkfor? 
#else assign_const_I4(f3I4, nR3, 0); #endif status = add_fld(t3_id, f3, ddir_id, opfile, I4, -1, &f3_id, &f3_rec); cBYE(status); } core_find_conns(flbI8, fubI8, nR1, nn_flb_X, nn_fub_X, f2I4, nR2, f3I4, nR3); rs_munmap(f3_X, f3_nX); /*-----------------------------------------------------------*/ BYE: rs_munmap(flb_X, flb_nX); rs_munmap(fub_X, fub_nX); rs_munmap(nn_flb_X, nn_flb_nX); rs_munmap(nn_fub_X, nn_fub_nX); rs_munmap(f2_X, f2_nX); rs_munmap(f3_X, f3_nX); return(status); }
/*
 * int_pos_count
 *
 * Zeroes the nR3 int counters at op_X, then dispatches on the types of the
 * two inputs to the matching pos_count_<f1type>_<f2type>() routine.
 *
 *   f1type, f1_X, nR1   type, raw data and length of the first input
 *   f2type, f2_X, nR2   type, raw data and length of the second input
 *   op_X, nR3           output buffer, accessed as nR3 ints
 *
 * Only I4 and I8 are accepted for f1type and f2type; any other type fails
 * through go_BYE(-1).
 * Returns the status of the selected pos_count_* routine.
 */
/* START FUNC DECL */
int
int_pos_count(
	      FLD_TYPE f1type,
	      char *f1_X,
	      long long nR1,
	      FLD_TYPE f2type,
	      char *f2_X,
	      long long nR2,
	      char *op_X,
	      long long nR3
	      )
/* STOP FUNC DECL */
{
  int status = 0;
  int *I4f3 = (int *)op_X;
  /*-------------------------------------------------------- */
  /* Set counters to 0. TODO P3: Does this need multi-threading? */
#ifdef IPP
  ippsSet_32s(0, I4f3, nR3);
#else
  /* Zeroed with an explicit loop: call sites of assign_const_I4 disagree on
   * whether the count or the value comes second, and with the arguments
   * swapped the counters would be left untouched. */
  for ( long long i = 0; i < nR3; i++ ) {
    I4f3[i] = 0;
  }
#endif
  /* If nR3 is small enough, then the cost of duplicating it is not too high */

  /* Typed views of the inputs; only the one matching the field type is
   * passed on. */
  int *I4f1 = (int *)f1_X;
  long long *I8f1 = (long long *)f1_X;
  int *I4f2 = (int *)f2_X;
  long long *I8f2 = (long long *)f2_X;

  switch ( f1type ) {
    case I4 :
      switch ( f2type ) {
	case I4 :
	  status = pos_count_I4_I4(I4f1, nR1, I4f2, nR2, I4f3, nR3);
	  break;
	case I8 :
	  status = pos_count_I4_I8(I4f1, nR1, I8f2, nR2, I4f3, nR3);
	  break;
	default : /* I1, I2, F4, F8 and anything else: not supported */
	  go_BYE(-1);
	  break;
      }
      break;
    case I8 :
      switch ( f2type ) {
	case I4 :
	  status = pos_count_I8_I4(I8f1, nR1, I4f2, nR2, I4f3, nR3);
	  break;
	case I8 :
	  status = pos_count_I8_I8(I8f1, nR1, I8f2, nR2, I4f3, nR3);
	  break;
	default : /* I1, I2, F4, F8 and anything else: not supported */
	  go_BYE(-1);
	  break;
      }
      break;
    default : /* I1, I2, F4, F8 and anything else: not supported */
      go_BYE(-1);
      break;
  }
BYE:
  return(status);
}
/* ------------------------------------------------------------------------- */ int test_set32s_func(void) { #if defined(WITH_SSE2) || defined(WITH_IPP) INT32 ALIGN(dest[512]); int off; #endif int failed = 0; char testStr[256]; testStr[0] = '\0'; #ifdef WITH_SSE2 /* Test SSE under various alignments */ if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { int len; for (len=1; len<512-off; ++len) { int i; memset(dest, 0, sizeof(dest)); sse2_set_32s(0xdeadbeef, dest+off, len); for (i=0; i<len; ++i) { if (dest[off+i] != 0xdeadbeef) { printf("set32s-SSE FAIL: off=%d len=%d dest[%d]=0x%08x\n", off, len, i+off, dest[i+off]); failed=1; } } } } } #endif /* i386 */ #ifdef WITH_IPP strcat(testStr, " IPP"); for (off=0; off<16; ++off) { int len; for (len=1; len<512-off; ++len) { int i; memset(dest, 0, sizeof(dest)); ippsSet_32s(0xdeadbeef, dest+off, len); for (i=0; i<len; ++i) { if (dest[off+i] != 0xdeadbeef) { printf("set32s-IPP FAIL: off=%d len=%d dest[%d]=0x%08x\n", off, len, i+off, dest[i+off]); failed=1; } } } } #endif /* WITH_IPP */ if (!failed) printf("All set32s tests passed (%s).\n", testStr); return (failed > 0) ? FAILURE : SUCCESS; }
static inline void memset32s_naive( INT32 val, INT32 *dst, size_t count) { while (count--) *dst++ = val; } /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, TRUE, memset32s_naive(constant, dst, size), #ifdef WITH_SSE2 TRUE, sse2_set_32s(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif TRUE, ippsSet_32s(constant, dst, size)); int test_set32s_speed(void) { INT32 ALIGN(dst[MAX_TEST_SIZE+1]); set32s_speed_test("set32s", "aligned", NULL, NULL, 0xdeadbeef, dst, set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); #if 0 /* Not really necessary; should be almost as fast. */ set32s_speed_test("set32s", "unaligned", NULL, NULL, dst+1, set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME); #endif return SUCCESS; }