/*
 *  inotify_modify_helper()
 *	Make the file with mk_file(name, path, 4096), open it read/write
 *	and, while opt_do_run is set, write a single zero byte to it.
 *	A write that fails with EAGAIN or EINTR is retried. Once the
 *	file has been made it is always removed with rm_file() before
 *	returning.
 *
 *	Returns 0 on success, -1 if the file could not be made, opened
 *	or written. The dummy argument is unused.
 */
static int inotify_modify_helper(
	const char *name,
	const char *path,
	const void *dummy)
{
	char zero_byte[1] = { 0 };
	int status = 0;
	int fd;

	(void)dummy;

	if (mk_file(name, path, 4096) < 0)
		return -1;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		pr_err(stderr, "%s: cannot open file %s: errno=%d (%s)\n",
			name, path, errno, strerror(errno));
		(void)rm_file(name, path);
		return -1;
	}

	/* The loop only repeats to retry an interrupted/would-block write */
	while (opt_do_run && (write(fd, zero_byte, 1) < 0)) {
		if ((errno == EAGAIN) || (errno == EINTR))
			continue;
		pr_err(stderr, "%s: cannot write to file %s: errno=%d (%s)\n",
			name, path, errno, strerror(errno));
		status = -1;
		break;
	}
	(void)close(fd);
	(void)rm_file(name, path);

	return status;
}
// START FUNC DECL int mk_temp_file( size_t filesz, char *dir, char *filename ) // STOP FUNC DECL { int status = 0; extern char cwd[MAX_LEN_DIR_NAME+1]; if ( getcwd(cwd, MAX_LEN_DIR_NAME) == NULL ) { go_BYE(-1); } if ( filesz < 0 ) { go_BYE(-1); } if ( ( dir == NULL ) || ( *dir == '\0' ) ) { go_BYE(-1); } if ( ( filename == NULL ) || ( *filename == '\0' ) ) { go_BYE(-1); } status = get_disk_space(dir, &free_space, "free_space"); cBYE(status); if ( free_space < filesz ) { go_BYE(-1); } // Make empty file with that name status = open_file_in_dir(cwd, dir, filename, filesz); cBYE(status); // Stretch it to appropriate size status = mk_file(cwd, dir, filename, filesz); cBYE(status); BYE: return(status); }
/*
 *  inotify_open_file()
 *	Build the pathname for "inotify_file" under path, make the file
 *	and run inotify_exercise() with inotify_open_helper and IN_OPEN.
 *	The file is removed afterwards. Nothing is exercised if the
 *	file cannot be made.
 */
static void inotify_open_file(const char *name, const char *path)
{
	char target[PATH_MAX];

	mk_filename(target, sizeof(target), path, "inotify_file");
	if (mk_file(name, target, 4096) >= 0) {
		inotify_exercise(name, target, path, "inotify_file",
			inotify_open_helper, IN_OPEN, NULL);
		(void)rm_file(name, target);
	}
}
/*
 *  inotify_delete_file()
 *	Build the pathname for "inotify_file" under path, make the file
 *	and run inotify_exercise() with inotify_delete_helper and
 *	IN_DELETE. Nothing is exercised if the file cannot be made.
 */
static void inotify_delete_file(const char *name, const char *path)
{
	char target[PATH_MAX];

	mk_filename(target, sizeof(target), path, "inotify_file");
	if (mk_file(name, target, 4096) >= 0) {
		inotify_exercise(name, target, path, "inotify_file",
			inotify_delete_helper, IN_DELETE, NULL);
		/* Remove it (again) in case the test failed to do so */
		(void)rm_file(name, target);
	}
}
/*
 *  inotify_attrib_file()
 *	Build the pathname for "inotify_file" under path, make the file
 *	and run inotify_exercise() with inotify_attrib_helper and
 *	IN_ATTRIB. The file is removed afterwards. Nothing is exercised
 *	if the file cannot be made.
 */
void inotify_attrib_file(const char *name, const char *path)
{
	char target[PATH_MAX];

	mk_filename(target, sizeof(target), path, "inotify_file");
	if (mk_file(name, target, 4096) >= 0) {
		inotify_exercise(name, target, path, "inotify_file",
			inotify_attrib_helper, IN_ATTRIB, NULL);
		(void)rm_file(name, target);
	}
}
// START FUNC DECL int g_init_meta_data( char *docroot ) // STOP FUNC DECL { int status = 0; char cwd[MAX_LEN_DIR_NAME+1]; char *gpu_reg_X = NULL; size_t gpu_reg_nX = 0; GPU_REG_TYPE *gpu_reg = NULL; /* hash table for flds */ int n_gpu_reg = 0; /* sizeof above */ if ( ( docroot == NULL ) || ( *docroot == '\0' ) ) { go_BYE(-1); } zero_string(cwd, MAX_LEN_DIR_NAME+1); if ( getcwd(cwd, MAX_LEN_DIR_NAME) == NULL ) { go_BYE(-1); } /*----------------------------------------------------------*/ size_t filesz = G_GPU_REG_SIZE * sizeof(GPU_REG_TYPE); status = mk_file(cwd, docroot, "docroot.gpu", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.gpu", &gpu_reg_X, &gpu_reg_nX, 1); cBYE(status); if ( gpu_reg_nX == 0 ) { go_BYE(-1); } n_gpu_reg = gpu_reg_nX / sizeof(GPU_REG_TYPE); if ( (gpu_reg_nX % sizeof(GPU_REG_TYPE) ) != 0 ) { go_BYE(-1); } gpu_reg = (GPU_REG_TYPE *)gpu_reg_X; status = chdir(cwd); cBYE(status); /*----------------------------------------------------------*/ for ( int i = 0; i < n_gpu_reg; i++ ) { zero_gpu_reg(&(gpu_reg[i])); } /*----------------------------------------------------------*/ BYE: return status ; }
// START FUNC DECL int mk_temp_file( char *opfile, size_t filesz ) // STOP FUNC DECL { int status = 0; char cwd[MAX_LEN_DIR_NAME+1]; if ( g_data_dir == NULL ) { go_BYE(-1); } zero_string(cwd, MAX_LEN_DIR_NAME+1); getcwd(cwd, MAX_LEN_DIR_NAME); if ( strlen(cwd) == 0 ) { go_BYE(-1); } status = open_temp_file(opfile, filesz); cBYE(status); status = chdir(g_data_dir); cBYE(status); status = mk_file(opfile, filesz); cBYE(status); status = chdir(cwd); cBYE(status); BYE: return(status); }
// START FUNC DECL int init_meta_data( char *docroot, char *dsk_data_dir, char *ram_data_dir ) // STOP FUNC DECL { int status = 0; size_t filesz; if ( ( docroot == NULL ) || ( *docroot == '\0' ) ) { go_BYE(-1); } if ( strlen(docroot) > MAX_LEN_DIR_NAME ) { go_BYE(-1); } /*----------------------------------------------------------*/ filesz = G_HT_TBL_SIZE * sizeof(META_KEY_VAL_TYPE); status = mk_file(g_cwd, docroot, "docroot.ht_tbl", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.ht_tbl", &g_ht_tbl_X, &g_ht_tbl_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); if ( g_ht_tbl_nX == 0 ) { go_BYE(-1); } g_n_ht_tbl = g_ht_tbl_nX / sizeof(META_KEY_VAL_TYPE); if ( (g_ht_tbl_nX % sizeof(META_KEY_VAL_TYPE) ) != 0 ) { go_BYE(-1); } g_ht_tbl = (META_KEY_VAL_TYPE *)g_ht_tbl_X; /*----------------------------------------------------------*/ filesz = G_HT_FLD_SIZE * sizeof(META_KEY_VAL_TYPE); status = mk_file(g_cwd, docroot, "docroot.ht_fld", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.ht_fld", &g_ht_fld_X, &g_ht_fld_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); if ( g_ht_fld_nX == 0 ) { go_BYE(-1); } g_n_ht_fld = g_ht_fld_nX / sizeof(META_KEY_VAL_TYPE); if ( (g_ht_fld_nX % sizeof(META_KEY_VAL_TYPE) ) != 0 ) { go_BYE(-1); } g_ht_fld = (META_KEY_VAL_TYPE *)g_ht_fld_X; /*----------------------------------------------------------*/ filesz = MAX_NUM_TBLS * sizeof(TBL_REC_TYPE); status = mk_file(g_cwd, docroot, "docroot.tbls", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.tbls", &g_tbl_X, &g_tbl_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); if ( g_tbl_nX == 0 ) { go_BYE(-1); } g_n_tbl = g_tbl_nX / sizeof(TBL_REC_TYPE); if ( (g_tbl_nX % sizeof(TBL_REC_TYPE) ) != 0 ) { go_BYE(-1); } g_tbls = (TBL_REC_TYPE *)g_tbl_X; /*----------------------------------------------------------*/ filesz = MAX_NUM_FLDS * sizeof(FLD_REC_TYPE); 
status = mk_file(g_cwd, docroot, "docroot.flds", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.flds", &g_fld_X, &g_fld_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); if ( g_fld_nX == 0 ) { go_BYE(-1); } g_n_fld = g_fld_nX / sizeof(FLD_REC_TYPE); if ( (g_fld_nX % sizeof(FLD_REC_TYPE) ) != 0 ) { go_BYE(-1); } g_flds = (FLD_REC_TYPE *)g_fld_X; /*----------------------------------------------------------*/ filesz = MAX_NUM_DDIRS * sizeof(DDIR_REC_TYPE); status = mk_file(g_cwd, docroot, "docroot.ddirs", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.ddirs", &g_ddir_X, &g_ddir_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); if ( g_ddir_nX == 0 ) { go_BYE(-1); } g_n_ddir = g_ddir_nX / sizeof(DDIR_REC_TYPE); if ( (g_ddir_nX % sizeof(DDIR_REC_TYPE) ) != 0 ) { go_BYE(-1); } g_ddirs = (DDIR_REC_TYPE *)g_ddir_X; filesz = G_FLD_INFO_SIZE * sizeof(FLD_INFO_TYPE); status = mk_file(g_cwd, docroot, "docroot.fld_info", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.fld_info", &g_fld_info_X, &g_fld_info_nX, 1); status = chdir(g_cwd); cBYE(status); cBYE(status); if ( g_fld_info_nX == 0 ) { go_BYE(-1); } g_n_fld_info = g_fld_info_nX / sizeof(FLD_INFO_TYPE); if ( (g_fld_info_nX % sizeof(FLD_INFO_TYPE) ) != 0 ) { go_BYE(-1); } g_fld_info = (FLD_INFO_TYPE *)g_fld_info_X; /*----------------------------------------------------------*/ filesz = 2 * sizeof(int); status = mk_file(g_cwd, docroot, "docroot.aux", filesz); cBYE(status); status = chdir(docroot); cBYE(status); status = rs_mmap("docroot.aux", &g_aux_X, &g_aux_nX, 1); cBYE(status); status = chdir(g_cwd); cBYE(status); /*----------------------------------------------------------*/ if ( g_ddir_nX == 0 ) { go_BYE(-1); } for ( int i = 0; i < g_n_ht_tbl; i++ ) { g_ht_tbl[i].key = 0; g_ht_tbl[i].val = -1; } for ( int i = 0; i < g_n_tbl; i++ ) { zero_tbl_rec(&(g_tbls[i])); } for ( int i = 0; i < 
g_n_ht_fld; i++ ) { g_ht_fld[i].key = 0; g_ht_fld[i].val = -1; } for ( int i = 0; i < g_n_fld; i++ ) { zero_fld_rec(&(g_flds[i])); } for ( int i = 0; i < g_n_ddir; i++ ) { zero_ddir_rec(&(g_ddirs[i])); } for ( int i = 0; i < g_n_fld_info; i++ ) { zero_fld_info_rec(&(g_fld_info[i])); } int *I4ptr = (int *)g_aux_X; *I4ptr++ = 1; // Indicating that 1 is the first file number to be used *I4ptr++ = 0; // Indicating that we are NOT within compound expression status = init_ddirs(dsk_data_dir, ram_data_dir); cBYE(status); /*----------------------------------------------------------*/ BYE: chdir(g_cwd); return(status); }
/*
 * parsort1(tbl, f1, f2, up_or_down)
 *
 * Writes field f2 of table tbl and marks it with set_fld_info(tbl, f2,
 * "sort=1") after range-partitioning field f1 and running core_parsort1
 * on each partition. up_or_down is not referenced in the body.
 *
 * Outline of what the code below does:
 *  - tbl, f1 and f2 must be non-NULL and non-empty.
 *  - Bails out with -1 when f1 has an nn field, when its fldtype is not
 *    "int", or when the table has 1048576 rows or fewer.
 *  - Calls subsample() on f1 with a reduction factor of sqrt(nR) into
 *    field "f2" of a temporary table, then compares each sampled value
 *    with its shifted neighbour and bails out unless the "sum" of the
 *    "==" results is 0.
 *  - Reads "min"/"max" of f1, sorts the sample ("sortA") and reads one
 *    splitter per thread from it; lower/upper bounds per thread go into
 *    f1lb[]/f1ub[] and are written as fields "lb"/"ub" of a second
 *    temporary table.
 *  - With OLD_WAY undefined (it is #undef'd here): num_in_range() produces
 *    field "cnt", mv_range() is called with f1 and f2, and f2 is then
 *    mapped writable as the output buffer.
 *  - Per-range start pointers are derived from the cumulative counts and
 *    published through g_nT, g_offsets and g_count; core_parsort1 is
 *    called directly when g_nT == 1, otherwise once per thread through
 *    pthread_create()/pthread_join().
 *  - Elapsed times are printed to stderr as TIME0..TIME3.
 *  - The two temporary tables are deleted with del_tbl() at the end.
 *
 * Returns 0 on success, non-zero on failure.
 *
 * NOTE(review): both temporary table names are overwritten with the fixed
 *   strings "t2" and "t3" right after qd_uq_str() (marked TODO DELETE
 *   THIS in the code).
 * NOTE(review): attr is initialised and set to joinable but
 *   pthread_create() is passed NULL for its attributes -- confirm intent.
 * NOTE(review): the statuses of several calls (qd_uq_str, f_to_s, get_nR,
 *   add_tbl, set_fld_info, some is_tbl/is_fld) are not tested with cBYE.
 * NOTE(review): op_X is passed to rs_munmap() more than once on the way
 *   out; whether that is safe depends on rs_munmap -- TODO confirm.
 */
//--------------------------------------------------------------- // START FUNC DECL int parsort1( char *tbl, char *f1, char *f2, char *up_or_down /* not used right now */ ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *op_X = NULL; size_t op_nX = 0; char *cnt_X = NULL; size_t cnt_nX = 0; char *t2f2_X = NULL; size_t t2f2_nX = 0; FLD_TYPE *f1_meta = NULL; FLD_TYPE *f2_meta = NULL; FLD_TYPE *t2f2_meta = NULL; FLD_TYPE *cnt_meta = NULL; long long nR, nR2; int tbl_id = INT_MIN, f1_id = INT_MIN, f2_id = INT_MIN, cnt_id = INT_MIN; int t2f2_id = INT_MIN; char str_meta_data[1024]; FILE *ofp = NULL; char *opfile = NULL; FILE *tfp = NULL; char *tempfile = NULL; char str_rslt[32]; zero_string(str_rslt, 32); char t2[MAX_LEN_TBL_NAME]; int itemp; int *xxx = NULL, *f1lb = NULL, *f1ub = NULL; long long *count = NULL, *chk_count = NULL; int **offsets = NULL, **bak_offsets = NULL; int *inptr = NULL; // For multi-threading int rc; // result code for thread create pthread_t threads[MAX_NUM_THREADS]; pthread_attr_t attr; void *thread_status; // START: For timing struct timeval Tps; struct timezone Tpf; void *Tzp = NULL; long long t_before_sec = 0, t_before_usec = 0, t_before = 0; long long t_after_sec, t_after_usec, t_after; long long t_delta_usec; // STOP : For timing //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } zero_string(str_meta_data, 1024); /* t2 isa temporary table */ zero_string(t2, MAX_LEN_TBL_NAME); status = qd_uq_str(t2, MAX_LEN_TBL_NAME); strcpy(t2, "t2"); // TODO DELETE THIS g_offsets = NULL; g_count = NULL; //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); nR = g_tbl[tbl_id].nR; status = is_fld(NULL, tbl_id, f1, &f1_id); cBYE(status); chk_range(f1_id, 0, g_n_fld); 
f1_meta = &(g_fld[f1_id]); status = rs_mmap(f1_meta->filename, &f1_X, &f1_nX, 0); cBYE(status); // Not implemented for following cases if ( g_fld[f1_id].nn_fld_id >= 0 ) { go_BYE(-1); } if ( strcmp(f1_meta->fldtype, "int") != 0 ) { go_BYE(-1); } if ( nR <= 1048576 ) { go_BYE(-1); } //--------------------------------------------- status = gettimeofday(&Tps, &Tpf); cBYE(status); t_before_sec = (long long)Tps.tv_sec; t_before_usec = (long long)Tps.tv_usec; t_before = t_before_sec * 1000000 + t_before_usec; int reduction_factor = (int)(sqrt((double)nR)); sprintf(str_rslt, "%d", reduction_factor); status = subsample(tbl, f1, str_rslt, t2, "f2"); cBYE(status); status = gettimeofday(&Tps, &Tpf); cBYE(status); t_after_sec = (long long)Tps.tv_sec; t_after_usec = (long long)Tps.tv_usec; t_after = t_after_sec * 1000000 + t_after_usec; fprintf(stderr, "TIME0 = %lld \n", t_after - t_before); t_before = t_after; // Must have sufficient diversity of values status = f1opf2(t2, "f2", "op=shift:val=-1", "nextf2"); cBYE(status); status = drop_nn_fld(t2, "nextf2"); cBYE(status); status = f1f2opf3(t2, "f2", "nextf2", "==", "x"); cBYE(status); status = f_to_s(t2, "x", "sum", str_rslt); char *endptr; long long lltemp = strtoll(str_rslt, &endptr, 10); if ( lltemp != 0 ) { go_BYE(-1); } //------------------------------------------------- // Get range of values of f1 status = f_to_s(tbl, f1, "max", str_rslt); int f1max = strtoll(str_rslt, &endptr, 10); status = f_to_s(tbl, f1, "min", str_rslt); int f1min = strtoll(str_rslt, &endptr, 10); //------------------------------------------------- // Now we sort the values that we sampled status = fop(t2, "f2", "sortA"); cBYE(status); // status = pr_fld(t2, "f2", "", stdout); status = get_nR(t2, &nR2); // Now each thread selects a range to work on int nT; for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { g_thread_id[i] = i; } status = get_num_threads(&nT); cBYE(status); //-------------------------------------------- #define MIN_ROWS_FOR_PARSORT1 1048576 
if ( nR <= MIN_ROWS_FOR_PARSORT1 ) { nT = 1; } /* Don't create more threads than you can use */ if ( nT > nR ) { nT = nR; } //-------------------------------------------- double block_size = (double)nR2 / (double)nT; status = is_fld(t2, -1, "f2", &t2f2_id); cBYE(status); chk_range(t2f2_id, 0, g_n_fld); t2f2_meta = &(g_fld[t2f2_id]); status = rs_mmap(t2f2_meta->filename, &t2f2_X, &t2f2_nX, 0); cBYE(status); int *iptr = (int *)t2f2_X; xxx = malloc(nT * sizeof(int)); return_if_malloc_failed(xxx); f1lb = malloc(nT * sizeof(int)); return_if_malloc_failed(f1lb); f1ub = malloc(nT * sizeof(int)); return_if_malloc_failed(f1ub); /* FOR OLD_WAY count = malloc(nT * sizeof(long long)); return_if_malloc_failed(count); */ chk_count = malloc(nT * sizeof(long long)); return_if_malloc_failed(chk_count); g_count = malloc(nT * sizeof(long long)); return_if_malloc_failed(g_count); for ( int i = 0; i < nT; i++ ) { // FOR OLD_WAY count[i]= 0; chk_count[i]= 0; int j = i+1; long long idx = j * block_size; if ( idx >= nR2 ) { idx = nR2 -1 ; } int y = iptr[idx]; xxx[i] = y; // fprintf(stdout,"idx = %lld: j = %d: y = %d \n", idx, j, y); } for ( int i = 0; i < nT; i++ ) { if ( ( i == 0 ) && ( i == (nT - 1 ) ) ) { f1lb[i] = f1min; f1ub[i] = f1max; } else if ( i == 0 ) { f1lb[i] = f1min; f1ub[i] = xxx[i]; } else if ( i == (nT -1 ) ) { f1lb[i] = xxx[i-1] + 1; f1ub[i] = f1max; } else { f1lb[i] = xxx[i-1] + 1; f1ub[i] = xxx[i]; } } // STOP: Each thread has now a range to work on // Create a temporary table t3 to store ranges char t3[MAX_LEN_TBL_NAME]; int t3_id; zero_string(t3, MAX_LEN_TBL_NAME); status = qd_uq_str(t3, MAX_LEN_TBL_NAME); strcpy(t3, "t3"); // TODO DELETE THIS sprintf(str_rslt, "%d", nT); status = add_tbl(t3, str_rslt, &t3_id); // Add lower bound to t3 status = open_temp_file(&tfp, &tempfile, -1); cBYE(status); fclose_if_non_null(tfp); tfp = fopen(tempfile, "wb"); return_if_fopen_failed(tfp, tempfile, "wb"); fwrite(f1lb, sizeof(int), nT, tfp); fclose_if_non_null(tfp); 
sprintf(str_meta_data, "fldtype=%s:n_sizeof=%d:filename=%s", f1_meta->fldtype, f1_meta->n_sizeof, tempfile); status = add_fld(t3, "lb", str_meta_data, &itemp); cBYE(status); free_if_non_null(tempfile); // Add upper bound to t3 status = open_temp_file(&tfp, &tempfile, -1); cBYE(status); fclose_if_non_null(tfp); tfp = fopen(tempfile, "wb"); return_if_fopen_failed(tfp, tempfile, "wb"); fwrite(f1ub, sizeof(int), nT, tfp); fclose_if_non_null(tfp); sprintf(str_meta_data, "fldtype=%s:n_sizeof=%d:filename=%s", f1_meta->fldtype, f1_meta->n_sizeof, tempfile); status = add_fld(t3, "ub", str_meta_data, &itemp); cBYE(status); free_if_non_null(tempfile); #undef OLD_WAY #ifdef OLD_WAY // Now we count how much there is in each range inptr = (int *)f1_X; for ( long long i = 0; i < nR; i++ ) { int ival = *inptr++; int range_idx = INT_MIN; // TODO: Improve sequential search for ( int j = 0; j < nT; j++ ) { if ( ival >= f1lb[j] && ( ival <= f1ub[j] ) ) { range_idx = j; break; } } count[range_idx]++; } /* for ( int i = 0; i < nT; i++ ) { fprintf(stdout,"%d: (%d, %d) = %lld \n", i, f1lb[i], f1ub[i], count[i]); } */ #else status = num_in_range(tbl, f1, t3, "lb", "ub", "cnt"); cBYE(status); // Get a pointer to the count field status = is_tbl(t3, &t3_id); chk_range(t3_id, 0, g_n_tbl); status = is_fld(NULL, t3_id, "cnt", &cnt_id); chk_range(cnt_id, 0, g_n_fld); cnt_meta = &(g_fld[cnt_id]); status = rs_mmap(cnt_meta->filename, &cnt_X, &cnt_nX, 0); cBYE(status); count = (long long *)cnt_X; #endif status = gettimeofday(&Tps, &Tpf); cBYE(status); t_after_sec = (long long)Tps.tv_sec; t_after_usec = (long long)Tps.tv_usec; t_after = t_after_sec * 1000000 + t_after_usec; fprintf(stderr, "TIME1 = %lld \n", t_after - t_before); t_before = t_after; bak_offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(bak_offsets); g_offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(g_offsets); #ifdef OLD_WAY // Make space for output long long filesz = nR * f1_meta->n_sizeof; status = 
open_temp_file(&ofp, &opfile, filesz); cBYE(status); status = mk_file(opfile, filesz); cBYE(status); status = rs_mmap(opfile, &op_X, &op_nX, 1); cBYE(status); offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(offsets); long long cum_count = 0; for ( int i = 0; i < nT; i++ ) { bak_offsets[i] = offsets[i] = (int *)op_X; if ( i > 0 ) { cum_count += count[i-1]; offsets[i] += cum_count; bak_offsets[i] = offsets[i]; } } inptr = (int *)f1_X; // Now we place each item into its thread bucket for ( long long i = 0; i < nR; i++ ) { int ival = *inptr++; int range_idx = INT_MIN; // TODO: Improve sequential search for ( int j = 0; j < nT; j++ ) { if ( ival >= f1lb[j] && ( ival <= f1ub[j] ) ) { range_idx = j; break; } } int *xptr = offsets[range_idx]; *xptr = ival; offsets[range_idx]++; chk_count[range_idx]++; if ( chk_count[range_idx] > count[range_idx] ) { go_BYE(-1); } } cum_count = 0; for ( int i = 0; i < nT-1; i++ ) { if ( offsets[i] != bak_offsets[i+1] ) { go_BYE(-1); } } #else status = mv_range(tbl, f1, f2, t3, "lb", "ub", "cnt"); cBYE(status); status = is_fld(NULL, tbl_id, f2, &f2_id); chk_range(f2_id, 0, g_n_fld); f2_meta = &(g_fld[f2_id]); status = rs_mmap(f2_meta->filename, &op_X, &op_nX, 1); cBYE(status); #endif long long cum_count = 0; for ( int i = 0; i < nT; i++ ) { bak_offsets[i] = (int *)op_X; if ( i > 0 ) { cum_count += count[i-1]; bak_offsets[i] += cum_count; } } status = gettimeofday(&Tps, &Tpf); cBYE(status); t_after_sec = (long long)Tps.tv_sec; t_after_usec = (long long)Tps.tv_usec; t_after = t_after_sec * 1000000 + t_after_usec; fprintf(stderr, "TIME2 = %lld \n", t_after - t_before); t_before = t_after; // Set up global variables g_nT = nT; for ( int i = 0; i < nT; i++ ) { g_offsets[i] = bak_offsets[i]; g_count[i] = count[i]; } if ( g_nT == 1 ) { core_parsort1(&(g_thread_id[0])); } else { pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_create(&threads[t], 
NULL, core_parsort1, &(g_thread_id[t])); if ( rc ) { go_BYE(-1); } } /* Free attribute and wait for the other threads */ pthread_attr_destroy(&attr); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_join(threads[t], &thread_status); if ( rc ) { go_BYE(-1); } } } /* SEQUENTIAL CODE for ( int i = 0; i < nT; i++ ) { qsort_asc_int(bak_offsets[i], count[i], sizeof(int), NULL); } */ status = gettimeofday(&Tps, &Tpf); cBYE(status); t_after_sec = (long long)Tps.tv_sec; t_after_usec = (long long)Tps.tv_usec; t_after = t_after_sec * 1000000 + t_after_usec; fprintf(stderr, "TIME3 = %lld \n", t_after - t_before); // Indicate the dst_fld is sorted ascending status = set_fld_info(tbl, f2, "sort=1"); rs_munmap(op_X, op_nX); status = del_tbl(t2, -1); cBYE(status); status = del_tbl(t3, -1); cBYE(status); BYE: rs_munmap(op_X, op_nX); rs_munmap(cnt_X, cnt_nX); free_if_non_null(xxx); free_if_non_null(f1lb); free_if_non_null(f1ub); // Do not delete unless using OLD_WAY free_if_non_null(count); free_if_non_null(g_count); free_if_non_null(g_offsets); free_if_non_null(offsets); free_if_non_null(bak_offsets); free_if_non_null(chk_count); fclose_if_non_null(ofp); g_write_to_temp_dir = false; rs_munmap(f1_X, f1_nX); rs_munmap(op_X, op_nX); free_if_non_null(opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int count_vals( char *src_tbl, char *src_fld, char *in_cnt_fld, char *dst_tbl, char *dst_fld, char *out_cnt_fld ) // STOP FUNC DECL { int status = 0; char *src_fld_X = NULL; size_t src_fld_nX = 0; char *in_cnt_fld_X = NULL; size_t in_cnt_fld_nX = 0; FLD_TYPE *src_fld_meta = NULL, *in_cnt_fld_meta = NULL; FLD_TYPE *cnt_fld_meta = NULL, *nn_src_fld_meta = NULL; long long src_nR; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_fld_id = INT_MIN, nn_src_fld_id = INT_MIN; int dst_fld_id = INT_MIN; int cnt_fld_id = INT_MIN, nn_cnt_fld_id = INT_MIN; long long dst_nR, n_out; char buffer[32]; char str_meta_data[256]; char *nn_src_fld_X = NULL; size_t nn_src_fld_nX = 0; char *out_X = NULL; char *cnt_X = NULL; char *opfile = NULL, *tmp_opfile = NULL; FILE *ofp = 0; char *cnt_opfile = NULL, *tmp_cnt_opfile = NULL; FILE *cnt_ofp = 0; size_t n_out_X = 0, n_cnt_X = 0; unsigned int n_sizeof = INT_MAX; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( ( dst_fld == NULL ) || ( *dst_fld == '\0' ) ) { go_BYE(-1); } if ( strcmp(src_tbl, dst_tbl) == 0 ) { go_BYE(-1); } zero_string(str_meta_data, 256); zero_string(buffer, 32); //-------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbl[src_tbl_id].nR; //-------------------------------------------------------- status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); src_fld_meta = &(g_fld[src_fld_id]); status = rs_mmap(src_fld_meta->filename, &src_fld_X, &src_fld_nX, 0); cBYE(status); nn_src_fld_id = src_fld_meta->nn_fld_id; if ( nn_src_fld_id >= 0 ) { nn_src_fld_meta = 
&(g_fld[nn_src_fld_id]); status = rs_mmap(nn_src_fld_meta->filename, &nn_src_fld_X, &nn_src_fld_nX, 0); cBYE(status); } // Get the count field if specified. It cannot have null values if ( *in_cnt_fld != '\0' ) { status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); src_fld_meta = &(g_fld[src_fld_id]); status = rs_mmap(src_fld_meta->filename, &src_fld_X, &src_fld_nX, 0); cBYE(status); nn_cnt_fld_id = cnt_fld_meta->nn_fld_id; chk_range(nn_cnt_fld_id, 0, g_n_fld); } /* Make sure src_fld field is sorted ascending */ zero_string(buffer, 32); status = f_to_s(src_tbl, src_fld, "is_sorted", buffer); cBYE(status); if ( strcmp(buffer, "ascending") != 0 ) { fprintf(stderr, "Field [%s] in Table [%s] not sorted ascending\n", src_fld, src_tbl); go_BYE(-1); } //-------------------------------------------------------- // Create 2 temporary files to store the results. We allocate space // differently based on field types g_write_to_temp_dir = true; status = open_temp_file(&ofp, &tmp_opfile, 0); cBYE(status); fclose_if_non_null(ofp); status = open_temp_file(&cnt_ofp, &tmp_cnt_opfile, 0); cBYE(status); fclose_if_non_null(cnt_ofp); //------------------------------------------------------ n_out = src_nR * sizeof(int); // upper bound if ( strcmp(src_fld_meta->fldtype, "int") == 0 ) { status = mk_file(tmp_cnt_opfile, sizeof(int) * src_nR); cBYE(status); rs_mmap(tmp_cnt_opfile, &cnt_X, &n_cnt_X, 1); cBYE(status); status = mk_file(tmp_opfile, sizeof(int) * src_nR); cBYE(status); rs_mmap(tmp_opfile, &out_X, &n_out_X, 1); cBYE(status); if ( nn_src_fld_X == NULL ) { if ( *in_cnt_fld == '\0' ) { uniq_alldef_int((int *)src_fld_X, src_nR, (int *)out_X, &dst_nR, (int *)cnt_X); } else { uniq_cnt_alldef_i_i((int *)src_fld_X, (int *)in_cnt_fld_X, src_nR, (int *)out_X, &dst_nR, (int *)cnt_X); } } else { uniq_int((int *)src_fld_X, src_nR, nn_src_fld_X, (int *)out_X, &dst_nR, (int *)cnt_X); } } else if ( strcmp(src_fld_meta->fldtype, "long long") == 
0 ) { status = mk_file(tmp_opfile, sizeof(long long) * src_nR); cBYE(status); rs_mmap(tmp_opfile, &out_X, &n_out_X, 1); cBYE(status); if ( nn_src_fld_X == NULL ) { if ( *in_cnt_fld == '\0' ) { status = mk_file(tmp_cnt_opfile, sizeof(long long) * src_nR); cBYE(status); rs_mmap(tmp_cnt_opfile, &cnt_X, &n_cnt_X, 1); cBYE(status); uniq_alldef_longlong((long long *)src_fld_X, src_nR, (long long *)out_X, &dst_nR, (long long *)cnt_X); } else { if ( strcmp(in_cnt_fld_meta->fldtype, "long long") == 0 ) { status = mk_file(tmp_cnt_opfile, sizeof(long long) * src_nR); cBYE(status); rs_mmap(tmp_cnt_opfile, &cnt_X, &n_cnt_X, 1); cBYE(status); uniq_cnt_alldef_ll_ll((long long *)src_fld_X, (long long *)in_cnt_fld_X, src_nR, (long long *)out_X, &dst_nR, (long long *)cnt_X); } else if ( strcmp(in_cnt_fld_meta->fldtype, "double") == 0 ) { status = mk_file(tmp_cnt_opfile, sizeof(double) * src_nR); cBYE(status); rs_mmap(tmp_cnt_opfile, &cnt_X, &n_cnt_X, 1); cBYE(status); uniq_cnt_alldef_ll_dbl((long long *)src_fld_X, (double *)in_cnt_fld_X, src_nR, (long long *)out_X, &dst_nR, (double *)cnt_X); } } } else { if ( *in_cnt_fld == '\0' ) { status = mk_file(tmp_cnt_opfile, sizeof(long long) * src_nR); cBYE(status); rs_mmap(tmp_cnt_opfile, &cnt_X, &n_cnt_X, 1); cBYE(status); uniq_longlong((long long *)src_fld_X, src_nR, nn_src_fld_X, (long long *)out_X, &dst_nR, (long long *)cnt_X); } else { fprintf(stderr, "NOT IMPLEMENTED\n"); go_BYE(-1); } } } else { go_BYE(-1); } if ( dst_nR == 0 ) { goto BYE; } //----------------------------------------------------------- status = is_tbl(dst_tbl, &dst_tbl_id); cBYE(status); if ( dst_tbl_id >= 0 ) { status = del_tbl(NULL, dst_tbl_id); } sprintf(buffer, "%lld", dst_nR); status = add_tbl(dst_tbl, buffer, &dst_tbl_id); cBYE(status); //----------------------------------------------------------- // Now copy the temporary files over to real files status = open_temp_file(&ofp, &opfile, 0); cBYE(status); if ( strcmp(src_fld_meta->fldtype, "int") == 0 ) { 
fwrite(out_X, sizeof(int), dst_nR, ofp); n_sizeof = sizeof(int); } else if ( strcmp(src_fld_meta->fldtype, "long long") == 0 ) { fwrite(out_X, sizeof(long long), dst_nR, ofp); n_sizeof = sizeof(long long); } fclose_if_non_null(ofp); // Add output field to meta data sprintf(str_meta_data, "filename=%s:fldtype=%s:n_sizeof=%u", opfile, src_fld_meta->fldtype, n_sizeof); status = add_fld(dst_tbl, dst_fld, str_meta_data, &dst_fld_id); cBYE(status); // Since src_fld is sorted ascending, so is dst_fld g_fld[dst_fld_id].sorttype = 1; // Now copy the temporary count file to the real one status = open_temp_file(&cnt_ofp, &cnt_opfile, 0); cBYE(status); if ( *in_cnt_fld == '\0' ) { if ( strcmp(src_fld_meta->fldtype, "int") == 0 ) { fwrite(cnt_X, sizeof(int), dst_nR, cnt_ofp); sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=%lu", cnt_opfile, sizeof(int)); } else if ( strcmp(src_fld_meta->fldtype, "long long") == 0 ) { fwrite(cnt_X, sizeof(long long), dst_nR, cnt_ofp); sprintf(str_meta_data, "filename=%s:fldtype=long long:n_sizeof=%lu", cnt_opfile, sizeof(long long)); } else { go_BYE(-1); } } else { if ( strcmp(in_cnt_fld_meta->fldtype, "int") == 0 ) { fwrite(cnt_X, sizeof(int), dst_nR, cnt_ofp); sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=%lu", cnt_opfile, sizeof(int)); } else if ( strcmp(in_cnt_fld_meta->fldtype, "long long") == 0 ) { fwrite(cnt_X, sizeof(long long), dst_nR, cnt_ofp); sprintf(str_meta_data, "filename=%s:fldtype=long long:n_sizeof=%lu", cnt_opfile, sizeof(long long)); } else if ( strcmp(in_cnt_fld_meta->fldtype, "double") == 0 ) { fwrite(cnt_X, sizeof(double), dst_nR, cnt_ofp); sprintf(str_meta_data, "filename=%s:fldtype=double:n_sizeof=%lu", cnt_opfile, sizeof(double)); } else { go_BYE(-1); } } fclose_if_non_null(cnt_ofp); // Add count field to meta data status = add_fld(dst_tbl, out_cnt_fld, str_meta_data, &cnt_fld_id); cBYE(status); //----------------------------------------------------------- BYE: g_write_to_temp_dir = false; 
rs_munmap(src_fld_X, src_fld_nX); free_if_non_null(opfile); rs_munmap(src_fld_X, src_fld_nX); rs_munmap(in_cnt_fld_X, in_cnt_fld_nX); rs_munmap(cnt_X, n_cnt_X); rs_munmap(out_X, n_out_X); rs_munmap(nn_src_fld_X, nn_src_fld_nX); unlink(tmp_opfile); unlink(tmp_cnt_opfile); free_if_non_null(tmp_opfile); free_if_non_null(tmp_cnt_opfile); free_if_non_null(cnt_opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int crossprod( char *docroot, sqlite3 *in_db, char *t1, char *f1, char *t2, char *f2, char *t3 ) // STOP FUNC DECL { int status = 0; sqlite3 *db = NULL; char *Y1 = NULL; size_t nY1 = 0; char *Y2 = NULL; size_t nY2 = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *nn_f1_X = NULL; size_t nn_f1_nX = 0; char *nn_f2_X = NULL; size_t nn_f2_nX = 0; int t3_id, itemp; bool b_is_tbl; FLD_META_TYPE f1_meta, f2_meta; FLD_META_TYPE nn_f1_meta, nn_f2_meta; long long nR1, nR2, nn_nR1, nn_nR2, nR3; char str_meta_data[1024]; char *t3f1_opfile = NULL, *t3f2_opfile = NULL; FILE *ofp = NULL; char buffer[32]; //---------------------------------------------------------------- zero_string(str_meta_data, 1024); zero_string(buffer, 32); zero_fld_meta(&f1_meta); zero_fld_meta(&f2_meta); zero_fld_meta(&nn_f1_meta); zero_fld_meta(&nn_f2_meta); status = open_db_if_needed(docroot, in_db, &db); cBYE(status); //---------------------------------------------------------------- status = get_data(docroot, db, t1, f1, &nR1, &f1_meta, &f1_X, &f1_nX, &nn_f1_meta, &nn_f1_X, &nn_f1_nX, &nn_nR1); cBYE(status); status = get_data(docroot, db, t2, f2, &nR2, &f2_meta, &f2_X, &f2_nX, &nn_f2_meta, &nn_f2_X, &nn_f2_nX, &nn_nR2); cBYE(status); nR3 = nn_nR1 * nn_nR2; if ( nR3 == 0 ) { fprintf(stderr, "No data to create t3 \n"); goto BYE; } status = open_temp_file(&ofp, &t3f1_opfile); cBYE(status); fclose_if_non_null(ofp); status = mk_file(t3f1_opfile, nR3 * f1_meta.n_sizeof); cBYE(status); status = rs_mmap(t3f1_opfile, &Y1, &nY1, 1); cBYE(status); status = open_temp_file(&ofp, &t3f2_opfile); cBYE(status); fclose_if_non_null(ofp); status = mk_file(t3f2_opfile, nR3 * f2_meta.n_sizeof); cBYE(status); status = rs_mmap(t3f2_opfile, &Y2, &nY2, 1); cBYE(status); //---------------------------------------------------------------- if ( ( strcmp(f1_meta.fldtype, "int") == 0 ) && ( strcmp(f2_meta.fldtype, 
"int") == 0 ) ) { status = core_crossprod_I_I(f1_X, nn_f1_X, f2_X, nn_f2_X, nR1, nR2, nR3, Y1, Y2); } else if ( ( strcmp(f1_meta.fldtype, "int") == 0 ) && ( strcmp(f2_meta.fldtype, "long long") == 0 ) ) { status = core_crossprod_I_L(f1_X, nn_f1_X, f2_X, nn_f2_X, nR1, nR2, nR3, Y1, Y2); } else if ( ( strcmp(f1_meta.fldtype, "long long") == 0 ) && ( strcmp(f2_meta.fldtype, "int") == 0 ) ) { status = core_crossprod_L_I(f1_X, nn_f1_X, f2_X, nn_f2_X, nR1, nR2, nR3, Y1, Y2); } else if ( ( strcmp(f1_meta.fldtype, "long long") == 0 ) && ( strcmp(f2_meta.fldtype, "long long") == 0 ) ) { status = core_crossprod_L_L(f1_X, nn_f1_X, f2_X, nn_f2_X, nR1, nR2, nR3, Y1, Y2); } else { go_BYE(-1); } cBYE(status); //---------------------------------------------------------------- // Add output fields to t3 meta data status = is_tbl(docroot, db, t3, &b_is_tbl, &t3_id); cBYE(status); if ( b_is_tbl ) { status = del_tbl(docroot, db, t3); cBYE(status); } sprintf(buffer, "%lld", nR3); status = add_tbl(docroot, db, t3, buffer, &itemp); cBYE(status); sprintf(str_meta_data, "fldtype=int:n_sizeof=%u:filename=%s", f1_meta.n_sizeof, t3f1_opfile); status = add_fld(docroot, db, t3, f1, str_meta_data); cBYE(status); zero_string(str_meta_data, 1024); sprintf(str_meta_data, "fldtype=int:n_sizeof=%u:filename=%s", f2_meta.n_sizeof, t3f2_opfile); status = add_fld(docroot, db, t3, f2, str_meta_data); cBYE(status); BYE: fclose_if_non_null(ofp); if ( in_db == NULL ) { sqlite3_close(db); } rs_munmap(f1_X, f1_nX); rs_munmap(nn_f1_X, nn_f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(nn_f2_X, nn_f2_nX); rs_munmap(Y1, nY1); rs_munmap(Y2, nY2); free_if_non_null(t3f1_opfile); free_if_non_null(t3f2_opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int f1f2f3f4opf5( char *tbl, char *f1, char *f2, char *f3, char *f4, char *str_op_spec, char *f5 ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *nn_f1_X = NULL; size_t nn_f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *nn_f2_X = NULL; size_t nn_f2_nX = 0; char *f3_X = NULL; size_t f3_nX = 0; char *nn_f3_X = NULL; size_t nn_f3_nX = 0; char *f4_X = NULL; size_t f4_nX = 0; char *nn_f4_X = NULL; size_t nn_f4_nX = 0; char *out_X = NULL; size_t out_nX = 0; char *nn_out_X = NULL; size_t nn_out_nX = 0; char *opfile = NULL, *nn_opfile = NULL; FILE *ofp = NULL; FLD_TYPE *f1_meta = NULL, *nn_f1_meta = NULL; FLD_TYPE *f2_meta = NULL, *nn_f2_meta = NULL; FLD_TYPE *f3_meta = NULL, *nn_f3_meta = NULL; FLD_TYPE *f4_meta = NULL, *nn_f4_meta = NULL; long long nR; int tbl_id = INT_MIN; int f1_id = INT_MIN, f2_id = INT_MIN, f3_id = INT_MIN, f4_id = INT_MIN, f5_id = INT_MIN; int nn_f1_id = INT_MIN, nn_f2_id = INT_MIN, nn_f3_id = INT_MIN, nn_f4_id = INT_MIN, nn_f5_id = INT_MIN; char str_meta_data[1024]; bool is_any_null = false; float *f1ptr = NULL, *f2ptr = NULL, *f3ptr = NULL, *f4ptr = NULL; float *foutptr = NULL; char *nn_out = NULL; char *op = NULL; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( f3 == NULL ) || ( *f3 == '\0' ) ) { go_BYE(-1); } if ( ( f4 == NULL ) || ( *f4 == '\0' ) ) { go_BYE(-1); } if ( ( f5 == NULL ) || ( *f5 == '\0' ) ) { go_BYE(-1); } if ( ( str_op_spec == NULL ) || ( *str_op_spec == '\0' ) ) { go_BYE(-1); } zero_string(str_meta_data, 1024); //-------------------------------------------------------- status = extract_name_value(str_op_spec, "op=", ":", &op); cBYE(status); if ( op == NULL ) { go_BYE(-1); } 
//-------------------------------------------------------- status = is_tbl(tbl, &tbl_id); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); nR = g_tbl[tbl_id].nR; //-------------------------------------------------------- /* Get information for field f1 */ status = is_fld(NULL, tbl_id, f1, &f1_id); f1_meta = &(g_fld[f1_id]); status = rs_mmap(f1_meta->filename, &f1_X, &f1_nX, 0); cBYE(status); nn_f1_id = f1_meta->nn_fld_id; if ( nn_f1_id >= 0 ) { nn_f1_meta = &(g_fld[nn_f1_id]); status = rs_mmap(nn_f1_meta->filename, &nn_f1_X, &nn_f1_nX, 0); cBYE(status); } //-------------------------------------------------------- /* Get information for field f2 */ status = is_fld(NULL, tbl_id, f2, &f2_id); f2_meta = &(g_fld[f2_id]); status = rs_mmap(f2_meta->filename, &f2_X, &f2_nX, 0); cBYE(status); nn_f2_id = f2_meta->nn_fld_id; if ( nn_f2_id >= 0 ) { nn_f2_meta = &(g_fld[nn_f2_id]); status = rs_mmap(nn_f2_meta->filename, &nn_f2_X, &nn_f2_nX, 0); cBYE(status); } //-------------------------------------------------------- /* Get information for field f3 */ status = is_fld(NULL, tbl_id, f3, &f3_id); f3_meta = &(g_fld[f3_id]); status = rs_mmap(f3_meta->filename, &f3_X, &f3_nX, 0); cBYE(status); nn_f3_id = f3_meta->nn_fld_id; if ( nn_f3_id >= 0 ) { nn_f3_meta = &(g_fld[nn_f3_id]); status = rs_mmap(nn_f3_meta->filename, &nn_f3_X, &nn_f3_nX, 0); cBYE(status); } //-------------------------------------------------------- /* Get information for field f4 */ status = is_fld(NULL, tbl_id, f4, &f4_id); f4_meta = &(g_fld[f4_id]); status = rs_mmap(f4_meta->filename, &f4_X, &f4_nX, 0); cBYE(status); nn_f4_id = f4_meta->nn_fld_id; if ( nn_f4_id >= 0 ) { nn_f4_meta = &(g_fld[nn_f4_id]); status = rs_mmap(nn_f4_meta->filename, &nn_f4_X, &nn_f4_nX, 0); cBYE(status); } //-------------------------------------------------------- if ( strcmp(f1_meta->fldtype, "float") != 0 ) { go_BYE(-1); } if ( strcmp(f2_meta->fldtype, "float") != 0 ) { go_BYE(-1); } if ( strcmp(f3_meta->fldtype, "float") != 0 ) { 
go_BYE(-1); } if ( strcmp(f4_meta->fldtype, "float") != 0 ) { go_BYE(-1); } //-------------------------------------------------------- f1ptr = (float *)f1_X; f2ptr = (float *)f2_X; f3ptr = (float *)f3_X; f4ptr = (float *)f4_X; //-------------------------------------------------------- // Create output file long long filesz = sizeof(float) * nR; status = open_temp_file(&ofp, &opfile, filesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(opfile, filesz); cBYE(status); status = rs_mmap(opfile, &out_X, &out_nX, 1); cBYE(status); foutptr = (float*)out_X; //-------------------------------------------------------- // Create nn field for output file long long nnfilesz = sizeof(char) * nR; status = open_temp_file(&ofp, &nn_opfile, nnfilesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(nn_opfile, filesz); cBYE(status); status = rs_mmap(nn_opfile, &nn_out_X, &nn_out_nX, 1); cBYE(status); nn_out = (char*)nn_out_X; //-------------------------------------------------------- if ( strcmp(op, "lat_long_dist") == 0 ) { for ( long long i = 0; i < nR; i++ ) { if ( ( nn_f1_X != NULL ) && ( nn_f1_X[i] == 0 ) ) { is_any_null = true; foutptr[i] = 0; nn_out[i] = 0; continue; } if ( ( nn_f2_X != NULL ) && ( nn_f2_X[i] == 0 ) ) { is_any_null = true; foutptr[i] = 0; nn_out[i] = 0; continue; } if ( ( nn_f3_X != NULL ) && ( nn_f3_X[i] == 0 ) ) { is_any_null = true; foutptr[i] = 0; nn_out[i] = 0; continue; } if ( ( nn_f4_X != NULL ) && ( nn_f4_X[i] == 0 ) ) { is_any_null = true; foutptr[i] = 0; nn_out[i] = 0; continue; } if ( ( f1ptr[i] < -90 ) || ( f1ptr[i] > 90 ) ) { go_BYE(-1); } if ( ( f2ptr[i] < -180 ) || ( f2ptr[i] > 180 ) ) { go_BYE(-1); } if ( ( f3ptr[i] < -90 ) || ( f3ptr[i] > 90 ) ) { go_BYE(-1); } if ( ( f4ptr[i] < -180 ) || ( f4ptr[i] > 180 ) ) { go_BYE(-1); } status = latlong_distance(f1ptr[i], f2ptr[i], f3ptr[i], f4ptr[i], &(foutptr[i])); cBYE(status); nn_out[i] = TRUE; } } sprintf(str_meta_data, "filename=%s:fldtype=float:n_sizeof=4", opfile); status = 
add_fld(tbl, f5, str_meta_data, &f5_id); cBYE(status); if ( is_any_null ) { status = add_aux_fld(tbl, f5, nn_opfile, "nn", &nn_f5_id); cBYE(status); } else { unlink(nn_opfile); free_if_non_null(nn_opfile); } BYE: rs_munmap(f1_X, f1_nX); rs_munmap(nn_f1_X, nn_f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(nn_f2_X, nn_f2_nX); rs_munmap(f3_X, f3_nX); rs_munmap(nn_f3_X, nn_f3_nX); rs_munmap(f4_X, f4_nX); rs_munmap(nn_f4_X, nn_f4_nX); rs_munmap(out_X, out_nX); rs_munmap(nn_out_X, nn_out_nX); free_if_non_null(opfile); free_if_non_null(nn_opfile); free_if_non_null(op); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int srt_join( char *docroot, sqlite3 *in_db, char *src_tbl, char *src_lnk, char *src_val, char *dst_tbl, char *dst_lnk, char *dst_fld, char *op ) // STOP FUNC DECL { int status = 0; sqlite3 *db = NULL; char *src_val_X = NULL; size_t src_val_nX = 0; char *src_lnk_X = NULL; size_t src_lnk_nX = 0; char *nn_dst_val_X = NULL; size_t nn_dst_val_nX = 0; char *dst_val_X = NULL; size_t dst_val_nX = 0; char *dst_lnk_X = NULL; size_t dst_lnk_nX = 0; FLD_META_TYPE src_val_meta, src_lnk_meta; FLD_META_TYPE dst_lnk_meta; long long src_nR, dst_nR; // counts all rows char str_meta_data[1024]; int ijoin_op; bool is_any_null = false; int src_tbl_id, dst_tbl_id; bool b_is_tbl; FILE *ofp = NULL, *nn_ofp = NULL; char *opfile = NULL, *nn_opfile = NULL; //---------------------------------------------------------------- zero_string(str_meta_data, 1024); status = mk_mjoin_op(op, &ijoin_op); cBYE(status); status = open_db_if_needed(docroot, in_db, &db); cBYE(status); zero_fld_meta(&src_val_meta); zero_fld_meta(&src_lnk_meta); zero_fld_meta(&dst_lnk_meta); //---------------------------------------------------------------- // Get meta-data for all necessary fields status = is_tbl(docroot, db, src_tbl, &b_is_tbl, &src_tbl_id); cBYE(status); if ( b_is_tbl == false ) { fprintf(stderr, "tbl [%s] not found\n", src_tbl); go_BYE(-1); } status = is_tbl(docroot, db, dst_tbl, &b_is_tbl, &dst_tbl_id); cBYE(status); if ( b_is_tbl == false ) { fprintf(stderr, "tbl [%s] not found\n", src_tbl); go_BYE(-1); } status = fld_meta(docroot, db, src_tbl, src_lnk, -1, &src_lnk_meta); cBYE(status); if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { status = fld_meta(docroot, db, src_tbl, src_val, -1, &src_val_meta); cBYE(status); } status = fld_meta(docroot, db, dst_tbl, dst_lnk, -1, &dst_lnk_meta); cBYE(status); status = internal_get_nR(db, src_tbl_id, &src_nR); cBYE(status); status = internal_get_nR(db, dst_tbl_id, &dst_nR); 
cBYE(status); //---------------------------------------------------------------- // Get pointer access to all necessary fields status = rs_mmap(src_lnk_meta.filename, &src_lnk_X, &src_lnk_nX, 0); cBYE(status); if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { status = rs_mmap(src_val_meta.filename, &src_val_X, &src_val_nX, 0); cBYE(status); } status = rs_mmap(dst_lnk_meta.filename, &dst_lnk_X, &dst_lnk_nX, 0); cBYE(status); //-------------------------------------------------------- // Create output data files status = open_temp_file(&ofp, &opfile); cBYE(status); fclose_if_non_null(ofp); status = open_temp_file(&nn_ofp, &nn_opfile); cBYE(status); fclose_if_non_null(nn_ofp); if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { status = mk_file(opfile, src_val_meta.n_sizeof * dst_nR); cBYE(status); status = rs_mmap(opfile, &dst_val_X, &dst_val_nX, 1); cBYE(status); } status = mk_file(nn_opfile, sizeof(char) * dst_nR); cBYE(status); status = rs_mmap(nn_opfile, &nn_dst_val_X, &nn_dst_val_nX, 1); cBYE(status); //-------------------------------------------------------- // Core join if ( ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "int") == 0 ) ) { status = core_srt_join_I_I_I( (int *)src_lnk_X, (int *)src_val_X, src_nR, (int *)dst_lnk_X, (int *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "long long") == 0 ) ) { status = core_srt_join_I_I_L( (int *)src_lnk_X, (int *)src_val_X, src_nR, (long long *)dst_lnk_X, (int *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "int") == 0 ) ) { status = core_srt_join_I_L_I( (int *)src_lnk_X, (long long *)src_val_X, src_nR, (int *)dst_lnk_X, 
(long long *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "long long") == 0 ) ) { status = core_srt_join_I_L_L((int *)src_lnk_X, (long long *)src_val_X, src_nR, (long long *)dst_lnk_X, (long long *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "int") == 0 ) ) { status = core_srt_join_L_I_I( (long long *)src_lnk_X, (int *)src_val_X, src_nR, (int *)dst_lnk_X, (int *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "int") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "long long") == 0 ) ) { status = core_srt_join_L_I_L( (long long *)src_lnk_X, (int *)src_val_X, src_nR, (long long *)dst_lnk_X, (int *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "int") == 0 ) ) { status = core_srt_join_L_L_I( (long long *)src_lnk_X, (long long *)src_val_X, src_nR, (int *)dst_lnk_X, (long long *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else if ( ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(src_lnk_meta.fldtype, "long long") == 0 ) && ( strcmp(dst_lnk_meta.fldtype, "long long") == 0 ) ) { status = core_srt_join_L_L_L( (long long *)src_lnk_X, (long long *)src_val_X, src_nR, (long long *)dst_lnk_X, (long long *)dst_val_X, nn_dst_val_X, 0, dst_nR, ijoin_op, &is_any_null); } else { go_BYE(-1); } //-------------------------------------------------------- // Add output field to meta data if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { 
sprintf(str_meta_data,"filename=%s:n_sizeof=%d:fldtype=%s", opfile, src_val_meta.n_sizeof, src_val_meta.fldtype); status = add_fld(docroot, db, dst_tbl, dst_fld, str_meta_data); cBYE(status); if ( is_any_null == true ) { status = add_aux_fld(docroot, db, dst_tbl, dst_fld, nn_opfile, "nn"); cBYE(status); } else { unlink(nn_opfile); free_if_non_null(nn_opfile); } } else { sprintf(str_meta_data,"filename=%s:n_sizeof=%ld:fldtype=bool", nn_opfile, sizeof(char)); status = add_fld(docroot, db, dst_tbl, dst_fld, str_meta_data); cBYE(status); } BYE: if ( in_db == NULL ) { sqlite3_close(db); } rs_munmap(src_val_X, src_val_nX); rs_munmap(src_lnk_X, src_lnk_nX); rs_munmap(dst_val_X, dst_val_nX); rs_munmap(nn_dst_val_X, nn_dst_val_nX); rs_munmap(dst_lnk_X, dst_lnk_nX); free_if_non_null(nn_opfile); free_if_non_null(opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int count_strings( char *src_tbl, char *src_fld, char *str_trunc_len, // truncate strings beyond this len char *dst_tbl ) // STOP FUNC DECL { int status = 0; char *X = NULL; size_t nX = 0; char *src_fld_X = NULL; size_t src_fld_nX = 0; char *nn_src_fld_X = NULL; size_t nn_src_fld_nX = 0; char *sz_src_fld_X = NULL; size_t sz_src_fld_nX = 0; FLD_TYPE *src_fld_meta = NULL; FLD_TYPE *nn_src_fld_meta = NULL; FLD_TYPE *sz_src_fld_meta = NULL; long long src_nR; int src_tbl_id = INT_MIN, src_fld_id = INT_MIN; int dst_tbl_id = INT_MIN; int nn_src_fld_id = INT_MIN, sz_src_fld_id = INT_MIN; int itemp; char str_dst_nR[32]; long long dst_nR = 0; char str_meta_data[4096]; char *out_X = NULL; char *cnt_X = NULL; char *dst_fk_X = NULL; size_t dst_fk_nX = 0; FILE *ofp = NULL; char *opfile = NULL; FILE *tmp_ofp = NULL; char *tmp_opfile = NULL; FILE *nn_ofp = NULL; char *nn_opfile = NULL; FILE *sz_ofp = NULL; char *sz_opfile = NULL; FILE *cnt_ofp = NULL; char *cnt_opfile = NULL; FILE *from_dst_ofp = NULL; char *from_dst_opfile = NULL; FILE *dst_fk_ofp = NULL; char *dst_fk_opfile = NULL; int *idst_fk = NULL; FILE *to_dst_ofp = NULL; char *to_dst_opfile = NULL; size_t out_nX = 0, cnt_nX = 0; int *szptr = NULL; char *nnptr = NULL; char *cptr = NULL, *prev_cptr = NULL; char *sz_src_fld = NULL; char *str_maxlen = NULL; char *from_dst_fld = NULL, *to_dst_fld = NULL, *dst_fk_fld = NULL; char *null_str = NULL, *endptr = NULL; int maxlen, rec_size, trunc_len = -1; // no truncation by default long long cnt = 1; int icnt = 1; bool is_any_null = false; char is_nn; int sz; //---------------------------------------------------------------- if ( str_trunc_len != NULL ) { trunc_len = strtol(str_trunc_len, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( trunc_len <= 1 ) { go_BYE(-1); } trunc_len++; // to include null character } if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( 
src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } zero_string(str_meta_data, 4096); zero_string(str_dst_nR, 32); //-------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); src_nR = g_tbl[src_tbl_id].nR; if ( src_nR > INT_MAX ) { fprintf(stderr, "NOT IMPLEMENTED\n"); go_BYE(-1); } //-------------------------------------------------------- src_fld_meta = &(g_fld[src_fld_id]); status = rs_mmap(src_fld_meta->filename, &src_fld_X, &src_fld_nX, 0); cBYE(status); //-------------------------------------------------------- if ( strcmp(src_fld_meta->fldtype, "char string") != 0 ) { go_BYE(-1); } cptr = src_fld_X; // Get nn field for source if if it exists nn_src_fld_id = g_fld[src_fld_id].nn_fld_id; if ( nn_src_fld_id >= 0 ) { nn_src_fld_meta = &(g_fld[nn_src_fld_id]); status = rs_mmap(nn_src_fld_meta->filename, &nn_src_fld_X, &nn_src_fld_nX, 0); cBYE(status); } nnptr = (char *)nn_src_fld_X; // Get sz field for source sz_src_fld_id = g_fld[src_fld_id].sz_fld_id; chk_range(sz_src_fld_id, 0, g_n_fld); sz_src_fld_meta = &(g_fld[sz_src_fld_id]); status = rs_mmap(sz_src_fld_meta->filename, &sz_src_fld_X, &sz_src_fld_nX, 0); cBYE(status); szptr = (int *)sz_src_fld_X; //------------------------------------ mcr_alloc_null_str(from_dst_fld, (MAX_LEN_FILE_NAME+16)); mcr_alloc_null_str(to_dst_fld, (MAX_LEN_FILE_NAME+16)); mcr_alloc_null_str(dst_fk_fld, (MAX_LEN_FILE_NAME+16)); mcr_alloc_null_str(sz_src_fld, (MAX_LEN_FILE_NAME+16)); mcr_alloc_null_str(str_maxlen, 32); strcpy(sz_src_fld, "_sz_"); strcat(sz_src_fld, src_fld); strcpy(from_dst_fld, src_tbl); strcat(from_dst_fld, "_from"); strcpy(to_dst_fld, dst_tbl); strcat(to_dst_fld, "_to"); strcpy(dst_fk_fld, dst_tbl); strcat(dst_fk_fld, "_id"); // Get max length of src 
field status = f_to_s(src_tbl, sz_src_fld, "max", str_maxlen); cBYE(status); maxlen = strtol(str_maxlen, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( maxlen <= 0 ) { go_BYE(-1); } if ( trunc_len < 0 ) { trunc_len = maxlen; } mcr_alloc_null_str(null_str, trunc_len); //-------------------------------------------------------- // Write out src field as constant length field status = open_temp_file(&tmp_ofp, &tmp_opfile, 0); cBYE(status); for ( int i = 0; i < src_nR; i++ ) { int sz, fill_sz; sz = szptr[i]; // includes terminating null character if ( sz <= trunc_len ) { fwrite(cptr, sizeof(char), sz, tmp_ofp); fill_sz = trunc_len - sz; fwrite(null_str, sizeof(char), fill_sz, tmp_ofp); } else { fwrite(cptr, sizeof(char), trunc_len-1, tmp_ofp); fwrite(null_str, sizeof(char), 1, tmp_ofp); } // Append origin of this string fwrite(&i, sizeof(int), 1, tmp_ofp); cptr += sz; // TODO: Check about nullc and sz } fclose_if_non_null(tmp_ofp); //--------------------------------------------------------- // Sort output file so that similar titles occur together status = rs_mmap(tmp_opfile, &X, &nX, 1); // 1 for writing cBYE(status); // Note that we added the origin of the string. 
Hence the size of a // record is trunc_len + sizeof(int) rec_size = trunc_len + sizeof(int); qsort(X, src_nR, rec_size, str_compare); rs_munmap(X, nX); //-------------------------------------------------------- // Now we need to compact the sorted output so that it is not padded // Open 4 files for // output string, sz(output string), nn(output string), cnt status = open_temp_file(&ofp, &opfile, 0); cBYE(status); status = open_temp_file(&nn_ofp, &nn_opfile, 0); cBYE(status); status = open_temp_file(&sz_ofp, &sz_opfile, 0); cBYE(status); status = open_temp_file(&cnt_ofp, &cnt_opfile, 0); cBYE(status); status = open_temp_file(&from_dst_ofp, &from_dst_opfile, 0); cBYE(status); status = open_temp_file(&to_dst_ofp, &to_dst_opfile, 0); cBYE(status); status = open_temp_file(&dst_fk_ofp, &dst_fk_opfile, 0); cBYE(status); fclose_if_non_null(dst_fk_ofp); mk_file(dst_fk_opfile, src_nR * sizeof(int)); cBYE(status); rs_mmap(dst_fk_opfile, &dst_fk_X, &dst_fk_nX, 1); cBYE(status); idst_fk = (int *)dst_fk_X; status = rs_mmap(tmp_opfile, &X, &nX, 0); cBYE(status); cptr = X; prev_cptr = cptr; cptr += rec_size; cnt = icnt = 1; for ( int i = 0; i < src_nR; i++ ) { char *xptr; int *iptr; xptr = prev_cptr + trunc_len; iptr = (int *)xptr; if ( ( *iptr < 0 ) || ( *iptr >= src_nR ) ) { go_BYE(-1); } fwrite(iptr, sizeof(int), 1, from_dst_ofp); fwrite(&dst_nR, sizeof(int), 1, to_dst_ofp); idst_fk[*iptr] = dst_nR; if ( strcmp(cptr, prev_cptr) != 0 ) { sz = strlen(prev_cptr); if ( sz == 0 ) { is_nn = FALSE; } else { is_nn = TRUE; } sz++; // for null character termination fwrite(prev_cptr, sizeof(char), sz, ofp); fwrite(&sz, sizeof(int), 1, sz_ofp); fwrite(&is_nn, sizeof(char), 1, nn_ofp); if ( cnt > INT_MAX ) { go_BYE(-1); } icnt = cnt; fwrite(&icnt, sizeof(int), 1, cnt_ofp); cnt = 1; dst_nR++; } else { cnt++; } prev_cptr = cptr; cptr += rec_size; } // START: Now for the last guy cnt--; // this is needed sz = strlen(prev_cptr); if ( sz == 0 ) { is_nn = FALSE; } else { is_nn = TRUE; } if ( 
is_nn == FALSE ) { is_any_null = true; } sz++; // for null character termination fwrite(prev_cptr, sizeof(char), sz, ofp); fwrite(&sz, sizeof(int), 1, sz_ofp); fwrite(&is_nn, sizeof(char), 1, nn_ofp); if ( cnt > INT_MAX ) { go_BYE(-1); } icnt = cnt; fwrite(&icnt, sizeof(int), 1, cnt_ofp); dst_nR++; // STOP: Now for the last guy //--------------------------- fclose_if_non_null(ofp); fclose_if_non_null(nn_ofp); fclose_if_non_null(sz_ofp); fclose_if_non_null(cnt_ofp); fclose_if_non_null(from_dst_ofp); fclose_if_non_null(to_dst_ofp); //----------------------------------------------- // Delete temp file unlink(tmp_opfile); //----------------------------------------------------------- status = is_tbl(dst_tbl, &dst_tbl_id); cBYE(status); if ( dst_tbl_id >= 0 ) { /* Delete table */ status = del_tbl(NULL, dst_tbl_id); } sprintf(str_dst_nR, "%lld", dst_nR); status = add_tbl(dst_tbl, str_dst_nR, &dst_tbl_id); cBYE(status); //----------------------------------------------------------- /* TODO I think we can dispense with from_dst_fld and to_dst_fld */ sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=4", from_dst_opfile); status = add_fld(src_tbl, from_dst_fld, str_meta_data, &itemp); cBYE(status); sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=4", to_dst_opfile); status = add_fld(src_tbl, to_dst_fld, str_meta_data, &itemp); cBYE(status); sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=4", dst_fk_opfile); status = add_fld(src_tbl, dst_fk_fld, str_meta_data, &itemp); cBYE(status); // Add output field to meta data sprintf(str_meta_data, "filename=%s:fldtype=char string:n_sizeof=0", opfile); status = add_fld(dst_tbl, src_fld, str_meta_data, &itemp); cBYE(status); fclose_if_non_null(cnt_ofp); // Add nn field if needed if ( is_any_null ) { status = add_aux_fld(dst_tbl, src_fld, nn_opfile, "nn", &itemp); cBYE(status); } else { unlink(nn_opfile); } // Add sz field status = add_aux_fld(dst_tbl, src_fld, sz_opfile, "sz", &itemp); cBYE(status); // Add count 
field to meta data sprintf(str_meta_data, "filename=%s:fldtype=int:n_sizeof=4", cnt_opfile); status = add_fld(dst_tbl, "cnt", str_meta_data, &itemp); cBYE(status); //----------------------------------------------------------- BYE: free_if_non_null(null_str); rs_munmap(X, nX); rs_munmap(src_fld_X, src_fld_nX); rs_munmap(cnt_X, cnt_nX); rs_munmap(out_X, out_nX); rs_munmap(dst_fk_X, dst_fk_nX); rs_munmap(nn_src_fld_X, nn_src_fld_nX); fclose_if_non_null(ofp); free_if_non_null(opfile); fclose_if_non_null(nn_ofp); free_if_non_null(nn_opfile); fclose_if_non_null(sz_ofp); free_if_non_null(sz_opfile); fclose_if_non_null(cnt_ofp); free_if_non_null(cnt_opfile); fclose_if_non_null(tmp_ofp); free_if_non_null(tmp_opfile); fclose_if_non_null(from_dst_ofp); free_if_non_null(from_dst_opfile); fclose_if_non_null(to_dst_ofp); free_if_non_null(to_dst_opfile); free_if_non_null(sz_src_fld); free_if_non_null(dst_fk_fld); free_if_non_null(str_maxlen); return(status); }
static int get_data( char *tbl, char *fld, long long *ptr_nR, char **ptr_X, size_t *ptr_nX, char **ptr_opfile, int *ptr_fldtype, int *ptr_fldsz ) { int status = 0; int tbl_id = INT_MIN, fld_id = INT_MIN, nn_fld_id = INT_MIN; char *X = NULL; size_t nX = 0; char *nn_X = NULL; size_t nn_nX = 0; char *op_X = NULL; size_t op_nX = 0; char *endptr = NULL; char buffer[32]; long long nR = 0; FLD_TYPE *fld_meta = NULL, *nn_fld_meta = NULL; FILE *ofp = NULL; char *opfile = NULL; zero_string(buffer, 32); status = is_tbl(tbl, &tbl_id); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); status = is_fld(NULL, tbl_id, fld, &fld_id); cBYE(status); chk_range(fld_id, 0, g_n_fld); nR = g_tbl[tbl_id].nR; fld_meta = &(g_fld[fld_id]); nn_fld_id = g_fld[fld_id].nn_fld_id; if ( nn_fld_id >= 0 ) { nn_fld_meta = &(g_fld[nn_fld_id]); status = rs_mmap(nn_fld_meta->filename, &nn_X, &nn_nX, 0); cBYE(status); } *ptr_fldsz = fld_meta->n_sizeof; status = mk_ifldtype(fld_meta->fldtype, ptr_fldtype); cBYE(status); switch ( *ptr_fldtype ) { case FLDTYPE_INT : case FLDTYPE_LONGLONG : /* all is well */ break; default : go_BYE(-1); } if ( nn_X == NULL ) { /* no nn field */ status = rs_mmap(fld_meta->filename, &X, &nX, 0); cBYE(status); *ptr_nR = nR; *ptr_X = X; *ptr_nX = nX; *ptr_opfile = NULL; } else { long long nn_nR = 0; status = f_to_s(tbl, nn_fld_meta->name, "sum", buffer); cBYE(status); nn_nR = strtol(buffer, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( ( nn_nR <= 0 ) || ( nn_nR >= nR ) ) { go_BYE(-1); } /* Now make a copy of just the non-null values */ status = rs_mmap(fld_meta->filename, &X, &nX, 0); cBYE(status); status = rs_mmap(nn_fld_meta->filename, &nn_X, &nn_nX, 0); cBYE(status); long long filesz = nn_nR * fld_meta->n_sizeof; status = open_temp_file(&ofp, &opfile, filesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(opfile, filesz); cBYE(status); status = rs_mmap(opfile, &op_X, &op_nX, 1); cBYE(status); status = copy_nn_vals(X, nR, nn_X, op_X, nn_nR, fld_meta->n_sizeof); 
cBYE(status); /* Cleanup and return values */ rs_munmap(X, nX); rs_munmap(nn_X, nn_nX); *ptr_nR = nn_nR; *ptr_X = op_X; *ptr_nX = op_nX; *ptr_opfile = opfile; } BYE: return(status); }
//--------------------------------------------------------------- // START FUNC DECL int dup_fld( char *tbl, char *f1, char *f2 ) // STOP FUNC DECL { int status = 0; char *Y = NULL; size_t nY = 0; char *X = NULL; size_t nX = 0; char *nn_X = NULL; size_t nn_nX = 0; char *sz_X = NULL; size_t sz_nX = 0; FILE *ofp = NULL; char *opfile = NULL; FILE *nn_ofp = NULL; char *nn_opfile = NULL; FILE *sz_ofp = NULL; char *sz_opfile = NULL; int tbl_id, f1_fld_id, f2_fld_id, nn_fld_id, sz_fld_id; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( strcmp(f1, f2) == 0 ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); status = is_fld(NULL, tbl_id, f1, &f1_fld_id); cBYE(status); chk_range(f1_fld_id, 0, g_n_fld); status = is_fld(NULL, tbl_id, f2, &f2_fld_id); cBYE(status); if ( f2_fld_id >= 0 ) { status = del_fld(NULL, tbl_id, NULL, f2_fld_id); cBYE(status); } // Open input file status = rs_mmap(g_fld[f1_fld_id].filename, &X, &nX, 0); cBYE(status); // Create a copy of the data status = open_temp_file(&ofp, &opfile, nX); cBYE(status); fclose_if_non_null(ofp); status = mk_file(opfile, nX); cBYE(status); status = rs_mmap(opfile, &Y, &nY, 1); memcpy(Y, X, nX); rs_munmap(Y, nY); // Get empty spot for f2 status = get_empty_fld(&f2_fld_id); cBYE(status); // f2 is same as f1 except for data and "is_external" g_fld[f2_fld_id] = g_fld[f1_fld_id]; g_fld[f2_fld_id].is_external = false; g_fld[f2_fld_id].nn_fld_id = -1; g_fld[f2_fld_id].sz_fld_id = -1; status = chk_fld_name(f2, 0); zero_string(g_fld[f2_fld_id].name, MAX_LEN_FLD_NAME+1); zero_string(g_fld[f2_fld_id].filename, MAX_LEN_FILE_NAME+1); strcpy(g_fld[f2_fld_id].name, f2); strcpy(g_fld[f2_fld_id].filename, opfile); // Add to hash table 
//-------------------------------------------------------- // Add auxiliary field nn if if it exists nn_fld_id = g_fld[f1_fld_id].nn_fld_id; if ( nn_fld_id >= 0 ) { // open input file status = rs_mmap(g_fld[nn_fld_id].filename, &nn_X, &nn_nX, 0); cBYE(status); // create copy of data status = open_temp_file(&nn_ofp, &nn_opfile, nn_nX); cBYE(status); fclose_if_non_null(nn_ofp); status = mk_file(nn_opfile, nn_nX); cBYE(status); status = rs_mmap(nn_opfile, &Y, &nY, 1); memcpy(Y, nn_X, nn_nX); rs_munmap(Y, nY); rs_munmap(nn_X, nn_nX); nn_fld_id = INT_MIN; status = add_aux_fld(tbl, f2, nn_opfile, "nn", &nn_fld_id); cBYE(status); } //-------------------------------------------------------- // Add auxiliary field sz if if it exists sz_fld_id = g_fld[f1_fld_id].sz_fld_id; if ( sz_fld_id >= 0 ) { status = open_temp_file(&sz_ofp, &sz_opfile, sz_nX); cBYE(status); fclose_if_non_null(sz_ofp); status = mk_file(sz_opfile, sz_nX); cBYE(status); status = rs_mmap(sz_opfile, &Y, &nY, 1); status = rs_mmap(g_fld[sz_fld_id].filename, &sz_X, &sz_nX, 0); memcpy(Y, sz_X, sz_nX); rs_munmap(Y, nY); rs_munmap(sz_X, sz_nX); status = add_aux_fld(tbl, f2, sz_opfile, "sz", &sz_fld_id); cBYE(status); } //-------------------------------------------------------- BYE: rs_munmap(X, nX); rs_munmap(nn_X, nn_nX); rs_munmap(sz_X, sz_nX); rs_munmap(Y, nY); fclose_if_non_null(ofp); fclose_if_non_null(nn_ofp); fclose_if_non_null(sz_ofp); free_if_non_null(opfile); free_if_non_null(nn_opfile); free_if_non_null(sz_opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int xfer_string( FLD_TYPE *dst_idx_meta, char *src_fld_X, char *sz_src_fld_X, long long src_nR, char *dst_tbl, char *dst_fld, char *dst_idx_X, char *nn_dst_idx_X, long long dst_nR ) // STOP FUNC DECL { int status = 0; char str_meta_data[256]; char *szX = NULL; size_t n_szX= 0; int *szptr = NULL; char *nnX = NULL; size_t n_nnX= 0; char *nnptr = NULL; int dst_fld_id, nn_dst_fld_id, sz_dst_fld_id; FILE *ofp = NULL; char *opfile = NULL; FILE *nn_ofp = NULL; char *nn_opfile = NULL; FILE *sz_ofp = NULL; char *sz_opfile = NULL; long long *offset_src_fld = NULL; long long *ll_dst_idx = NULL; bool is_any_null = false; //-------------------------------------------------------- status = open_temp_file(&ofp, &opfile, 0); cBYE(status); /* We cannot allocate storage for this since we do not know how much * to allocate a-priori */ /* Open output fields for size and nn in mmmap mode */ long long filesz = dst_nR * sizeof(int); status = open_temp_file(&sz_ofp, &sz_opfile, filesz); cBYE(status); fclose_if_non_null(sz_ofp); status = mk_file(sz_opfile, filesz); cBYE(status); status = rs_mmap(sz_opfile, &szX, &n_szX, 1); cBYE(status); szptr = (int *)szX; long long nnfilesz = dst_nR * sizeof(char); status = open_temp_file(&nn_ofp, &nn_opfile, nnfilesz); cBYE(status); fclose_if_non_null(nn_ofp); status = mk_file(nn_opfile, nnfilesz); cBYE(status); status = rs_mmap(nn_opfile, &nnX, &n_nnX, 1); cBYE(status); nnptr = (char *)nnX; /*-------------------------------------------------------------*/ status = mk_offset((int *)sz_src_fld_X, src_nR, &offset_src_fld); cBYE(status); /* Convert from int to longlong if needed */ if ( strcmp(dst_idx_meta->fldtype, "int") == 0 ) { ll_dst_idx = malloc(dst_nR * sizeof(long long)); return_if_malloc_failed(ll_dst_idx); if ( dst_nR >= INT_MAX ) { fprintf(stderr, "TO BE IMPLEMENTED\n"); go_BYE(-1); } conv_int_to_longlong((int *)dst_idx_X, (int)dst_nR, ll_dst_idx); } else { 
ll_dst_idx = (long long *)dst_idx_X; } /*-------------------------------------------------------------*/ for ( long long i = 0; i < dst_nR; i++ ) { int *i_sz_src_fld_ptr = (int *)sz_src_fld_X; long long idx; size_t nw; char *testptr; char nullc = 0; long long offset; int sz; if ( ( nn_dst_idx_X != NULL ) && ( nn_dst_idx_X[i] == FALSE ) ) { // Output null value is_any_null = true; nnptr[i] = FALSE; szptr[i] = 1; nw = fwrite(&nullc, sizeof(char), 1, ofp); // Null terminate if ( nw != 1 ) { go_BYE(-1); } } else { idx = ll_dst_idx[i]; if ( ( idx < 0 ) || ( idx >= src_nR ) ) { go_BYE(-1); } sz = i_sz_src_fld_ptr[idx]; offset = offset_src_fld[idx]; // This checks that strings are null terminated testptr = src_fld_X + offset + sz - 1; if ( *testptr != '\0' ) { go_BYE(-1); } szptr[i] = sz; nnptr[i] = TRUE; nw = fwrite(src_fld_X + offset, sizeof(char), sz, ofp); if ( nw != sz ) { go_BYE(-1); } } } fclose_if_non_null(ofp); rs_munmap(szX, n_szX); rs_munmap(nnX, n_nnX); // Add output field to meta data zero_string(str_meta_data, 256); strcpy(str_meta_data, "fldtype=char string:n_sizeof=0"); strcat(str_meta_data, ":filename="); strcat(str_meta_data, opfile); status = add_fld(dst_tbl, dst_fld, str_meta_data, &dst_fld_id); cBYE(status); // Add size field status = add_aux_fld(dst_tbl, dst_fld, sz_opfile, "sz", &sz_dst_fld_id); cBYE(status); if ( is_any_null ) { status = add_aux_fld(dst_tbl, dst_fld, nn_opfile, "nn", &nn_dst_fld_id); cBYE(status); } else { unlink(nn_opfile); } BYE: if ( strcmp(dst_idx_meta->fldtype, "int") == 0 ) { free_if_non_null(ll_dst_idx); } fclose_if_non_null(ofp); fclose_if_non_null(nn_ofp); fclose_if_non_null(sz_ofp); free_if_non_null(opfile); free_if_non_null(nn_opfile); free_if_non_null(sz_opfile); free_if_non_null(offset_src_fld); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int crossprod( char *t1, char *f1, char *t2, char *f2, char *t3 ) // STOP FUNC DECL { int status = 0; char *Y1 = NULL; size_t nY1 = 0; char *Y2 = NULL; size_t nY2 = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f1_opfile = NULL; int f1type, f2type; char *f2_X = NULL; size_t f2_nX = 0; char *f2_opfile = NULL; long long f1size, f2size; int t2f1_fld_id = INT_MIN, t2f2_fld_id = INT_MIN; int t3_id = INT_MIN, itemp; long long chk_nR1 = 0, nR1, nR2, nR3; char str_meta_data[1024]; char *t3f1_opfile = NULL, *t3f2_opfile = NULL; FILE *ofp = NULL; char buffer[32]; // For multi-threading int rc; // result code for thread create pthread_t threads[MAX_NUM_THREADS]; pthread_attr_t attr; void *thread_status; //---------------------------------------------------------------- zero_string(str_meta_data, 1024); zero_string(buffer, 32); if ( strcmp(f1, f2) == 0 ) { go_BYE(-1); } /* Remove f1 != f2 restriction later. To do so, we need to specify * fields of t3 explicitly */ //---------------------------------------------------------------- status = get_data(t1, f1, &nR1, &f1_X, &f1_nX, &f1_opfile, &f1type, &f1size); cBYE(status); status = get_data(t2, f2, &nR2, &f2_X, &f2_nX, &f2_opfile, &f2type, &f2size); cBYE(status); nR3 = nR1 * nR2; if ( nR3 == 0 ) { fprintf(stderr, "No data to create t3 \n"); goto BYE; } // Create storage for field 1 in Table t3 */ long long filesz = nR3 * f1size; status = open_temp_file(&ofp, &t3f1_opfile, filesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(t3f1_opfile, filesz); cBYE(status); status = rs_mmap(t3f1_opfile, &Y1, &nY1, 1); cBYE(status); // Create storage for field 2 in Table t3 */ filesz = nR3 * f2size; status = open_temp_file(&ofp, &t3f2_opfile, filesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(t3f2_opfile, filesz); cBYE(status); status = rs_mmap(t3f2_opfile, &Y2, &nY2, 1); cBYE(status); 
//---------------------------------------------------------------- /* Set up parallelism computations. Parallelization strategy is * simple. Partition field 1 (nR1 rows) among the threads */ g_nR1 = nR1; g_nR2 = nR2; g_nR3 = nR3; g_f1type = f1type; g_f2type = f2type; g_f1size = f1size; g_f2size = f2size; g_f1_X = f1_X; g_f2_X = f2_X; g_Y1 = Y1; g_Y2 = Y2; for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { g_thread_id[i] = i; g_num_rows_processed[i] = 0; } status = get_num_threads(&g_nT); cBYE(status); //-------------------------------------------- #define MIN_ROWS_FOR_CROSSPROD 4 // 1024 if ( nR1 <= MIN_ROWS_FOR_CROSSPROD ) { g_nT = 1; } /* Don't create more threads than you can use */ if ( g_nT > nR1 ) { g_nT = nR1; } if ( g_nT == 1 ) { core_crossprod(&(g_thread_id[0])); chk_nR1 = g_num_rows_processed[0]; } else { /* Create threads */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_create(&threads[t], NULL, core_crossprod, &(g_thread_id[t])); if ( rc ) { go_BYE(-1); } } /* Free attribute and wait for the other threads */ pthread_attr_destroy(&attr); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_join(threads[t], &thread_status); if ( rc ) { go_BYE(-1); } chk_nR1 += g_num_rows_processed[t]; } } if ( chk_nR1 != nR1 ) { go_BYE(-1); } //---------------------------------------------------------------- // Add output fields to t3 meta data status = is_tbl(t3, &t3_id); cBYE(status); if ( t3_id >= 0 ) { status = del_tbl(NULL, t3_id); cBYE(status); } sprintf(buffer, "%lld", nR3); status = add_tbl(t3, buffer, &itemp); cBYE(status); sprintf(str_meta_data, "fldtype=int:n_sizeof=%u:filename=%s", f1size, t3f1_opfile); status = add_fld(t3, f1, str_meta_data, &t2f1_fld_id); cBYE(status); zero_string(str_meta_data, 1024); sprintf(str_meta_data, "fldtype=int:n_sizeof=%u:filename=%s", f2size, t3f2_opfile); status = add_fld(t3, f2, str_meta_data, &t2f2_fld_id); cBYE(status); BYE: 
fclose_if_non_null(ofp); rs_munmap(f1_X, f1_nX); rs_munmap(f2_X, f2_nX); if ( f1_opfile != NULL ) { unlink(f1_opfile); free_if_non_null(f1_opfile); } if ( f2_opfile != NULL ) { unlink(f2_opfile); free_if_non_null(f2_opfile); } free_if_non_null(t3f1_opfile); free_if_non_null(t3f2_opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int join( char *docroot, sqlite3 *in_db, char *src_tbl, char *src_lnk, char *src_val, char *dst_tbl, char *dst_lnk, char *dst_fld, char *op ) // STOP FUNC DECL { int status = 0; sqlite3 *db = NULL; char *src_val_X = NULL; size_t src_val_nX = 0; char *src_lnk_X = NULL; size_t src_lnk_nX = 0; char *src_idx_X = NULL; size_t src_idx_nX = 0; char *nn_dst_val_X = NULL; size_t nn_dst_val_nX = 0; char *dst_val_X = NULL; size_t dst_val_nX = 0; char *dst_lnk_X = NULL; size_t dst_lnk_nX = 0; char *dst_idx_X = NULL; size_t dst_idx_nX = 0; FLD_META_TYPE src_val_meta, src_lnk_meta, src_idx_meta; FLD_META_TYPE dst_val_meta, dst_lnk_meta, dst_idx_meta; long long src_nR, dst_nR; // counts all rows char str_meta_data[1024]; int sizeof_src_idx, sizeof_dst_idx; int *iptr; int ijoin_op; char *x_src_tbl = NULL, *x_dst_tbl = NULL; FILE *ofp = NULL, *nn_ofp = NULL; char *opfile = NULL, *nn_opfile = NULL; //---------------------------------------------------------------- zero_string(str_meta_data, 1024); status = mk_mjoin_op(op, &ijoin_op); cBYE(status); status = open_db_if_needed(docroot, in_db, &db); cBYE(status); zero_fld_meta(&src_val_meta); zero_fld_meta(&src_lnk_meta); zero_fld_meta(&src_idx_meta); zero_fld_meta(&dst_val_meta); zero_fld_meta(&dst_lnk_meta); zero_fld_meta(&dst_idx_meta); //---------------------------------------------------------------- /* Given the src_tbl, we create x_src_tbl which has no null values in the link fields and which has an index field called "id" with values 0, 1, .... 
|x_src_tbl| -1 */ status = elim_null_vals(docroot, db, src_tbl, src_lnk, &x_src_tbl); cBYE(status); /* Similarly for dst_tbl */ status = elim_null_vals(docroot, db, dst_tbl, dst_lnk, &x_dst_tbl); cBYE(status); status = sort_if_necessary(docroot, db, x_src_tbl, src_lnk, "id"); cBYE(status); status = sort_if_necessary(docroot, db, x_dst_tbl, dst_lnk, "id"); cBYE(status); // Get meta-data for all necessary fields status = fld_meta(docroot, db, x_src_tbl, src_lnk, -1, &src_lnk_meta); cBYE(status); status = fld_meta(docroot, db, x_src_tbl, XXXXXXX, -1, &src_idx_meta); cBYE(status); status = fld_meta(docroot, db, x_src_tbl, src_val, -1, &src_val_meta); cBYE(status); status = fld_meta(docroot, db, x_dst_tbl, dst_lnk, -1, &dst_lnk_meta); cBYE(status); status = fld_meta(docroot, db, x_dst_tbl, XXXXXXX, -1, &dst_idx_meta); cBYE(status); // Get pointer access to all necessary fields status = rs_mmap(src_lnk_meta.filename, &src_lnk_X, &src_lnk_nX, 0); cBYE(status); status = rs_mmap(src_idx_meta.filename, &src_idx_X, &src_idx_nX, 0); cBYE(status); status = rs_mmap(src_val_meta.filename, &src_val_X, &src_val_nX, 0); cBYE(status); status = rs_mmap(dst_lnk_meta.filename, &dst_lnk_X, &dst_lnk_nX, 0); cBYE(status); status = rs_mmap(dst_idx_meta.filename, &dst_idx_X, &dst_idx_nX, 0); cBYE(status); //-------------------------------------------------------- // Create output data files status = open_temp_file(&ofp, &opfile); cBYE(status); fclose_if_non_null(ofp); status = open_temp_file(&nn_ofp, &nn_opfile); cBYE(status); fclose_if_non_null(nn_ofp); status = mk_file(opfile, src_val_meta.n_sizeof * dst_nR); cBYE(status); status = mk_file(nn_opfile, sizeof(char) * dst_nR); cBYE(status); status = rs_mmap(opfile, &dst_val_X, &dst_val_nX, 1); cBYE(status); status = rs_mmap(nn_opfile, &nn_dst_val_X, &nn_dst_val_nX, 1); cBYE(status); //-------------------------------------------------------- // Core join status = core_join_I_I_I_I_I_I( (int *)cpy_src_lnk_X, (int *)cpy_src_idx_X, (int 
*)src_val_X, src_nR, nn_src_nR, (int *)cpy_dst_lnk_X, (int *)cpy_dst_idx_X, (int *)dst_val_X, nn_dst_val_X, dst_nR, nn_dst_nR, ijoin_op, &is_any_null); //-------------------------------------------------------- // Clean up aux data structures rs_munmap(cpy_src_lnk_X, cpy_src_lnk_nX); rs_munmap(cpy_src_idx_X, cpy_src_idx_nX); rs_munmap(cpy_dst_lnk_X, cpy_dst_lnk_nX); rs_munmap(cpy_dst_idx_X, cpy_dst_idx_nX); if ( cpy_src_lnk_file == NULL ) { status = del_fld(docroot, db, src_tbl,"_cpy_lnk"); cBYE(status); } else { unlink(cpy_src_lnk_file); free_if_non_null(cpy_src_lnk_file); } if ( cpy_src_idx_file == NULL ) { status = del_fld(docroot, db, src_tbl,"_cpy_idx"); cBYE(status); } else { unlink(cpy_src_idx_file); free_if_non_null(cpy_src_idx_file); } if ( cpy_dst_lnk_file == NULL ) { status = del_fld(docroot, db, dst_tbl,"_cpy_lnk"); cBYE(status); } else { unlink(cpy_dst_lnk_file); free_if_non_null(cpy_dst_lnk_file); } if ( cpy_dst_idx_file == NULL ) { status = del_fld(docroot, db, dst_tbl,"_cpy_idx"); cBYE(status); } else { unlink(cpy_dst_idx_file); free_if_non_null(cpy_dst_idx_file); } // Add output field to meta data sprintf(str_meta_data,"filename=%s:n_sizeof=%d:fldtype=%s", opfile, src_val_meta.n_sizeof, src_val_meta.fldtype); status = add_fld(docroot, db, dst_tbl, dst_fld, str_meta_data); cBYE(status); if ( is_any_null == true ) { status = add_aux_fld(docroot, db, dst_tbl, dst_fld, nn_opfile, "nn"); cBYE(status); } else { unlink(nn_opfile); free_if_non_null(nn_opfile); } BYE: if ( in_db == NULL ) { sqlite3_close(db); } rs_munmap(dst_val_X, dst_nX); rs_munmap(nn_dst_val_X, nn_dst_nX); rs_munmap(src_val_X, src_val_nX); rs_munmap(src_lnk_X, src_lnk_nX); rs_munmap(src_lnk_X, src_lnk_nX); rs_munmap(dst_lnk_X, dst_lnk_nX); rs_munmap(dst_idx_X, dst_idx_nX); free_if_non_null(nn_opfile); free_if_non_null(opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int num_in_range( char *t1, char *f1, char *t2, char *lb, char *ub, char *cnt ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *lb_X = NULL; size_t lb_nX = 0; char *ub_X = NULL; size_t ub_nX = 0; char *cnt_X = NULL; size_t cnt_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN; int f1_id = INT_MIN, lb_id = INT_MIN, ub_id = INT_MIN, cnt_id = INT_MIN; FLD_TYPE *f1_meta = NULL, *lb_meta = NULL, *ub_meta = NULL; long long nR1 = INT_MIN, nR2 = INT_MIN, chk_nR1 = INT_MIN; long long **cntptrs = NULL; // For multi-threading int nT; int rc; // result code for thread create pthread_t threads[MAX_NUM_THREADS]; pthread_attr_t attr; void *thread_status; char str_meta_data[1024]; char *opfile = NULL; FILE *ofp = NULL; //---------------------------------------------------------------- if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( lb == NULL ) || ( *lb == '\0' ) ) { go_BYE(-1); } if ( ( ub == NULL ) || ( *ub == '\0' ) ) { go_BYE(-1); } if ( ( cnt == NULL ) || ( *cnt == '\0' ) ) { go_BYE(-1); } if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); } zero_string(str_meta_data, 1024); //-------------------------------------------------------- status = is_tbl(t1, &t1_id); cBYE(status); chk_range(t1_id, 0, g_n_tbl); nR1 = g_tbl[t1_id].nR; //-------------------------------------------------------- status = is_fld(NULL, t1_id, f1, &f1_id); cBYE(status); chk_range(f1_id, 0, g_n_fld); f1_meta = &(g_fld[f1_id]); status = rs_mmap(f1_meta->filename, &f1_X, &f1_nX, 0); cBYE(status); // Have not implemented case where f1 has null field if ( f1_meta->nn_fld_id >= 0 ) { go_BYE(-1); } // Have implemented only for int if ( strcmp(f1_meta->fldtype, "int") != 0 ) { cBYE(-1); } //-------------------------------------------------------- status = is_tbl(t2, &t2_id); 
cBYE(status); chk_range(t2_id, 0, g_n_tbl); nR2 = g_tbl[t2_id].nR; //-------------------------------------------------------- status = is_fld(NULL, t2_id, lb, &lb_id); cBYE(status); chk_range(lb_id, 0, g_n_fld); lb_meta = &(g_fld[lb_id]); status = rs_mmap(lb_meta->filename, &lb_X, &lb_nX, 0); cBYE(status); // Have not implemented case where lb has null field if ( lb_meta->nn_fld_id >= 0 ) { go_BYE(-1); } // Have implemented only for int if ( strcmp(lb_meta->fldtype, "int") != 0 ) { cBYE(-1); } //-------------------------------------------------------- status = is_fld(NULL, t2_id, ub, &ub_id); cBYE(status); chk_range(ub_id, 0, g_n_fld); ub_meta = &(g_fld[ub_id]); status = rs_mmap(ub_meta->filename, &ub_X, &ub_nX, 0); cBYE(status); // Have not implemented case where ub has null field if ( ub_meta->nn_fld_id >= 0 ) { go_BYE(-1); } // Have implemented only for int if ( strcmp(ub_meta->fldtype, "int") != 0 ) { cBYE(-1); } //-------------------------------------------------------- // Set up access to input int *inptr = (int *)f1_X; int *lbptr = (int *)lb_X; int *ubptr = (int *)ub_X; //-------------------------------------------------------- //--- Decide on how much parallelism to use for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { g_thread_id[i] = i; g_num_rows[i] = 0; } status = get_num_threads(&nT); cBYE(status); //-------------------------------------------- #define MIN_ROWS_FOR_SUBSAMPLE 10000 // 1048576 if ( nR1 <= MIN_ROWS_FOR_SUBSAMPLE ) { nT = 1; } /* Don't create more threads than you can use */ if ( nT > nR1 ) { nT = nR1; } //-------------------------------------------- /* Make space for output */ long long filesz = nR2 * sizeof(long long); status = open_temp_file(&ofp, &opfile, filesz); cBYE(status); fclose_if_non_null(ofp); status = mk_file(opfile, filesz); cBYE(status); status = rs_mmap(opfile, &cnt_X, &cnt_nX, 1); long long *cntptr = (long long *)cnt_X; /* Make a holding tank for partial results */ cntptrs = malloc(nT * sizeof(long long *)); 
return_if_malloc_failed(cntptrs); for ( int i = 0; i < nT; i++ ) { cntptrs[i] = malloc(nR2 * sizeof(long long)); return_if_malloc_failed(cntptrs[i]); for ( long long j = 0; j <nR2; j++ ) { cntptrs[i][j] = 0; } } // Add count field to meta data sprintf(str_meta_data, "fldtype=long long:n_sizeof=8:filename=%s", opfile); status = add_fld(t2, cnt, str_meta_data, &cnt_id); cBYE(status); chk_range(cnt_id, 0, g_n_fld); //----------------------------------------------------------- // Now we count how much there is in each range // Set up global variables g_nT = nT; g_inptr = inptr; g_lbptr = lbptr; g_ubptr = ubptr; g_cntptrs = cntptrs; g_nR1 = nR1; g_nR2 = nR2; if ( g_nT == 1 ) { core_num_in_range(&(g_thread_id[0])); chk_nR1 = g_num_rows[0]; } else { chk_nR1 = 0; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_create(&threads[t], NULL, core_num_in_range, &(g_thread_id[t])); if ( rc ) { go_BYE(-1); } } /* Free attribute and wait for the other threads */ pthread_attr_destroy(&attr); for ( int t = 0; t < g_nT; t++ ) { rc = pthread_join(threads[t], &thread_status); if ( rc ) { go_BYE(-1); } chk_nR1 += g_num_rows[t]; } } if ( chk_nR1 != nR1 ) { go_BYE(-1); } // Accumulate partial results for ( long long i = 0; i < nR2; i++ ) { cntptr[i] = 0; for ( int j= 0; j < nT; j++ ) { cntptr[i] += cntptrs[j][i]; } } BYE: g_write_to_temp_dir = false; rs_munmap(f1_X, f1_nX); rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); rs_munmap(cnt_X, cnt_nX); free_if_non_null(opfile); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int is_a_in_b( char *docroot, sqlite3 *in_db, char *t1, char *f1, char *t2, char *f2, char *cfld, char *src_f2, /* fld_to_fetch_from_t2 */ char *dst_f1 /* fld_to_place_in_t1 */ ) // STOP FUNC DECL { int status = 0; sqlite3 *db = NULL; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *src_f2_X = NULL; size_t src_f2_nX = 0; char *dst_f1_X = NULL; size_t dst_f1_nX = 0; char *cfX = NULL; size_t cf_nX = 0; char *nn_f1_X = NULL; size_t nn_f1_nX = 0; char *nn_src_f2_X = NULL; size_t nn_src_f2_nX = 0; FLD_META_TYPE f1_meta, f2_meta, src_f2_meta; FLD_META_TYPE nn_f1_meta, nn_src_f2_meta; int *if1 = NULL, *if2 = NULL, *isrc_f2, *idst_f1 = NULL; int i1val, prev_i1val, isrc_val; long long *llf1 = NULL, *llf2 = NULL, *llsrc_f2, *lldst_f1 = NULL; long long ll1val, prev_ll1val, llsrc_val; long long nR1, nR2; FILE *ofp = NULL; char *opfile = NULL; FILE *dst_ofp = NULL; char *dst_opfile = NULL; char str_meta_data[4096]; int t1_id, t2_id, f2_id, ifldtype, src_f2_ifldtype; int iminval, imaxval; long long llminval, llmaxval; bool b_is_tbl, b_write_dst; //---------------------------------------------------------------- if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( cfld == NULL ) || ( *cfld == '\0' ) ) { /* condition field NOT provided */ if ( ( src_f2 == NULL ) || ( *src_f2 == '\0' ) || ( dst_f1 == NULL ) || ( *dst_f1 == '\0' ) ) { go_BYE(-1); } } else { if ( strcmp(f1, cfld) == 0 ) { go_BYE(-1); } if ( ( src_f2 != NULL ) && ( *src_f2 != '\0' ) ) { go_BYE(-1); } if ( ( dst_f1 != NULL ) && ( *dst_f1 != '\0' ) ) { go_BYE(-1); } } //-------------------------------------------------------- zero_string(str_meta_data, 4096); zero_fld_meta(&f1_meta); zero_fld_meta(&nn_f1_meta); 
zero_fld_meta(&f2_meta); /* Recall no null values for f2 */ zero_fld_meta(&src_f2_meta); zero_fld_meta(&nn_src_f2_meta); status = open_db_if_needed(docroot, in_db, &db); cBYE(status); //-------------------------------------------------------- status = is_tbl(docroot, db, t1, &b_is_tbl, &t1_id); cBYE(status); if ( !b_is_tbl ) { go_BYE(-1); } status = internal_get_nR(db, t1_id, &nR1); cBYE(status); status = fld_meta(docroot, db, t1, f1, -1, &f1_meta); cBYE(status); status = rs_mmap(f1_meta.filename, &f1_X, &f1_nX, 0); cBYE(status); status = mk_ifldtype(f1_meta.fldtype, &ifldtype); cBYE(status); status = is_tbl(docroot, db, t2, &b_is_tbl, &t2_id); cBYE(status); if ( !b_is_tbl ) { go_BYE(-1); } status = internal_get_nR(db, t2_id, &nR2); cBYE(status); status = fld_meta(docroot, db, t2, f2, -1, &f2_meta); cBYE(status); status = rs_mmap(f2_meta.filename, &f2_X, &f2_nX, 0); cBYE(status); if ( ( src_f2 != NULL ) && ( *src_f2 != '\0' ) ) { status = fld_meta(docroot, db, t2, src_f2, -1, &src_f2_meta); cBYE(status); status = rs_mmap(src_f2_meta.filename, &src_f2_X, &src_f2_nX, 0); cBYE(status); status = get_aux_field_if_it_exists( docroot, db, t2, src_f2_meta.id, "nn", &nn_src_f2_meta, &nn_src_f2_X, &nn_src_f2_nX); cBYE(status); status = mk_ifldtype(src_f2_meta.fldtype, &src_f2_ifldtype); cBYE(status); } /*------------------------------------------------------------*/ if ( strcmp(f1_meta.fldtype, f2_meta.fldtype) != 0 ) { fprintf(stderr, "Fields being compared must be same type \n"); go_BYE(-1); } /*------------------------------------------------------------*/ // There can be null values in f1 but not in f2 status = get_aux_field_if_it_exists(docroot, db, t1, f1_meta.id, "nn", &nn_f1_meta, &nn_f1_X, &nn_f1_nX); cBYE(status); status = is_nn_fld(docroot, db, t2, f2, &f2_id, NULL); cBYE(status); if ( f2_id > 0 ) { fprintf(stderr, "Field [%s] in Table [%s] has null values\n", f2, t2); fprintf(stderr, "NOT IMPLEMENTED\n"); go_BYE(-1); } /* Determine whether a destination value 
needs to be written in T1 */ if ( ( dst_f1 != NULL ) && ( *dst_f1 != '\0' ) ) { b_write_dst = true; } else { b_write_dst = false; } /*------------------------------------------------------------*/ /* Make space for output condition field */ status = open_temp_file(&ofp, &opfile); cBYE(status); fclose_if_non_null(ofp); status = mk_file(opfile, nR1 * sizeof(char)); cBYE(status); status = rs_mmap(opfile, &cfX, &cf_nX, 1); cBYE(status); /* Make space for output value field */ if ( ( dst_f1 != NULL ) && ( *dst_f1 != '\0' ) ) { status = open_temp_file(&dst_ofp, &dst_opfile); cBYE(status); fclose_if_non_null(dst_ofp); status = mk_file(dst_opfile, nR1 * src_f2_meta.n_sizeof); cBYE(status); status = rs_mmap(dst_opfile, &dst_f1_X, &dst_f1_nX, 0); } //-------------------------------------------------------- /* We can finally start with the computations */ if1 = (int *)f1_X; llf1 = (long long *)f1_X; if2 = (int *)f2_X; llf2 = (long long *)f2_X; isrc_f2 = (int *)src_f2_X; llsrc_f2 = (long long *)src_f2_X; idst_f1 = (int *)dst_f1_X; lldst_f1 = (long long *)dst_f1_X; /* We find the smallest and largest values of f2 which allow us to * skip the binary search over the values of f2 */ switch ( ifldtype ) { case FLDTYPE_INT : iminval = if2[0]; imaxval = if2[nR2-1]; break; case FLDTYPE_LONGLONG : llminval = llf2[0]; llmaxval = llf2[nR2-1]; break; default : go_BYE(-1); break; } /*------------------------------------------------------------------*/ bool prev_defined = false; /* prev_defined tells us whetrher prev_i1val or prev_ll1val are defined */ long long prev_pos = -1; // undefined value for ( long long i1 = 0; i1 < nR1; i1++ ) { bool is_found; long long pos; is_found = false; /* If f1 is null, then cfld and dst_f1 must also be null */ if ( ( nn_f1_X != NULL ) && ( nn_f1_X[i1] == FALSE ) ) { isrc_val = 0; cfX[i1] = FALSE; if ( b_write_dst ) { lldst_f1[i1] = 0; } continue; } /*----------------------------------------------------------------*/ switch ( ifldtype ) { case FLDTYPE_INT : 
i1val = if1[i1]; if ( ( i1val < iminval ) || ( i1val > imaxval ) ) { pos = -1; } else { if ( ( prev_defined ) && ( i1val == prev_i1val ) ) { if ( prev_pos >= 0 ) { pos = prev_pos; } else { status = bin_search_i(if2, nR2, i1val, &pos, ""); cBYE(status); } } else { status = bin_search_i(if2, nR2, i1val, &pos, ""); cBYE(status); } } if ( pos >= 0 ) { is_found = true; } if ( b_write_dst ) { if ( pos >= 0 ) { isrc_val = isrc_f2[pos]; } else { isrc_val = 0; } idst_f1[i1] = isrc_val; } break; case FLDTYPE_LONGLONG : ll1val = llf1[i1]; if ( ( ll1val < llminval ) || ( ll1val > llmaxval ) ) { pos = -1; } else { if ( ( prev_defined ) && ( ll1val == prev_ll1val ) ) { if ( prev_pos >= 0 ) { pos = prev_pos; } else { status = bin_search_ll(llf2, nR2, ll1val, &pos, ""); cBYE(status); } } else { status = bin_search_ll(llf2, nR2, ll1val, &pos, ""); cBYE(status); } } if ( pos >= 0 ) { is_found = true; } if ( b_write_dst ) { if ( pos >= 0 ) { llsrc_val = llsrc_f2[pos]; } else { llsrc_val = 0; } lldst_f1[i1] = llsrc_val; } break; default : go_BYE(-1); break; } prev_i1val = i1val; prev_ll1val = ll1val; prev_defined = true; if ( is_found ) { cfX[i1] = TRUE; } else { cfX[i1] = FALSE; } } fclose_if_non_null(ofp); fclose_if_non_null(dst_ofp); // Add output field to meta data if ( ( cfld != NULL ) && ( *cfld != '\0' ) ) { sprintf(str_meta_data, "n_sizeof=1:fldtype=bool:filename=%s", opfile); status = add_fld(docroot, db, t1, cfld, str_meta_data); cBYE(status); } else { switch ( src_f2_ifldtype ) { case FLDTYPE_INT : sprintf(str_meta_data, "n_sizeof=%lu:fldtype=int:filename=%s", sizeof(int), dst_opfile); break; case FLDTYPE_LONGLONG : sprintf(str_meta_data, "n_sizeof=%lu:fldtype=int:filename=%s", sizeof(long long), dst_opfile); break; } status = add_fld(docroot, db, t1, dst_f1, str_meta_data); cBYE(status); status = add_aux_fld(docroot, db, t1, dst_f1, opfile, "nn"); cBYE(status); } BYE: rs_munmap(f1_X, f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(src_f2_X, src_f2_nX); rs_munmap(dst_f1_X, 
dst_f1_nX); rs_munmap(cfX, cf_nX); rs_munmap(nn_f1_X, nn_f1_nX); rs_munmap(nn_src_f2_X, nn_src_f2_nX); if ( in_db == NULL ) { sqlite3_close(db); } free_if_non_null(opfile); fclose_if_non_null(ofp); free_if_non_null(dst_opfile); fclose_if_non_null(dst_ofp); return(status); }