int SHash::sh_fd_init(int _fd, int total, int k_len, int v_len, int write_flag) { fd = _fd; shm_flag = _SHASH_FILE_FLAG; key_len = k_len; value_len = v_len; if (sh_check_key_value_len(total) == -1) return -1; if (write_flag && file_size(fd) == 0) { if (total == 0) return -1; if (ftruncate(fd, old_size) == -1) { debug_log(("ftruncate fd=%d size=%llu: %s\n", fd, (unsigned long long)old_size, strerror(errno))); return -1; } } #ifndef WIN32 if ((mem = q_mmap(fd, &old_size, write_flag)) == NULL) { debug_log(("fd: %s\n", strerror(errno))); return -1; } #endif if (sh_init_head(write_flag) == -1) return -1; return 0; }
/*
 * Debug driver: maps `file`, copies its bytes into m_chapter, registers a
 * single dummy title, runs zim_parse() over the copy, prints the parsed
 * buffer and terminates the process with exit(0).
 *
 * Returns -1 if the file cannot be mapped; on success it does not return.
 */
int fz_test(const char *file)
{
	mapfile_t mt;

	/* The mapping result used to be ignored, so a missing or unreadable file
	 * led to memcpy() from an unset mt.
	 * NOTE(review): assumes q_mmap(file, &mt) returns non-zero on success --
	 * confirm against its declaration. */
	if (!q_mmap(file, &mt)) {
		fprintf(stderr, "fz_test: cannot map %s\n", file != NULL ? file : "(null)");
		return -1;
	}

	/* NOTE(review): m_chapter's capacity is not visible here; mt.size is
	 * assumed to fit -- confirm. */
	memcpy(m_chapter, mt.start, mt.size);

	m_wiki_dict->dict_add_title("1", 1);
	m_wiki_dict->dict_add_title_done();

	zim_parse(m_chapter, (int)mt.size, m_buf);
	printf("%s\n", m_buf);

	exit(0);
}
//--------------------------------------------------------------- // START FUNC DECL int mk_bins( char *src_tbl, char *src_fld, char *dst_tbl, char *str_dst_nR ) // STOP FUNC DECL { int status = 0; TBL_REC_TYPE src_tbl_rec, dst_tbl_rec; FLD_REC_TYPE src_fld_rec, nn_src_fld_rec; FLD_REC_TYPE out_lb_fld_rec, out_ub_fld_rec, out_cnt_fld_rec; long long src_nR = LLONG_MIN, dst_nR = LLONG_MIN; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_fld_id = INT_MIN, nn_src_fld_id = INT_MIN; int out_lb_fld_id, out_ub_fld_id, out_cnt_fld_id; #define BUFLEN 32 char buffer[BUFLEN]; char *X = NULL; size_t nX = 0; char *lb_X = NULL; size_t lb_nX = 0; char *ub_X = NULL; size_t ub_nX = 0; char *cnt_X = NULL; size_t cnt_nX = 0; char lb_opfile[MAX_LEN_FILE_NAME+1]; char ub_opfile[MAX_LEN_FILE_NAME+1]; char cnt_opfile[MAX_LEN_FILE_NAME+1]; int fldsz, lb_ddir_id = INT_MAX, ub_ddir_id = INT_MAX, cnt_ddir_id = INT_MAX; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( strcmp(src_tbl, dst_tbl) == 0 ) { go_BYE(-1); } zero_string(buffer, BUFLEN); zero_string(lb_opfile, (MAX_LEN_FILE_NAME+1)); zero_string(ub_opfile, (MAX_LEN_FILE_NAME+1)); zero_string(cnt_opfile, (MAX_LEN_FILE_NAME+1)); char *endptr; dst_nR = strtoll(str_dst_nR, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( dst_nR <= 1 ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbls[src_tbl_id].nR; if ( dst_nR >= src_nR ) { dst_nR = src_nR; } //-------------------------------------------------------- status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, &nn_src_fld_id, &nn_src_fld_rec); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); status = get_data(src_fld_rec, &X, &nX, false); cBYE(status); if ( nn_src_fld_id 
>= 0 ) { go_BYE(-1); } if ( src_fld_rec.fldtype != I4 ) { go_BYE(-1); } //-------------------------------------------------------- zero_string(buffer, BUFLEN); status = int_get_meta(src_tbl_id, src_fld_id, "sort_type", buffer); cBYE(status); if ( strcmp(buffer, "ascending") != 0 ) { fprintf(stderr, "Field [%s] in Table [%s] not sorted ascending\n", src_fld, src_tbl); go_BYE(-1); } //-------------------------------------------------------- // Create output space. May need to be truncated later on status = get_fld_sz(src_fld_rec.fldtype, &fldsz); cBYE(status); status = mk_temp_file(lb_opfile, (dst_nR * fldsz), &lb_ddir_id); cBYE(status); status = q_mmap(lb_ddir_id, lb_opfile, &lb_X, &lb_nX, true); cBYE(status); status = mk_temp_file(ub_opfile, (dst_nR * fldsz), &ub_ddir_id); cBYE(status); status = q_mmap(ub_ddir_id, ub_opfile, &ub_X, &ub_nX, true); cBYE(status); status = mk_temp_file(cnt_opfile, (dst_nR * sizeof(long long)), &cnt_ddir_id); cBYE(status); status = q_mmap(cnt_ddir_id, cnt_opfile, &cnt_X, &cnt_nX, true); cBYE(status); //------------------------------------------------------ // Now we can finally get to work! 
int *I4lb = (int *)lb_X; int *I4ub = (int *)ub_X; for ( int i = 0; i < dst_nR; i++ ) { I4lb[i] = INT_MAX; I4ub[i] = INT_MIN; } long long *I8cnt = (long long *)cnt_X; int *items = (int *)X; for ( int i = 0; i < dst_nR; i++ ) { I8cnt[i] = 0; } int idx = 0; int num_bins_wanted = dst_nR; int num_bins_created = 0; int num_items_left = src_nR; int currval; for ( ; ; ) { long long stopidx_1, stopidx_2, stopidx_3, pos; int num_items_consumed = src_nR - num_items_left; #ifdef DEBUG long long chk_cnt = 0; for ( int i = 0; i < dst_nR; i++ ) { chk_cnt += I8cnt[i]; } if ( chk_cnt != num_items_consumed ) { go_BYE(-1); } #endif if ( num_bins_created >= num_bins_wanted ) { go_BYE(-1); } int bin_size = num_items_left / (num_bins_wanted - num_bins_created); if ( bin_size == 0 ) { bin_size++; } stopidx_1 = idx + bin_size; if ( stopidx_1 >= src_nR ) { /* close out the last bin */ currval = items[idx]; I4lb[num_bins_created] = currval; if ( I4lb[num_bins_created-1] == currval ) { go_BYE(-1); } I4ub[num_bins_created] = items[src_nR-1]; I8cnt[num_bins_created] = src_nR - idx ; num_bins_created++; break; } currval = items[idx]; I4lb[num_bins_created] = currval; // do binary search to find the next location with same val as current val status=bin_search_I4(items+idx, num_items_left, currval, &pos, "highest"); if ( pos < 0 ) { go_BYE(-1); } stopidx_2 = pos + idx; if ( stopidx_2 > stopidx_1 ) { /* This means that the number of items with value "currval" * exceeds the bin size. 
So, in this case lb=ub */ I4ub[num_bins_created] = currval; I8cnt[num_bins_created] = stopidx_2 - idx + 1; idx = stopidx_2 + 1; } else { int nextval = items[stopidx_1]; I4ub[num_bins_created] = nextval; status = bin_search_I4(items+idx, num_items_left, nextval, &pos, "highest"); if ( pos < 0 ) { go_BYE(-1); } stopidx_3 = pos + idx; if ( ( stopidx_3 - stopidx_1 + 1 ) <= 0 ) { printf("HI\n"); } I8cnt[num_bins_created] = stopidx_3 - idx + 1; idx = stopidx_3 + 1; } num_items_left -= I8cnt[num_bins_created]; if ( num_items_left + idx != src_nR ) { go_BYE(-1); } num_bins_created++; if ( idx == src_nR ) { break; } // consumed all input if ( idx > src_nR ) { go_BYE(-1); } } if ( num_bins_created > dst_nR ) { go_BYE(-1); } dst_nR = num_bins_created; #ifdef DEBUG long long chk_cnt = 0; for ( int i = 0; i < dst_nR; i++ ) { if ( I4lb[i] > I4ub[i] ) { go_BYE(-1); } if ( i > 0 ) { if ( I4lb[i] <= I4lb[i-1] ) { go_BYE(-1); } } // TODO P2: Is this check correct? if ( I8cnt[i] <= 0 ) { go_BYE(-1); } chk_cnt += I8cnt[i]; } if ( chk_cnt != src_nR ) { go_BYE(-1); } #endif //------------------------------------------------------ rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); rs_munmap(cnt_X, cnt_nX); // truncate to proper size q_trunc(lb_ddir_id, lb_opfile, (dst_nR * fldsz)); q_trunc(ub_ddir_id, ub_opfile, (dst_nR * fldsz)); q_trunc(cnt_ddir_id, cnt_opfile, (dst_nR * sizeof(long long))); //----------------------------------------------------------- status = del_tbl(dst_tbl, -1); cBYE(status); sprintf(buffer, "%lld", dst_nR); status = add_tbl(dst_tbl, buffer, &dst_tbl_id, &dst_tbl_rec); cBYE(status); //----------------------------------------------------------- status = add_fld(dst_tbl_id, "lb", lb_ddir_id, lb_opfile, src_fld_rec.fldtype, -1, &out_lb_fld_id, &out_lb_fld_rec); cBYE(status); status = add_fld(dst_tbl_id, "ub", ub_ddir_id, ub_opfile, src_fld_rec.fldtype, -1, &out_ub_fld_id, &out_ub_fld_rec); cBYE(status); // Add count field to dst table status = add_fld(dst_tbl_id, "cnt", 
cnt_ddir_id, cnt_opfile, I8, -1, &out_cnt_fld_id, &out_cnt_fld_rec); cBYE(status); // Since src_fld is sorted ascending, so is dst_fld status = int_set_meta(dst_tbl_id, out_lb_fld_id, "sort_type", "ascending"); status = int_set_meta(dst_tbl_id, out_ub_fld_id, "sort_type", "ascending"); //----------------------------------------------------------- BYE: rs_munmap(X, nX); rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); rs_munmap(cnt_X, cnt_nX); return(status); }
int WikiManage::wiki_do_url(void *type, int sock, HttpParse *http, int idx) { int len; char *data = m_math_data[idx]; WikiSocket *ws; ws = (WikiSocket *)type; const char *url = http->hp_url(); if (strncasecmp(url, "curr:", 5) == 0) { struct content_split *p = &m_split_pos[atoi(url + 5) + 1]; #if 0 if ((len = m_wiki_zh->wz_convert_2hans(m_curr_content + p->start, p->len, data)) <= 0) #endif { data = m_curr_content + p->start; len = p->len; } ws->ws_http_output_head(sock, 200, "text/html", len); ws->ws_http_output_body(sock, data, len); #if 0 int i; for (i = 0; i < len / 1024; i++) { ws->ws_http_output_body(sock, data + i * 1024, 1024); } if (len % 1024 > 0) { ws->ws_http_output_body(sock, data + i * 1024, len % 1024); } #endif return 0; } /* body image */ if (url[0] == 'B') { mapfile_t mt; const char *file = m_wiki_config->wc_get_body_image_path(); if (q_mmap(file, &mt)) { ws->ws_http_output_head(sock, 200, "image/png", (int)mt.size); ws->ws_http_output_body(sock, (char *)mt.start, mt.size); q_munmap(&mt); } return 0; } /* image */ if (url[0] == 'I') { char *p = (char *)url + 2; int size, one_block; WikiImage *wiki_image = CURR_WIKI(image); if (wiki_image == NULL) goto not_found; url_convert(p); pthread_mutex_lock(&m_mutex); if (wiki_image->we_reset(idx, p, &size) == 0) { #if 0 if (strncasecmp(url + strlen(url) - 4, ".svg", 4) == 0) ws->ws_http_output_head(sock, 200, "image/svg+xml", size); else #endif ws->ws_http_output_head(sock, 200, "image/png", size); while (wiki_image->we_read_next(idx, data, &one_block)) { ws->ws_http_output_body(sock, data, one_block); } } else { ws->ws_http_output_head(sock, 404, "image/png", 0); } pthread_mutex_unlock(&m_mutex); return 0; } /* math */ if (url[0] == 'M') { WikiMath *wiki_math = CURR_WIKI(math); if (wiki_math != NULL && wiki_math->wm_find(url, data, &len, 1)) { ws->ws_http_output_head(sock, 200, "image/png", len); ws->ws_http_output_body(sock, data, len); } else ws->ws_http_output_head(sock, 404, "image/png", 0); return 0; } 
not_found: ws->ws_http_output_head(sock, 404, "text/html", 0); return 0; }
// START FUNC DECL int ext_approx_frequent( char *tbl, char *fld, char *cfld, char *outtbl, char *str_min_freq, char *str_max_err, int *ptr_is_good ) // STOP FUNC DECL { int status = 0; TBL_REC_TYPE outtbl_rec; int outtbl_id; TBL_REC_TYPE tbl_rec; int tbl_id; FLD_REC_TYPE fld_rec; int fld_id; FLD_REC_TYPE cfld_rec; int cfld_id; FLD_REC_TYPE out_val_fld_rec; int out_val_fld_id; FLD_REC_TYPE out_cnt_fld_rec; int out_cnt_fld_id; FLD_REC_TYPE nn_fld_rec; int nn_fld_id; FLD_REC_TYPE nn_cfld_rec; int nn_cfld_id; char *fld_X = NULL; size_t fld_nX = 0; char *nn_fld_X = NULL; size_t nn_fld_nX = 0; char *out_val_X = NULL; size_t out_val_nX = 0; char *out_cnt_X = NULL; size_t out_cnt_nX = 0; char *cfld_X = NULL; size_t cfld_nX = 0; int fldsz = 0; size_t filesz = 0; long long nR = 0, nR_out = 0, actual_nR_out = 0; int out_val_ddir_id = -1, out_val_fileno = -1; int out_cnt_ddir_id = -1, out_cnt_fileno = -1; long long min_freq; long long max_err; if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( fld == NULL ) || ( *fld == '\0' ) ) { go_BYE(-1); } if ( ( outtbl == NULL ) || ( *outtbl == '\0' ) ) { go_BYE(-1); } if ( ( str_min_freq == NULL ) || ( *str_min_freq == '\0' ) ) { go_BYE(-1); } if ( ( str_max_err == NULL ) || ( *str_max_err == '\0' ) ) { go_BYE(-1); } if ( strcmp(tbl, outtbl) == 0 ) { go_BYE(-1); } status = stoI8(str_min_freq, &min_freq); cBYE(status); status = stoI8(str_max_err, &max_err); cBYE(status); if ( min_freq <= 1 ) { go_BYE(-1); } if ( max_err <= 1 ) { go_BYE(-1); } if ( ( min_freq - max_err ) < 1 ) { go_BYE(-1); } status = is_tbl(tbl, &tbl_id, &tbl_rec); cBYE(status); if ( tbl_id < 0 ) { go_BYE(-1); } nR = tbl_rec.nR; status = is_fld(NULL, tbl_id, fld, &fld_id, &fld_rec, &nn_fld_id,&nn_fld_rec); cBYE(status); if ( fld_id < 0 ) { go_BYE(-1); } status = get_data(fld_rec, &fld_X, &fld_nX, false); cBYE(status); if ( fld_rec.fldtype != I4 ) { go_BYE(-1); } // Get nn field for f1 if if it exists if ( nn_fld_id >= 0 ) { status = get_data(nn_fld_rec, 
&nn_fld_X, &nn_fld_nX, false); cBYE(status); if ( ( cfld == NULL ) && ( *cfld == '\0' ) ) { go_BYE(-1); } cfld_X = nn_fld_X; } else { if ( ( cfld == NULL ) && ( *cfld == '\0' ) ) { status = is_fld(NULL, tbl_id, cfld, &cfld_id, &cfld_rec, &nn_cfld_id,&nn_cfld_rec); cBYE(status); if ( cfld_id < 0 ) { go_BYE(-1); } status = get_data(cfld_rec, &cfld_X, &cfld_nX, false); cBYE(status); if ( cfld_rec.fldtype != I1 ) { go_BYE(-1); } } else { cfld_X = NULL; } } // TODO P3: Compute sum of cfld here and send to Kishore. // Compute worst case for nR_out nR_out = nR / ( min_freq - max_err ); zero_fld_rec(&out_cnt_fld_rec); out_cnt_fld_rec.fldtype = I4; // Note that count is I4 status = get_fld_sz(out_cnt_fld_rec.fldtype, &fldsz); cBYE(status); filesz = nR_out * fldsz; status = mk_temp_file(filesz, &out_cnt_ddir_id, &out_cnt_fileno); cBYE(status); status = q_mmap(out_cnt_ddir_id, out_cnt_fileno, &out_cnt_X, &out_cnt_nX, true); cBYE(status); zero_fld_rec(&out_val_fld_rec); out_val_fld_rec.fldtype = fld_rec.fldtype; status = get_fld_sz(out_val_fld_rec.fldtype, &fldsz); cBYE(status); filesz = nR_out * fldsz; status = mk_temp_file(filesz, &out_val_ddir_id, &out_val_fileno); cBYE(status); status = q_mmap(out_val_ddir_id, out_val_fileno, &out_val_X, &out_val_nX, true); cBYE(status); // Now the real work begins status = approx_frequent((int *)fld_X, (char *)cfld_X, nR, min_freq, max_err, (int *)out_val_X, (int *)out_cnt_X, nR_out, &actual_nR_out, ptr_is_good); cBYE(status); // truncate output as needed if ( actual_nR_out <= 0 ) { go_BYE(-1); } status = get_fld_sz(out_val_fld_rec.fldtype, &fldsz); cBYE(status); filesz = actual_nR_out * fldsz; status = q_trunc(out_val_ddir_id, out_val_fileno, filesz); cBYE(status); status = get_fld_sz(I4, &fldsz); cBYE(status); filesz = actual_nR_out * fldsz; status = q_trunc(out_cnt_ddir_id, out_cnt_fileno, filesz); cBYE(status); //---------------------------------------- char strbuf[32]; zero_string(strbuf, 32); sprintf(strbuf, "%lld", actual_nR_out); 
status = add_tbl(outtbl, strbuf, &outtbl_id, &outtbl_rec); cBYE(status); status = add_fld(outtbl_id, "cnt", out_cnt_ddir_id, out_cnt_fileno, &out_cnt_fld_id, &out_cnt_fld_rec); cBYE(status); status = add_fld(outtbl_id, fld, out_val_ddir_id, out_val_fileno, &out_val_fld_id, &out_val_fld_rec); cBYE(status); BYE: rs_munmap(fld_X, fld_nX); rs_munmap(out_val_X, out_val_nX); rs_munmap(out_cnt_X, out_cnt_nX); return status ; }
// last review 9/10/2013 //--------------------------------------------------------------- // START FUNC DECL int subsample( char *t1, char *f1, char *str_nR2, char *t2, char *f2 ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; TBL_REC_TYPE t1_rec, t2_rec; FLD_REC_TYPE f1_rec, f2_rec; FLD_REC_TYPE nn_f1_rec; long long nR1, nR2; int t1_id = INT_MIN, t2_id = INT_MIN; int f1_id = INT_MIN, f2_id = INT_MIN; int nn_f1_id = INT_MIN; int fldsz = INT_MAX; size_t filesz = 0; int ddir_id = -1, fileno = -1; //---------------------------------------------------------------- if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); } status = stoI8(str_nR2, &nR2); cBYE(status); if ( nR2 <= 0 ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); if ( t1_id < 0 ) { go_BYE(-1); } nR1 = t1_rec.nR; if ( nR1 <= nR2 ) { go_BYE(-1); } status = is_fld(NULL, t1_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); if ( f1_id < 0 ) { go_BYE(-1); } status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); // Not implemented for following cases if ( nn_f1_id >= 0 ) { go_BYE(-1); } switch ( f1_rec.fldtype ) { case I4 : case I8 : break; default : go_BYE(-1); break; } //--------------------------------------------- status = get_fld_sz(f1_rec.fldtype, &fldsz); cBYE(status); filesz = nR2 * fldsz; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &f2_X, &f2_nX, 1); cBYE(status); long long block_size_2; int nT; status = partition(nR2, 1024, -1, &block_size_2, &nT); cBYE(status); long long block_size_1 = (nR1 / nT); int ratio = nR1 / nR2; if ( ratio == 1 ) { ratio++; } for ( int tid = 0; tid < nT; 
tid++ ) { if ( status < 0 ) { continue; } int *idxs = NULL; long long lb1 = tid * block_size_1; /* input lb */ long long ub1 = lb1 + block_size_1; /* input ub */ if ( ub1 > nR1 ) { ub1 = nR1; } /* corner case */ if ( ub1 <= lb1 ) { status = -1; continue; } long long range1 = ub1 - lb1; if ( range1 >= 16*1048576 ) { status = -1; continue; } idxs = malloc(range1 * sizeof(int)); if ( idxs == NULL ) { status = -1; continue; } for ( int i = 0; i < range1; i++ ) { idxs[i] = i; } status = permute_I4(idxs, range1); if ( status < 0 ) { continue; } long long lb2 = tid * block_size_2; /* output lb */ long long ub2 = lb2 + block_size_2; /* output ub */ if ( tid == (nT-1) ) { ub2 = nR2; } /* corner case */ long long range2 = ub2 - lb2; int *I4f1 = NULL, *I4f2 = NULL; long long *I8f1 = NULL, *I8f2 = NULL; I4f1 = (int *)f1_X; I4f1 += lb1; I4f2 = (int *)f2_X; I4f2 += lb2; I8f1 = (long long *)f1_X; I8f1 += lb1; I8f2 = (long long *)f2_X; I8f2 += lb2; switch ( f1_rec.fldtype ) { case I4 : for ( int i = 0; i < range2; i++ ) { I4f2[i] = I4f1[idxs[i]]; } break; case I8 : for ( int i = 0; i < range2; i++ ) { I8f2[i] = I8f1[idxs[i]]; } break; default : if ( status == 0 ) { WHEREAMI; } status = -1; continue; break; } free_if_non_null(idxs); } cBYE(status); //--- Add to t2 status = del_tbl(t2, -1); cBYE(status); char strbuf[32]; sprintf(strbuf, "%lld", nR2); status = add_tbl(t2, strbuf, &t2_id, &t2_rec); cBYE(status); zero_fld_rec(&f2_rec); f2_rec.fldtype = f1_rec.fldtype; status = add_fld(t2_id, f2, ddir_id, fileno, &f2_id, &f2_rec); cBYE(status); BYE: rs_munmap(f1_X, f1_nX); rs_munmap(f2_X, f2_nX); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int count( char *src_tbl, char *fk_dst, char *cfld, char *dst_tbl, char *cnt_fld ) // STOP FUNC DECL { int status = 0; char *X = NULL; size_t nX = 0; char *op_X = NULL; size_t n_op_X = 0; char *cfld_X = NULL; size_t cfld_nX = 0; TBL_REC_TYPE src_tbl_rec; int src_tbl_id; TBL_REC_TYPE dst_tbl_rec; int dst_tbl_id; FLD_REC_TYPE fk_dst_rec; int fk_dst_id; FLD_REC_TYPE nn_fk_dst_rec; int nn_fk_dst_id; FLD_REC_TYPE cfld_rec; int cfld_id; FLD_REC_TYPE nn_cfld_rec; int nn_cfld_id; FLD_REC_TYPE cnt_fld_rec; int cnt_fld_id; char opfile[MAX_LEN_FILE_NAME+1]; long long src_nR, dst_nR; #define MAX_LEN 32 char str_dst_nR[MAX_LEN]; int **partial_counts = NULL; int nT = 0; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( fk_dst == NULL ) || ( *fk_dst == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( ( cnt_fld == NULL ) || ( *cnt_fld == '\0' ) ) { go_BYE(-1); } zero_string(str_dst_nR, MAX_LEN); zero_string(opfile, (MAX_LEN_FILE_NAME+1)); //-------------------------------------------------------- status = is_tbl(dst_tbl, &dst_tbl_id, &dst_tbl_rec); cBYE(status); chk_range(dst_tbl_id, 0, g_n_tbl); dst_nR = g_tbls[dst_tbl_id].nR; if ( dst_nR >= INT_MAX ) { go_BYE(-1); } status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbls[src_tbl_id].nR; if ( src_nR >= INT_MAX ) { go_BYE(-1); } status = is_fld(NULL, src_tbl_id, fk_dst, &fk_dst_id, &fk_dst_rec, &nn_fk_dst_id, &nn_fk_dst_rec); cBYE(status); chk_range(fk_dst_id, 0, g_n_fld); status = get_data(fk_dst_rec, &X, &nX, 0); cBYE(status); if ( nn_fk_dst_id >= 0 ) { fprintf(stderr, "NOT IMPLEMENTED\n"); go_BYE(-1); } if ( ( cfld != NULL ) && ( *cfld != '\0' ) ) { status = is_fld(NULL, src_tbl_id, cfld, &cfld_id, &cfld_rec, &nn_cfld_id, &nn_cfld_rec); if ( 
cfld_id >= 0 ) { if ( cfld_rec.fldtype != I1 ) { go_BYE(-1); } if ( nn_cfld_id >= 0 ) { go_BYE(-1); } } status = get_data(cfld_rec, &cfld_X, &cfld_nX, 0); cBYE(status); } //------------------------------------------------------ int ddir_id = INT_MAX; status = mk_temp_file(opfile, (dst_nR * sizeof(int)), &ddir_id); cBYE(status); status = q_mmap(ddir_id, opfile, &op_X, &n_op_X, true); cBYE(status); int *cntI4 = (int *)op_X; for ( int i = 0; i < dst_nR ; i++ ) { cntI4[i] = 0; } //------------------------------------------------------ if ( dst_nR > INT_MAX ) { go_BYE(-1); } /* required by count_In */ bool is_sequential = true; if ( dst_nR > 1048576 ) { fprintf(stderr, "Count(%s) = %lld > 1048576. Use another algorithm\n", dst_tbl, dst_nR); go_BYE(-1); } // TODO: Need to adjust parallelism better than current hack if ( ( dst_nR <= 32768 ) && ( src_nR > 1048576 ) ) { is_sequential = false; } // Initialize counters to 0 #ifdef IPP ippsZero_32s(cntI4, dst_nR); #else for ( int i = 0; i < dst_nR; i++ ) { cntI4[i] = 0; } #endif // TODO: Parallelism does not seem to provide any speedup at all // is_sequential = true; /* TODO P1: parallel version taking longer !!! 
*/ // fprintf(stderr, "forcing sequential execution in count() \n"); int max_num_chunks = g_num_cores; long long min_block_size = 8192, block_size; if ( is_sequential == false ) { status = partition(src_nR, min_block_size, max_num_chunks, &block_size, &nT); cBYE(status); partial_counts = (int **)malloc(nT * sizeof(int *)); return_if_malloc_failed(partial_counts); for ( int tid = 0; tid < nT; tid++ ) { partial_counts[tid] = (int *)malloc(dst_nR * sizeof(int)); return_if_malloc_failed(partial_counts[tid]); } } // fprintf(stderr, "nT = %d \n", nT); // fprintf(stderr, "block_size = %lld \n", block_size); // fprintf(stderr, "src_nR = %lld \n", src_nR); if ( cfld_id >= 0 ) { if ( is_sequential ) { switch ( fk_dst_rec.fldtype ) { case I1 : status = count_nn_I1((char *)X, src_nR, cfld_X, cntI4, dst_nR); cBYE(status); break; case I2 : status = count_nn_I2((short *)X, src_nR, cfld_X, cntI4, dst_nR); cBYE(status); break; case I4 : status = count_nn_I4((int *)X, src_nR, cfld_X, cntI4, dst_nR); cBYE(status); break; case I8 : status = count_nn_I8((long long *)X, src_nR, cfld_X, cntI4, dst_nR); cBYE(status); break; default : go_BYE(-1); break; } } else { cilkfor ( int tid = 0; tid < nT; tid++ ) { // Initialize counts to 0 int *partial_counts_t = partial_counts[tid]; // Initialize counters to 0 #ifdef IPP ippsZero_32s(partial_counts_t, dst_nR); #else assign_const_I4(partial_counts_t, 0, dst_nR); #endif long long lb = block_size * tid; long long ub = lb + block_size; if ( tid == (nT-1) ) { ub = src_nR; } char *inI1 = (char *)X; inI1 += lb; short *inI2 = (short *)X; inI2 += lb; int *inI4 = (int *)X; inI4 += lb; long long *inI8 = (long long *)X; inI8 += lb; long long t_src_nR = ub - lb; switch ( fk_dst_rec.fldtype ) { case I1 : status = count_nn_I1(inI1, t_src_nR, cfld_X, partial_counts_t, dst_nR); cBYE(status); break; case I2 : status = count_nn_I2(inI2, t_src_nR, cfld_X, partial_counts_t, dst_nR); cBYE(status); break; case I4 : status = count_nn_I4(inI4, t_src_nR, cfld_X, 
partial_counts_t, dst_nR); cBYE(status); break; case I8 : status = count_nn_I8(inI8, t_src_nR, cfld_X, partial_counts_t, dst_nR); cBYE(status); break; default : go_BYE(-1); break; } } } } else { if ( is_sequential ) {
// last review 9/5/2013 //--------------------------------------------------------------- // START FUNC DECL int f1f2opf3( char *tbl, char *f1, char *f2, char *op, char *f3 ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *nn_f1_X = NULL; size_t nn_f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *nn_f2_X = NULL; size_t nn_f2_nX = 0; FLD_REC_TYPE f1_rec, nn_f1_rec; FLD_REC_TYPE f2_rec, nn_f2_rec; FLD_REC_TYPE f3_rec, nn_f3_rec; long long nR; TBL_REC_TYPE tbl_rec; int tbl_id = INT_MIN; FLD_TYPE f3type; int f1_id = INT_MIN, nn_f1_id = INT_MIN; int f2_id = INT_MIN, nn_f2_id = INT_MIN; int f3_id = INT_MIN, nn_f3_id = INT_MIN; char *op_X = NULL; size_t n_op_X = 0; char *nn_op_X = NULL; size_t n_nn_op_X = 0; int ddir_id = -1, fileno = -1; int nn_ddir_id = -1, nn_fileno = -1; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( f3 == NULL ) || ( *f3 == '\0' ) ) { go_BYE(-1); } if ( ( op == NULL ) || ( *op == '\0' ) ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id, &tbl_rec); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); nR = tbl_rec.nR; //-------------------------------------------------------- status = is_fld(NULL, tbl_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); chk_range(f1_id, 0, g_n_fld); status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); if ( nn_f1_id >= 0 ) { status = get_data(nn_f1_rec, &nn_f1_X, &nn_f1_nX, false); cBYE(status); } //-------------------------------------------------------- status = is_fld(NULL, tbl_id, f2, &f2_id, &f2_rec, &nn_f2_id, &nn_f2_rec); cBYE(status); chk_range(f2_id, 0, g_n_fld); status = get_data(f2_rec, &f2_X, &f2_nX, false); cBYE(status); if ( nn_f2_id >= 0 ) { status = get_data(nn_f2_rec, &nn_f2_X, &nn_f2_nX, false); 
cBYE(status); } //-------------------------------------------------------- if ( ( strcmp(op, "+") == 0 ) || ( strcmp(op, "-") == 0 ) || ( strcmp(op, "*") == 0 ) || ( strcmp(op, "/") == 0 ) || ( strcmp(op, "&&") == 0 ) || ( strcmp(op, "||") == 0 ) || ( strcmp(op, ">") == 0 ) || ( strcmp(op, "<") == 0 ) || ( strcmp(op, ">=") == 0 ) || ( strcmp(op, "<=") == 0 ) || ( strcmp(op, "!=") == 0 ) || ( strcmp(op, "==") == 0 ) || ( strcmp(op, "concat") == 0 ) || ( strcmp(op, "&&!") == 0 ) || /* a and not b */ ( strcmp(op, "&") == 0 ) || /* bitwise and */ ( strcmp(op, "|") == 0 ) || /* bitwise or */ ( strcmp(op, "^") == 0 ) || /* bitwise xor */ ( strcmp(op, "<<") == 0 ) || /* shift left */ ( strcmp(op, ">>") == 0 ) /* shift right */ ) { /*-START: Allocate space ------------------------------*/ status = get_type_op_fld("f1f2opf3", op, f1_rec.fldtype, f2_rec.fldtype, "", &f3type); cBYE(status); int fldsz= 0; size_t filesz = 0; if ( f3type == B ) { status = get_file_size_B(nR, &filesz); cBYE(status); } else { int fldsz; status = get_fld_sz(f3type, &fldsz); cBYE(status); filesz = fldsz * nR; } status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &op_X, &n_op_X, 1); cBYE(status); if ( ( nn_f1_X != NULL ) || ( nn_f2_X != NULL ) ) { status = get_fld_sz(I1, &fldsz); cBYE(status); filesz = fldsz * nR; status = mk_temp_file(filesz, &nn_ddir_id, &nn_fileno); cBYE(status); status = q_mmap(nn_ddir_id, nn_fileno, &nn_op_X, &n_nn_op_X, true); cBYE(status); } /*-STOP: Allocate space ------------------------------*/ status = vec_f1f2opf3(nR, f1_rec.fldtype, f2_rec.fldtype, f1_X, nn_f1_X, f2_X, nn_f2_X, op, f3type, op_X, nn_op_X); cBYE(status); zero_fld_rec(&f3_rec); f3_rec.fldtype = f3type; status = add_fld(tbl_id, f3, ddir_id, fileno, &f3_id, &f3_rec); cBYE(status); if ( nn_fileno > 0 ) { zero_fld_rec(&nn_f3_rec); nn_f3_rec.fldtype = I1; status = add_aux_fld(NULL, tbl_id, NULL, f3_id, nn_ddir_id, nn_fileno, nn, &nn_f3_id, &nn_f3_rec); cBYE(status); } 
} else { fprintf(stderr, "Invalid op = [%s] \n", op); go_BYE(-1); } BYE: rs_munmap(op_X, n_op_X); rs_munmap(nn_op_X, n_nn_op_X); rs_munmap(f1_X, f1_nX); rs_munmap(nn_f1_X, nn_f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(nn_f2_X, nn_f2_nX); return(status); }
// START FUNC DECL int udf_deg2( char *TM_tbl, /* member table */ char *TM_TC_lb, /* I8 */ char *TM_TC_cnt, /* I4 */ char *TD1_tbl, /* first degree connections */ char *TD1_fk_TM, /* I4 */ char *TC_tbl, /* connections */ char *TC_mid, /* I4 */ char *TD2_tbl, /* second degree connections */ char *TD2_mid /* I4 */ ) // STOP FUNC DECL { int status = 0; int *deg2 = NULL; int n_deg2 = 0; int *alt_deg2 = NULL, *xloc = NULL; unsigned int a, b; int n_alt_deg2; int block_size, nT; char *TM_TC_lb_X = NULL; size_t TM_TC_lb_nX = 0; char *TM_TC_cnt_X = NULL; size_t TM_TC_cnt_nX = 0; char *TC_mid_X = NULL; size_t TC_mid_nX = 0; char *TD2_mid_X = NULL; size_t TD2_mid_nX = 0; char *TD1_fk_TM_X = NULL; size_t TD1_fk_TM_nX = 0; int TM_tbl_id = -1; TBL_REC_TYPE TM_tbl_rec; long long nR_TM; int TC_tbl_id = -1; TBL_REC_TYPE TC_tbl_rec; long long nR_TC; int TD1_tbl_id = -1; TBL_REC_TYPE TD1_tbl_rec; long long nR_TD1; int TD2_tbl_id = -1; TBL_REC_TYPE TD2_tbl_rec; long long nR_TD2; int nn_fld_id = -1; FLD_REC_TYPE nn_fld_rec; int TM_TC_lb_fld_id = -1; FLD_REC_TYPE TM_TC_lb_fld_rec; int TM_TC_cnt_fld_id = -1; FLD_REC_TYPE TM_TC_cnt_fld_rec; int TC_mid_fld_id = -1; FLD_REC_TYPE TC_mid_fld_rec; int TD1_fk_TM_fld_id = -1; FLD_REC_TYPE TD1_fk_TM_fld_rec; int TD2_mid_fld_id = -1; FLD_REC_TYPE TD2_mid_fld_rec; if ( ( TM_tbl == NULL ) || ( *TM_tbl == '\0' ) ) { go_BYE(-1); } if ( ( TC_tbl == NULL ) || ( *TC_tbl == '\0' ) ) { go_BYE(-1); } if ( ( TD1_tbl == NULL ) || ( *TD1_tbl == '\0' ) ) { go_BYE(-1); } if ( ( TD2_tbl == NULL ) || ( *TD2_tbl == '\0' ) ) { go_BYE(-1); } if ( strcmp(TD2_tbl, TM_tbl) == 0 ) { go_BYE(-1); } if ( strcmp(TD2_tbl, TC_tbl) == 0 ) { go_BYE(-1); } if ( strcmp(TD2_tbl, TD1_tbl) == 0 ) { go_BYE(-1); } status = is_tbl(TM_tbl, &TM_tbl_id, &TM_tbl_rec); cBYE(status); if ( TM_tbl_id < 0 ) { go_BYE(-1); } nR_TM = TM_tbl_rec.nR; status = is_fld(NULL, TM_tbl_id, TM_TC_lb, &TM_TC_lb_fld_id, &TM_TC_lb_fld_rec, &nn_fld_id, &nn_fld_rec); if ( TM_TC_lb_fld_id < 0 ) { go_BYE(-1); } if 
( nn_fld_id >= 0 ) { go_BYE(-1); } if ( TM_TC_lb_fld_rec.fldtype != I8 ) { go_BYE(-1); } status = get_data(TM_TC_lb_fld_rec, &TM_TC_lb_X, &TM_TC_lb_nX, 0); long long *TM_TC_lb_I8 = (long long *)TM_TC_lb_X; status = is_fld(NULL, TM_tbl_id, TM_TC_cnt, &TM_TC_cnt_fld_id, &TM_TC_cnt_fld_rec, &nn_fld_id, &nn_fld_rec); if ( TM_TC_cnt_fld_id < 0 ) { go_BYE(-1); } if ( nn_fld_id >= 0 ) { go_BYE(-1); } if ( TM_TC_cnt_fld_rec.fldtype != I4 ) { go_BYE(-1); } status = get_data(TM_TC_cnt_fld_rec, &TM_TC_cnt_X, &TM_TC_cnt_nX, 0); int *TM_TC_cnt_I4 = (int *)TM_TC_cnt_X; /*----------------------------------------------------------------*/ status = is_tbl(TC_tbl, &TC_tbl_id, &TC_tbl_rec); cBYE(status); if ( TC_tbl_id < 0 ) { go_BYE(-1); } nR_TC = TC_tbl_rec.nR; status = is_fld(NULL, TC_tbl_id, TC_mid, &TC_mid_fld_id, &TC_mid_fld_rec, &nn_fld_id, &nn_fld_rec); if ( TC_mid_fld_id < 0 ) { go_BYE(-1); } if ( nn_fld_id >= 0 ) { go_BYE(-1); } if ( TC_mid_fld_rec.fldtype != I4 ) { go_BYE(-1); } status = get_data(TC_mid_fld_rec, &TC_mid_X, &TC_mid_nX, 0); int *TC_mid_I4 = (int *)TC_mid_X; /*----------------------------------------------------------------*/ status = is_tbl(TD1_tbl, &TD1_tbl_id, &TD1_tbl_rec); cBYE(status); if ( TD1_tbl_id < 0 ) { go_BYE(-1); } nR_TD1 = TD1_tbl_rec.nR; status = is_fld(NULL, TD1_tbl_id, TD1_fk_TM, &TD1_fk_TM_fld_id, &TD1_fk_TM_fld_rec, &nn_fld_id, &nn_fld_rec); if ( TD1_fk_TM_fld_id < 0 ) { go_BYE(-1); } if ( nn_fld_id >= 0 ) { go_BYE(-1); } if ( TD1_fk_TM_fld_rec.fldtype != I4 ) { go_BYE(-1); } status = get_data(TD1_fk_TM_fld_rec, &TD1_fk_TM_X, &TD1_fk_TM_nX, 0); int *TD1_fk_TM_I4 = (int *)TD1_fk_TM_X; /*----------------------------------------------------------------*/ // Set up data structures for each thread #define MAX_NUM_SECOND_DEGREE 32*1048576 deg2 = malloc(MAX_NUM_SECOND_DEGREE * sizeof(int)); return_if_malloc_failed(deg2); int *bak_deg2 = deg2; //-------------------------------------------------- unsigned long long t0 = get_time_usec(); for ( int i 
= 0; i < nR_TD1; i++ ) { int TM_idx = TD1_fk_TM_I4[i]; long long TC_idx = TM_TC_lb_I8[TM_idx]; int TC_cnt = TM_TC_cnt_I4[TM_idx]; // TODO P1 TO BE FIXED: realloc needed if ( n_deg2 + TC_cnt > MAX_NUM_SECOND_DEGREE ) { continue; } memcpy(deg2, TC_mid_I4 + TC_idx, ((sizeof(int) * TC_cnt))); deg2 += TC_cnt; n_deg2 += TC_cnt; } // fprintf(stderr, "Need to process %d people \n", cnt); unsigned long long t1 = get_time_usec(); deg2 = bak_deg2; if ( n_deg2 > 2*1048576 ) { // TODO P2 Document this choice n_alt_deg2 = max(1024, (n_deg2 * 2)); n_alt_deg2 = prime_geq(n_alt_deg2); alt_deg2 = malloc(n_alt_deg2 * sizeof(int)); return_if_malloc_failed(alt_deg2); xloc = malloc(n_deg2 * sizeof(int)); return_if_malloc_failed(xloc); unsigned long long seed = get_time_usec(); srand48(seed); a = (unsigned long long)mrand48(); a = prime_geq(a); b = (unsigned long long)mrand48(); b = prime_geq(b); nT = g_num_cores; //------------------------------------- block_size = n_deg2 / nT; #pragma omp parallel for for ( int tid = 0; tid < nT; tid++ ) { int lb = tid * block_size; int ub = lb + block_size; if ( tid == (nT-1) ) { ub = n_deg2; } for ( int i = lb; i < ub; i++ ) { xloc[i] = ( ( deg2[i] * a ) + b ) % n_alt_deg2; } } //------------------------------------- block_size = n_alt_deg2 / nT; #pragma omp parallel for for ( int tid = 0; tid < nT; tid++ ) { int lb = tid * block_size; int ub = lb + block_size; if ( tid == (nT-1) ) { ub = n_alt_deg2; } for ( int i = lb; i < ub; i++ ) { alt_deg2[i] = -1; // indicates unused } } //------------------------------------- // sequential loop for ( int i = 0; i < n_deg2; i++ ) { int loc = xloc[i]; int val = deg2[i]; if ( alt_deg2[loc] == -1 ) { alt_deg2[loc] = val; } else if ( alt_deg2[loc] == val ) { // nothing to do } else { // sequential search for new spot bool found = false; for ( int j = loc; j < n_alt_deg2; j++ ) { if ( alt_deg2[j] == -1 ) { alt_deg2[j] = val; found = true; break; } else if ( alt_deg2[j] == val ) { found = true; break; } } if ( found 
== false ) { for ( int j = 0; j < loc; j++ ) { if ( alt_deg2[j] == -1 ) { alt_deg2[j] = val; found = true; break; } else if ( alt_deg2[j] == val ) { found = true; break; } } if ( found == false ) { go_BYE(-1); } } } } //------------------------------------- block_size = n_deg2 / nT; #pragma omp parallel for for ( int tid = 0; tid < nT; tid++ ) { int lb = tid * block_size; int ub = lb + block_size; if ( tid == (nT-1) ) { ub = n_deg2; } for ( int i = lb; i < ub; i++ ) { deg2[i] = -1; // indicates unused } } //------------------------------------- nR_TD2 = 0; for ( int i = 0; i < n_alt_deg2; i++ ) { int val = alt_deg2[i]; if ( val < 0 ) { continue; } deg2[nR_TD2++] = val; } } else { #ifdef IPP ippsSortAscend_32s_I(deg2, n_deg2); #else qsort_asc_I4 (deg2, n_deg2, sizeof(int), NULL); #endif unsigned long long t2 = get_time_usec(); /* Remove duplicates */ nR_TD2 = 1; /* first guy always gets in */ for ( int oldidx = 1; oldidx < n_deg2; oldidx++ ) { if ( deg2[oldidx] != deg2[oldidx-1] ) { deg2[nR_TD2++] = deg2[oldidx]; } } } unsigned long long t3 = get_time_usec(); /* Write output to file */ FLD_TYPE fldtype = I4; int fldsz = -1; size_t filesz = 0; int ddir_id = -1; int fileno = -1; status = get_fld_sz(fldtype, &fldsz); cBYE(status); filesz = fldsz * nR_TD2; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &TD2_mid_X, &TD2_mid_nX, true); cBYE(status); memcpy(TD2_mid_X, deg2, fldsz * nR_TD2); rs_munmap(TD2_mid_X, TD2_mid_nX); unsigned long long t4 = get_time_usec(); /*--------------------------------------------*/ char strbuf[32]; sprintf(strbuf, "%lld", nR_TD2); status = add_tbl(TD2_tbl, strbuf, &TD2_tbl_id, &TD2_tbl_rec); cBYE(status); zero_fld_rec(&TD2_mid_fld_rec); TD2_mid_fld_rec.fldtype = I4; status = add_fld(TD2_tbl_id, TD2_mid, ddir_id, fileno, &TD2_mid_fld_id, &TD2_mid_fld_rec); cBYE(status); /* fprintf(stderr, "t1 = %llu \n", t1 - t0); fprintf(stderr, "t2 = %llu \n", t2 - t1); fprintf(stderr, "t3 = %llu \n", t3 - 
t2); fprintf(stderr, "t4 = %llu \n", t4 - t3); */ BYE: rs_munmap(TM_TC_lb_X, TM_TC_lb_nX); rs_munmap(TM_TC_cnt_X, TM_TC_cnt_nX); rs_munmap(TC_mid_X, TC_mid_nX); rs_munmap(TD1_fk_TM_X, TD1_fk_TM_nX); rs_munmap(TD2_mid_X, TD2_mid_nX); free_if_non_null(deg2); free_if_non_null(alt_deg2); free_if_non_null(xloc); return(status); }
// START FUNC DECL int srt_join( char *src_tbl, char *src_lnk, char *src_val, char *dst_tbl, char *dst_lnk, char *dst_val, char *op ) // STOP FUNC DECL { int status = 0; char *src_val_X = NULL; size_t src_val_nX = 0; char *src_lnk_X = NULL; size_t src_lnk_nX = 0; char *nn_dst_val_X = NULL; size_t nn_dst_val_nX = 0; char *dst_val_X = NULL; size_t dst_val_nX = 0; char *dst_lnk_X = NULL; size_t dst_lnk_nX = 0; TBL_REC_TYPE src_tbl_rec, dst_tbl_rec; FLD_REC_TYPE src_val_rec, nn_src_val_rec; FLD_REC_TYPE src_lnk_rec, nn_src_lnk_rec; FLD_REC_TYPE dst_lnk_rec, nn_dst_lnk_rec; FLD_REC_TYPE dst_val_rec, nn_dst_val_rec; long long src_nR = INT_MIN, dst_nR = INT_MIN; int ijoin_op; bool is_any_null = false; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_lnk_id = INT_MIN, nn_src_lnk_id = INT_MIN; int dst_lnk_id = INT_MIN, nn_dst_lnk_id = INT_MIN; int src_val_id = INT_MIN, nn_src_val_id = INT_MIN; int dst_val_id = INT_MIN, nn_dst_val_id = INT_MIN; char opfile[MAX_LEN_FILE_NAME+1]; char nn_opfile[MAX_LEN_FILE_NAME+1]; #define RSLT_LEN 32 char str_rslt[RSLT_LEN]; //---------------------------------------------------------------- zero_string(str_rslt, RSLT_LEN); status = mk_mjoin_op(op, &ijoin_op); cBYE(status); //-------------------------------------------------------- // We support a special operator called cnt which can be used only // when src_val is null. 
if ( ( src_val == NULL ) || ( *src_val == '\0' ) ) { if ( ( strcmp(op, "cnt") != 0 ) ) { go_BYE(-1); } zero_fld_rec(&src_val_rec); } else { if ( ( strcmp(op, "cnt") == 0 ) ) { go_BYE(-1); } } //---------------------------------------------------------------- // Get meta-data for all necessary fields status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbls[src_tbl_id].nR; status = is_tbl(dst_tbl, &dst_tbl_id, &dst_tbl_rec); cBYE(status); chk_range(dst_tbl_id, 0, g_n_tbl); dst_nR = g_tbls[dst_tbl_id].nR; status = is_fld(NULL, src_tbl_id, src_lnk, &src_lnk_id, &src_lnk_rec, &nn_src_lnk_id, &nn_src_lnk_rec); cBYE(status); chk_range(src_lnk_id, 0, g_n_fld); if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { status = is_fld(NULL, src_tbl_id, src_val, &src_val_id, &src_val_rec, &nn_src_val_id, &nn_src_val_rec); cBYE(status); chk_range(src_val_id, 0, g_n_fld); } status = is_fld(NULL, dst_tbl_id, dst_lnk, &dst_lnk_id, &dst_lnk_rec, &nn_dst_lnk_id, &nn_dst_lnk_rec); cBYE(status); chk_range(dst_lnk_id, 0, g_n_fld); //---------------------------------------------------------------- /* Make sure link fields are sorted ascending */ status = ext_f_to_s(src_tbl, src_lnk, "is_sorted", str_rslt, RSLT_LEN); cBYE(status); if ( strcmp(str_rslt, "ascending") != 0 ) { fprintf(stderr, "Field [%s] in Table [%s] not sorted ascending\n", src_lnk, src_tbl); go_BYE(-1); } status = ext_f_to_s(dst_tbl, dst_lnk, "is_sorted", str_rslt, RSLT_LEN); cBYE(status); if ( strcmp(str_rslt, "ascending") != 0 ) { fprintf(stderr, "Field [%s] in Table [%s] not sorted ascending\n", dst_lnk, dst_tbl); go_BYE(-1); } //---------------------------------------------------------------- // Get pointer access to all necessary fields status = get_data(src_lnk_rec, &src_lnk_X, &src_lnk_nX, false); cBYE(status); if ( ( src_val != NULL ) && ( *src_val != '\0' ) ) { status = get_data(src_val_rec, &src_val_X, &src_val_nX, false); cBYE(status); } status = 
get_data(dst_lnk_rec, &dst_lnk_X, &dst_lnk_nX, false); cBYE(status); // Create output data files if ( ijoin_op == MJOIN_OP_CNT ) { if ( src_nR < INT_MAX ) { src_val_rec.fldtype = I4; } else { src_val_rec.fldtype = I8; } } int fldsz = INT_MAX, ddir_id = INT_MAX, nn_ddir_id = INT_MAX; status = get_fld_sz(src_val_rec.fldtype, &fldsz); cBYE(status); status = mk_temp_file(opfile, (fldsz * dst_nR), &ddir_id); status = get_fld_sz(I1, &fldsz); cBYE(status); status = mk_temp_file(nn_opfile, (fldsz * dst_nR), &nn_ddir_id); status = q_mmap(ddir_id, opfile, &dst_val_X, &dst_val_nX, 1); cBYE(status); status = q_mmap(nn_ddir_id, nn_opfile, &nn_dst_val_X, &nn_dst_val_nX, 1); cBYE(status); //-------------------------------------------- long long block_size; int nT; status = partition(dst_nR, 1024, -1, &block_size, &nT); cBYE(status); for ( int tid = 0; tid < nT; tid++ ) { long long lb = 0 + (tid * block_size); long long ub = lb + block_size; if ( tid == (nT-1) ) { ub = dst_nR; } int *I4_src_lnk = (int *)src_lnk_X; int *I4_src_val = (int *)src_val_X; int *I4_dst_lnk = (int *)dst_lnk_X; int *I4_dst_val = (int *)dst_val_X; long long *I8_src_lnk = (long long *)src_lnk_X; long long *I8_src_val = (long long *)src_val_X; long long *I8_dst_lnk = (long long *)dst_lnk_X; long long *I8_dst_val = (long long *)dst_val_X; char *dst_nn = nn_dst_val_X; // Core join if ( ( src_lnk_rec.fldtype == I4 ) && ( src_val_rec.fldtype == I4 ) && ( dst_lnk_rec.fldtype == I4 ) ) { status = core_srt_join_I_I_I( I4_src_lnk, I4_src_val, src_nR, I4_dst_lnk, I4_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I4 ) && ( src_val_rec.fldtype == I4 ) && ( dst_lnk_rec.fldtype == I8 ) ) { status = core_srt_join_I_I_L( I4_src_lnk, I4_src_val, src_nR, I8_dst_lnk, I4_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I4 ) && ( src_val_rec.fldtype == I8 ) && ( dst_lnk_rec.fldtype == I4 ) ) { status = core_srt_join_I_L_I( I4_src_lnk, I8_src_val, 
src_nR, I4_dst_lnk, I8_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I4 ) && ( src_val_rec.fldtype == I8 ) && ( dst_lnk_rec.fldtype == I8 ) ) { status = core_srt_join_I_L_L( I4_src_lnk, I8_src_val, src_nR, I8_dst_lnk, I8_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I8 ) && ( src_val_rec.fldtype == I4 ) && ( dst_lnk_rec.fldtype == I4 ) ) { status = core_srt_join_L_I_I( I8_src_lnk, I4_src_val, src_nR, I4_dst_lnk, I4_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I8 ) && ( src_val_rec.fldtype == I4 ) && ( dst_lnk_rec.fldtype == I8 ) ) { status = core_srt_join_L_I_L( I8_src_lnk, I4_src_val, src_nR, I8_dst_lnk, I4_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I8 ) && ( src_val_rec.fldtype == I8 ) && ( dst_lnk_rec.fldtype == I4 ) ) { status = core_srt_join_L_L_I( I8_src_lnk, I8_src_val, src_nR, I4_dst_lnk, I8_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else if ( ( src_lnk_rec.fldtype == I8 ) && ( src_val_rec.fldtype == I8 ) && ( dst_lnk_rec.fldtype == I8 ) ) { status = core_srt_join_L_L_L( I8_src_lnk, I8_src_val, src_nR, I8_dst_lnk, I8_dst_val, dst_nn, lb, ub, ijoin_op, &is_any_null); } else { go_BYE(-1); } } if ( is_any_null ) { is_any_null = true; } //-------------------------------------------------------- // Add output field to meta data status = add_fld(dst_tbl_id, dst_val, ddir_id, opfile, src_val_rec.fldtype, -1, &dst_val_id, &dst_val_rec); cBYE(status); if ( is_any_null == true ) { status = add_aux_fld(NULL, dst_tbl_id, NULL, dst_val_id, nn_ddir_id, nn_opfile, "nn", &nn_dst_val_id, &nn_dst_val_rec); cBYE(status); } else { unlink(nn_opfile); } BYE: rs_munmap(src_val_X, src_val_nX); rs_munmap(src_lnk_X, src_lnk_nX); rs_munmap(dst_val_X, dst_val_nX); rs_munmap(nn_dst_val_X, nn_dst_val_nX); rs_munmap(dst_lnk_X, dst_lnk_nX); return(status); }
// START FUNC DECL int cfld_to_range( char *src_tbl, char *src_fld, char *dst_tbl, char *lb_fld, char *ub_fld ) // STOP FUNC DECL { int status = 0; char *src_X = NULL; size_t src_nX = 0; char *lb_X = NULL; size_t lb_nX = 0; int lb_ddir_id = -1, lb_fileno = -1; char *ub_X = NULL; size_t ub_nX = 0; int ub_ddir_id = -1, ub_fileno = -1; TBL_REC_TYPE src_tbl_rec, dst_tbl_rec; FLD_REC_TYPE lb_fld_rec, ub_fld_rec; FLD_REC_TYPE src_fld_rec, nn_src_fld_rec; long long src_nR = LLONG_MIN, dst_nR = LLONG_MIN; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_fld_id = INT_MIN, nn_src_fld_id = INT_MIN; int lb_fld_id = INT_MIN; int ub_fld_id = INT_MIN; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( strcmp(src_tbl, dst_tbl) == 0 ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( lb_fld == NULL ) || ( *lb_fld == '\0' ) ) { go_BYE(-1); } if ( ( ub_fld == NULL ) || ( *ub_fld == '\0' ) ) { go_BYE(-1); } //---------------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = src_tbl_rec.nR; // Create space for output long long max_dst_nR = src_nR; long long filesz = max_dst_nR * sizeof(long long); status = mk_temp_file(filesz, &lb_ddir_id, &lb_fileno); cBYE(status); status = q_mmap(lb_ddir_id, lb_fileno, &lb_X, &lb_nX, true); cBYE(status); long long *lbI8 = (long long *)lb_X; status = mk_temp_file(filesz, &ub_ddir_id, &ub_fileno); cBYE(status); status = q_mmap(ub_ddir_id, ub_fileno, &ub_X, &ub_nX, true); cBYE(status); long long *ubI8 = (long long *)ub_X; // src_fld status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, &nn_src_fld_id, &nn_src_fld_rec); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); if ( nn_src_fld_id >= 0 ) { go_BYE(-1); } status = 
get_data(src_fld_rec, &src_X, &src_nX, false); cBYE(status); if ( src_fld_rec.fldtype != I1 ) { go_BYE(-1); } // TODO P0: Parallelize outer loop // Looking for the start of the range long long rangelb, rangeub; dst_nR = 0; for ( int i = 0; i < src_nR; i++ ) { if ( src_X[i] == 0 ) { continue; } rangelb = i; // inclusive rangeub = -1; // Looking for the end of the range for ( ; i < src_nR; i++ ) { if ( src_X[i] == 0 ) { rangeub = i; // exclusive break; } if ( rangeub < 0 ) { rangeub = src_nR; } } lbI8[dst_nR] = rangelb; ubI8[dst_nR] = rangeub; dst_nR++; } if ( dst_nR == 0 ) { goto BYE; } // Nothing todo // truncate file if necessary rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); if ( dst_nR < src_nR ) { q_trunc(lb_ddir_id, lb_fileno, (dst_nR * sizeof(long long))); q_trunc(ub_ddir_id, ub_fileno, (dst_nR * sizeof(long long))); } char strbuf[32]; sprintf(strbuf, "%lld", dst_nR); status = add_tbl(dst_tbl, strbuf, &dst_tbl_id, &dst_tbl_rec); cBYE(status); zero_fld_rec(&lb_fld_rec); lb_fld_rec.fldtype = I8; status = add_fld(dst_tbl_id, lb_fld, lb_ddir_id, lb_fileno, &lb_fld_id, &lb_fld_rec); cBYE(status); zero_fld_rec(&ub_fld_rec); ub_fld_rec.fldtype = I8; status = add_fld(dst_tbl_id, ub_fld, ub_ddir_id, ub_fileno, &ub_fld_id, &ub_fld_rec); cBYE(status); BYE: rs_munmap(src_X, src_nX); rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); return(status); }
// START FUNC DECL int chk_fld_meta( int fld_id, long long nR ) // STOP FUNC DECL { int status = 0; char *X = NULL; size_t nX = 0; int fldsz; char *cptr; FLD_REC_TYPE M; chk_range(fld_id, 0, g_n_fld); M = g_flds[fld_id]; chk_range(M.tbl_id, 0, g_n_tbl); chk_range(M.ddir_id, 0, g_n_ddir); if ( strlen(g_ddirs[M.ddir_id].name) == 0 ) { go_BYE(-1); } if ( dir_exists(g_ddirs[M.ddir_id].name) == false ) { go_BYE(-1); } // B cannot have an nn field */ if ( ( M.fldtype == B ) && ( M.nn_fld_id >= 0 ) ) { go_BYE(-1); } if ( M.parent_id >= 0 ) { /* this is an auxilary field */ char prefix[8]; zero_string(prefix, 8); switch ( M.auxtype ) { case nn : if ( ( M.fldtype != I1 ) && ( M.fldtype != B ) ) { go_BYE(-1); } strcpy(prefix, ".nn."); break; case len : if ( M.fldtype != I2 ) { go_BYE(-1); } strcpy(prefix, ".len."); break; case off : if ( M.fldtype != I8 ) { go_BYE(-1); } strcpy(prefix, ".off."); break; default : go_BYE(-1); break; } /* name of nn field must start with ".nn." */ cptr = strstr(M.name, prefix); if ( cptr != M.name ) { go_BYE(-1); } cptr = M.name + strlen(prefix); /* name of nn field must be ".nn." 
+ name of parent */ if ( strcmp(cptr, g_flds[M.parent_id].name) != 0 ) { go_BYE(-1); } /* parent of nn field must have nn field */ if ( M.auxtype == nn ) { if ( g_flds[M.parent_id].nn_fld_id < 0 ) { go_BYE(-1); } } /* aux field cannot have nn field */ if ( M.nn_fld_id >= 0 ) { go_BYE(-1); } } // If a field is a fk to a lookup table, then it can be I1/I2/I4/I8 // TODO P1 if ( M.len >= 1 ) { if ( M.fldtype != SC ) { go_BYE(-1); } if ( M.len > MAX_LEN_STR ) { go_BYE(-1); } } else { if ( M.fldtype == SC ) { go_BYE(-1); } if ( M.len != 0 ) { go_BYE(-1); } } int len_fld_id, off_fld_id; status = get_fld_info(fk_fld_len, fld_id, &len_fld_id); cBYE(status); status = get_fld_info(fk_fld_off, fld_id, &off_fld_id); cBYE(status); if ( M.fldtype == SV ) { if ( len_fld_id < 0 ) { go_BYE(-1); } if ( off_fld_id < 0 ) { go_BYE(-1); } if ( g_flds[len_fld_id].fldtype != I2 ) { go_BYE(-1); } if ( g_flds[off_fld_id].fldtype != I8 ) { go_BYE(-1); } } else { if ( len_fld_id != -1 ) { go_BYE(-1); } if ( off_fld_id != -1 ) { go_BYE(-1); } } if ( M.fldtype == SC ) { if ( ( M.len < 1 ) || ( M.len > MAX_LEN_STR ) ) { go_BYE(-1); } // TODO P1: Think about this. 
if ( M.nn_fld_id >= 0 ) { go_BYE(-1); } } status = chk_srttype(M.srttype); cBYE(status); status = chk_fldtype(M.fldtype); cBYE(status); status = chk_auxtype(M.auxtype); cBYE(status); if ( M.auxtype == undef_auxtype ) { status = chk_fld_name(M.name); cBYE(status); } switch ( M.fldtype ) { case SC : case SV : if ( ( M.is_max_nn == true ) || ( M.is_min_nn == true ) || ( M.is_sum_nn == true ) ) { go_BYE(-1); } break; case B : if ( ( M.is_max_nn == true ) || ( M.is_min_nn == true ) ) { go_BYE(-1); } break; default : break; } if ( ( M.is_min_nn == false ) && ( M.minval.minF8 != 0 ) ) { go_BYE(-1); } if ( ( M.is_max_nn == false ) && ( M.maxval.maxF8 != 0 ) ) { go_BYE(-1); } if ( ( M.is_sum_nn == false ) && ( M.sumval.sumF8 != 0 ) ) { go_BYE(-1); } if ( ( M.is_min_nn == false ) && ( M.minval.minI8 != 0 ) ) { go_BYE(-1); } if ( ( M.is_max_nn == false ) && ( M.maxval.maxI8 != 0 ) ) { go_BYE(-1); } if ( ( M.is_sum_nn == false ) && ( M.sumval.sumI8 != 0 ) ) { go_BYE(-1); } if ( M.fileno <= 0 ) { go_BYE(-1); } if ( ( M.is_max_nn == true ) && ( M.is_min_nn == true ) ) { switch ( M.fldtype) { case I1 : case I2: case I4 : case I8 : if ( M.minval.minI8 > M.maxval.maxI8 ) { go_BYE(-1); } break; case F4 : case F8 : if ( M.minval.minF8 > M.maxval.maxF8 ) { go_BYE(-1); } break; default : go_BYE(-1); break; } } status = q_mmap(M.ddir_id, M.fileno, &X, &nX, 0); cBYE(status); if ( nX != M.filesz ) { fprintf(stderr, "fileno = %d \n", M.fileno); fprintf(stderr, "nX = %d \n", (int)nX); fprintf(stderr, "filesz = %d \n", (int)(M.filesz)); go_BYE(-1); } // if a field has an nn field, it cannot be sorted switch ( M.srttype ) { case unknown : break; case ascending : if ( M.nn_fld_id>= 0 ) { go_BYE(-1); } break; case descending : if ( M.nn_fld_id>= 0 ) { go_BYE(-1); } break; case unsorted : if ( M.nn_fld_id>= 0 ) { go_BYE(-1); } break; default : go_BYE(-1); break; } status = get_fld_sz(M.fldtype, &fldsz); cBYE(status); /* Check on file size */ if ( ( M.fldtype != SC ) && ( M.fldtype != SV ) ) { if 
( nR > 0 ) { long long exp_filesize = fldsz * nR; if ( exp_filesize != nX ) { go_BYE(-1); } } } BYE: if ( status < 0 ) { fprintf(stderr, "Error on field %s \n", M.name); } rs_munmap(X, nX); return status ; }
//--------------------------------------------------------------- // START FUNC DECL int xfer( char *src_tbl, char *src_fld, char *dst_tbl, char *dst_idx, // this is an index into the source table char *dst_fld ) // STOP FUNC DECL { int status = 0; TBL_REC_TYPE src_tbl_rec, dst_tbl_rec; FLD_REC_TYPE src_fld_rec, dst_idx_meta, dst_fld_rec; FLD_REC_TYPE nn_src_fld_rec, nn_dst_idx_meta, nn_dst_fld_rec; char opfile[MAX_LEN_FILE_NAME+1]; char nn_opfile[MAX_LEN_FILE_NAME+1]; long long src_nR, dst_nR; char *src_fld_X = NULL; size_t src_fld_nX = 0; char *dst_fld_X = NULL; size_t dst_fld_nX = 0; char *dst_idx_X = NULL; size_t dst_idx_nX = 0; char *nn_src_fld_X = NULL; size_t nn_src_fld_nX = 0; char *nn_dst_idx_X = NULL; size_t nn_dst_idx_nX = 0; char *nn_dst_fld_X = NULL; size_t nn_dst_fld_nX = 0; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_fld_id = INT_MIN, nn_src_fld_id = INT_MIN; int dst_idx_id = INT_MIN, nn_dst_idx_id = INT_MIN; int dst_fld_id = INT_MIN, nn_dst_fld_id = INT_MIN; //---------------------------------------------------------------- zero_string(opfile, MAX_LEN_FILE_NAME+1); zero_string(nn_opfile, MAX_LEN_FILE_NAME+1); if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_idx == NULL ) || ( *dst_idx == '\0' ) ) { go_BYE(-1); } if ( ( dst_fld == NULL ) || ( *dst_fld == '\0' ) ) { go_BYE(-1); } if ( strcmp(dst_idx, dst_fld) == 0 ) { go_BYE(-1); } //---------------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbls[src_tbl_id].nR; status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, &nn_src_fld_id, &nn_src_fld_rec); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); status = get_data(src_fld_rec, &src_fld_X, &src_fld_nX, false); cBYE(status); status = is_tbl(dst_tbl, 
&dst_tbl_id, &dst_tbl_rec); cBYE(status); chk_range(dst_tbl_id, 0, g_n_tbl); dst_nR = g_tbls[dst_tbl_id].nR; status = is_fld(NULL, dst_tbl_id, dst_idx, &dst_idx_id, &dst_idx_meta, &nn_dst_idx_id, &nn_dst_idx_meta); cBYE(status); chk_range(dst_idx_id, 0, g_n_fld); status = get_data(dst_idx_meta, &dst_idx_X, &dst_idx_nX, false); cBYE(status); bool is_nn_needed = false; if ( nn_src_fld_id >= 0 ) { status = get_data(nn_src_fld_rec, &nn_src_fld_X, &nn_src_fld_nX, false); cBYE(status); is_nn_needed = true; } if ( nn_dst_idx_id >= 0 ) { status = get_data(nn_dst_idx_meta, &nn_dst_idx_X, &nn_dst_idx_nX, false); cBYE(status); is_nn_needed = true; } //---------------------------------------------------------------- // allocate space for output int fldsz = INT_MAX, ddir_id = INT_MAX, nn_ddir_id = INT_MAX; status = get_fld_sz(src_fld_rec.fldtype, &fldsz); cBYE(status); status = mk_temp_file(opfile, (dst_nR * fldsz), &ddir_id); cBYE(status); status = q_mmap(ddir_id, opfile, &dst_fld_X, &dst_fld_nX, true); cBYE(status); if ( is_nn_needed ) { status = mk_temp_file(nn_opfile, (dst_nR * sizeof(char)), &nn_ddir_id); cBYE(status); status = q_mmap(nn_ddir_id, nn_opfile, &nn_dst_fld_X, &nn_dst_fld_nX, true); cBYE(status); } //---------------------------------------------------------------- switch ( src_fld_rec.fldtype ) { #include "incl_xfer_I1.c" #include "incl_xfer_I2.c" #include "incl_xfer_I4.c" #include "incl_xfer_I8.c" #include "incl_xfer_F4.c" #include "incl_xfer_F8.c" default : go_BYE(-1); break; } status = add_fld(dst_tbl_id, dst_fld, ddir_id, opfile, src_fld_rec.fldtype, -1, &dst_fld_id, &dst_fld_rec); cBYE(status); // Note that is_nn_needed is overly pessimistic in its assessment of // whether an nn field is needed. It may not be needed. 
We should // tighten this up at some point in time TODO P2 if ( is_nn_needed ) { status = add_aux_fld(NULL, dst_tbl_id, NULL, dst_fld_id, nn_ddir_id, nn_opfile, "nn", &nn_dst_fld_id, &nn_dst_fld_rec); } char strbuf[32]; status = int_get_meta(src_tbl_id, src_fld_id, "dict_tbl_id", strbuf); cBYE(status); char *endptr; int dict_tbl_id = strtoll(strbuf, &endptr, 10); if ( dict_tbl_id > 0 ) { status = int_set_meta(dst_tbl_id, dst_fld_id, "dict_tbl_id", strbuf); cBYE(status); } BYE: rs_munmap(src_fld_X, src_fld_nX); rs_munmap(dst_idx_X, dst_idx_nX); rs_munmap(dst_fld_X, dst_fld_nX); rs_munmap(nn_src_fld_X, nn_src_fld_nX); rs_munmap(nn_dst_idx_X, nn_dst_idx_nX); rs_munmap(nn_dst_fld_X, nn_dst_fld_nX); return(status); }
/* START FUNC DECL */ int get_t2f2_val( char *t1, char *lnk1, char *t2, char *lnk2, char *t2f2, /* field in t2 */ char *t1f2 /* field in t1 */ ) /* STOP FUNC DECL */ { int status = 0; char *t2f2_X = NULL; size_t n_t2f2_X = 0; char *nn_t2f2_X = NULL; size_t nn_n_t2f2_X = 0; char *X = NULL; size_t nX = 0; char *nn_X = NULL; size_t nn_nX = 0; char *X1 = NULL; size_t nX1 = 0; char *X2 = NULL; size_t nX2 = 0; char *nn_X1 = NULL; size_t nn_nX1 = 0; char *nn_X2 = NULL; size_t nn_nX2 = 0; TBL_REC_TYPE t1_rec, t2_rec; FLD_REC_TYPE lnk1_rec, nn_lnk1_rec; int lnk1_id, nn_lnk1_id; FLD_REC_TYPE lnk2_rec, nn_lnk2_rec;int lnk2_id, nn_lnk2_id; FLD_REC_TYPE t2f2_rec, nn_t2f2_rec; int t2f2_id, nn_t2f2_id; FLD_REC_TYPE t1f2_rec, nn_t1f2_rec; int t1f2_id, nn_t1f2_id; int t1_id, t2_id; long long nR1, nR2; #define BUFLEN 32 char rslt_buf[BUFLEN]; char **Y = NULL; int nY = 0; HT_I8_TYPE *ht = NULL; int sz_ht; // size of hash table int n_ht = 0; // number of occupied entries in hash table /*-------------------------------------------------------------*/ if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( strcmp(t1, t2) == 0 ) && ( strcmp(t1f2, t2f2) == 0 ) ) { go_BYE(-1); } if ( ( lnk1 == NULL ) || ( *lnk1 == '\0' ) ) { go_BYE(-1); } if ( ( lnk2 == NULL ) || ( *lnk2 == '\0' ) ) { go_BYE(-1); } /*-------------------------------------------------------------*/ status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); chk_range(t1_id, 0, g_n_tbl); nR1 = g_tbls[t1_id].nR; status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); chk_range(t2_id, 0, g_n_tbl); nR2 = g_tbls[t2_id].nR; /*-------------------------------------------------------------*/ status = is_fld(NULL, t1_id, lnk1, &lnk1_id, &lnk1_rec, &nn_lnk1_id, &nn_lnk1_rec); cBYE(status); chk_range(lnk1_id, 0, g_n_fld); if ( nn_lnk1_id >= 0 ) { status = get_data(nn_lnk1_rec, &nn_X1, &nn_nX1, false); cBYE(status); } status = get_data(lnk1_rec, &X1, &nX1, false); cBYE(status); 
/*-------------------------------------------------------------*/ status = is_fld(NULL, t2_id, lnk2, &lnk2_id, &lnk2_rec, &nn_lnk2_id, &nn_lnk2_rec); cBYE(status); chk_range(lnk2_id, 0, g_n_fld); if ( nn_lnk2_id >= 0 ) { go_BYE(-1); } status = get_data(lnk2_rec, &X2, &nX2, false); cBYE(status); if ( nn_lnk2_id >= 0 ) { status = get_data(nn_lnk2_rec, &nn_X2, &nn_nX2, false); cBYE(status); } /*-------------------------------------------------------- */ if ( lnk1_rec.fldtype != lnk2_rec.fldtype ) { go_BYE(-1); } /*-------------------------------------------------------- */ status = is_fld(NULL, t2_id, t2f2, &t2f2_id, &t2f2_rec, &nn_t2f2_id, &nn_t2f2_rec); cBYE(status); chk_range(t2f2_id, 0, g_n_fld); status = get_data(t2f2_rec, &t2f2_X, &n_t2f2_X, false); cBYE(status); if ( nn_t2f2_id >= 0 ) { status = get_data(nn_t2f2_rec, &nn_t2f2_X, &nn_n_t2f2_X, false); cBYE(status); } /*--------------------------------------------------------*/ /* Check supported field types */ if ( lnk1_rec.fldtype != I4 ) { go_BYE(-1); } if ( lnk2_rec.fldtype != I4 ) { go_BYE(-1); } if ( t2f2_rec.fldtype != I4 ) { go_BYE(-1); } /*-------------------------------------------------------- */ /* Create space for output */ int fldsz = 0; size_t filesz = 0; int ddir_id = -1, fileno = -1; int nn_ddir_id = -1, nn_fileno = -1; status = get_fld_sz(I4, &fldsz); cBYE(status); filesz = fldsz * nR1; status = mk_temp_file(filesz, &ddir_id, &fileno); status = q_mmap(ddir_id, fileno, &X, &nX, 1); cBYE(status); status = get_fld_sz(I1, &fldsz); cBYE(status); filesz = fldsz * nR1; status = mk_temp_file(filesz, &nn_ddir_id, &nn_fileno); status = q_mmap(nn_ddir_id, nn_fileno, &nn_X, &nn_nX, 1); cBYE(status); /*-------------------------------------------------------- */ bool is_fast = false; bool definitely_has_null_vals = false; long long nn_nR2 = 0; if ( nn_X2 != NULL ) { for ( long long i = 0; i < nR2; i++ ) { if ( nn_X2[i] == TRUE ) { nn_nR2++; } } } else { nn_nR2 = nR2; } if ( nn_nR2 == 0 ) { 
definitely_has_null_vals = true; assign_const_I4((int *)X, nR1, 0); assign_const_I1((char *)nn_X, nR1, 0); } else { cilkfor ( long long i = 0; i < nR1; i++ ) { int *it2f2 = (int *)t2f2_X; int *I4_lnk1 = (int *)X1; int I4val1; int *I4_lnk2 = (int *)X2; int I4val2; int *out = (int *)X; int outval; char *out_nn = (char *)nn_X; char c_nn; I4val1 = I4_lnk1[i]; /* If the link value is null, then the output value is null */ if ( ( nn_X1 != NULL ) && ( nn_X1[i] == FALSE ) ) { out_nn[i] = FALSE; out[i] = 0; } else { /* We are now looking for I4val1 in the link field of t2 */ if ( is_fast == false ) { outval = 0; c_nn = FALSE; for ( long long j = 0; j < nR2; j++ ) { if ( ( nn_X2 != NULL ) && ( nn_X2[j] == FALSE ) ) { continue; } I4val2 = I4_lnk2[j]; if ( I4val1 == I4val2 ) { if ( ( nn_t2f2_X == NULL ) || (nn_t2f2_X[j] == TRUE ) ) { c_nn = TRUE; } outval = it2f2[j]; break; } } out_nn[i] = c_nn; out[i] = outval; } else { WHEREAMI; status = -1; continue; } } } cBYE(status); #ifdef CILK __cilkrts_end_cilk(); #endif } rs_munmap(X, nX); zero_fld_rec(&t1f2_rec); t1f2_rec.fldtype = I4; status = add_fld(t1_id, t1f2, ddir_id, fileno, &t1f2_id, &t1f2_rec); cBYE(status); if ( definitely_has_null_vals == true ) { /* No need to check */ rs_munmap(nn_X, nn_nX); zero_fld_rec(&nn_t1f2_rec); nn_t1f2_rec.fldtype = I1; status = add_aux_fld(NULL, t1_id, NULL, t1f2_id, nn_ddir_id, nn_fileno, nn, &nn_t1f2_id, &nn_t1f2_rec); cBYE(status); } else { status = vec_f_to_s(nn_X, I1, NULL, nR1, "sum", rslt_buf, BUFLEN); cBYE(status); status = explode(rslt_buf, ':', &Y, &nY); if ( nY != 2 ) { go_BYE(-1); } rs_munmap(nn_X, nn_nX); if ( strcmp(Y[0], Y[1]) == 0 ) { /* num non null values == nR => nn field not needed */ status = q_delete(nn_ddir_id, nn_fileno); cBYE(status); } else { zero_fld_rec(&nn_t1f2_rec); nn_t1f2_rec.fldtype = I1; status = add_aux_fld(NULL, t1_id, NULL, t1f2_id, nn_ddir_id, nn_fileno, nn, &nn_t1f2_id, &nn_t1f2_rec); cBYE(status); } } 
/*-------------------------------------------------------*/ BYE: if ( Y != NULL ) { for ( int i = 0; i < nY; i++ ) { free_if_non_null(Y[i]); } free_if_non_null(Y); } rs_munmap(X, nX); rs_munmap(nn_X, nn_nX); rs_munmap(X1, nX1); rs_munmap(X2, nX2); rs_munmap(nn_X1, nn_nX1); rs_munmap(nn_X2, nn_nX2); rs_munmap(t2f2_X, n_t2f2_X); rs_munmap(nn_t2f2_X, nn_n_t2f2_X); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int ext_stride( const char *t1, const char *f1, char *str_start, char *str_stride, const char *t2, const char *f2, char *str_nR2 ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; long long nR1, nR2; TBL_REC_TYPE t1_rec; int t1_id; TBL_REC_TYPE t2_rec; int t2_id; FLD_REC_TYPE f1_rec; int f1_id; FLD_REC_TYPE nn_f1_rec; int nn_f1_id; FLD_REC_TYPE f2_rec; int f2_id; long long start, stride; int fileno = -1, ddir_id = INT_MAX; int fldsz = INT_MAX; size_t filesz = 0; int *f1I4 = NULL, *f2I4 = NULL; long long *f1I8 = NULL, *f2I8 = NULL; //---------------------------------------------------------------- if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); } status = stoI8(str_start, &start); cBYE(status); status = stoI8(str_stride, &stride); cBYE(status); nR2 = stoI8(str_nR2, &nR2); cBYE(status); //-------------------------------------------------------- status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); nR1 = t1_rec.nR; status = is_fld(t1, -1, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); if ( nn_f1_id >= 0 ) { go_BYE(-1); } status = del_tbl(t2, -1); cBYE(status); if ( ( f1_rec.fldtype == I4 ) || ( f1_rec.fldtype == I8 ) ) { /* all is well */ } else { go_BYE(-1); } if ( ( start < 0 ) || ( start >= nR1 ) ) { go_BYE(-1); } if ( ( stride <= 1 ) || ( stride >= nR1 ) ) { go_BYE(-1); } if ( nR2 < 1 ) { go_BYE(-1); } // reduce nR2 if necessary while ( start + (stride * nR2) > nR1 ) { nR2--; } if ( nR2 < 1 ) { go_BYE(-1); } //-------------------------------------------------------- // allocate space for output status = get_fld_sz(f1_rec.fldtype, 
&fldsz); cBYE(status); filesz = fldsz * nR2; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &f2_X, &f2_nX, true); cBYE(status); switch ( f1_rec.fldtype ) { case I4 : f1I4 = (int *)f1_X; f2I4 = (int *)f2_X; status = stride_I4(f1I4, nR1, start, stride, f2I4, nR2); cBYE(status); break; case I8 : f1I8 = (long long *)f1_X; f2I8 = (long long *)f2_X; status = stride_I8(f1I8, nR1, start, stride, f2I8, nR2); cBYE(status); break; default : go_BYE(-1); break; } cBYE(status); status = add_tbl(t2, str_nR2, &t2_id, &t2_rec); cBYE(status); zero_fld_rec(&f2_rec); f2_rec.fldtype = f1_rec.fldtype; status = add_fld(t2_id, f2, ddir_id, fileno, &f2_id, &f2_rec); cBYE(status); BYE: rs_munmap(f1_X, f1_nX); rs_munmap(f2_X, f2_nX); return status ; }
// last review 9/11/2013 //--------------------------------------------------------------- // START FUNC DECL int num_in_range( char *t1, char *f1, char *t2, /* We expect this table to be small */ char *lb, char *ub, char *cnt, char *rslt_buf ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *lb_X = NULL; size_t lb_nX = 0; char *ub_X = NULL; size_t ub_nX = 0; char *cnt_X = NULL; size_t cnt_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN; int f1_id = INT_MIN, lb_id = INT_MIN, ub_id = INT_MIN, cnt_id = INT_MIN; int nn_f1_id = INT_MIN, nn_lb_id = INT_MIN, nn_ub_id = INT_MIN; TBL_REC_TYPE t1_rec, t2_rec; FLD_REC_TYPE f1_rec, lb_rec, ub_rec, cnt_rec; FLD_REC_TYPE nn_f1_rec, nn_lb_rec, nn_ub_rec; long long nR1 = INT_MIN, nR2 = INT_MIN; long long *offsets = NULL; #define BUFLEN 32 char f1_buf[BUFLEN], lb_ub_buf[BUFLEN]; int ddir_id = -1, fileno = -1 ; FLD_TYPE cnt_fldtype = I4; int fldsz = 0; size_t filesz = 0; //---------------------------------------------------------------- if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( lb == NULL ) || ( *lb == '\0' ) ) { go_BYE(-1); } if ( ( ub == NULL ) || ( *ub == '\0' ) ) { go_BYE(-1); } if ( ( cnt == NULL ) || ( *cnt == '\0' ) ) { go_BYE(-1); } if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); } if ( strcmp(lb, ub) == 0 ) { go_BYE(-1); } if ( strcmp(ub, cnt) == 0 ) { go_BYE(-1); } if ( strcmp(cnt, lb) == 0 ) { go_BYE(-1); } zero_string(f1_buf, BUFLEN); zero_string(lb_ub_buf, BUFLEN); //-------------------------------------------------------- status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); chk_range(t1_id, 0, g_n_tbl); nR1 = t1_rec.nR; if ( nR1 > INT_MAX ) { go_BYE(-1); } //-------------------------------------------------------- status = is_fld(NULL, t1_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); chk_range(f1_id, 0, g_n_fld); status = get_data(f1_rec, &f1_X, 
&f1_nX, 0); cBYE(status); if ( nn_f1_id >= 0 ) { go_BYE(-1); } if ( f1_rec.fldtype != I4 ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); chk_range(t2_id, 0, g_n_tbl); nR2 = t2_rec.nR; //-------------------------------------------------------- status = is_fld(NULL, t2_id, lb, &lb_id, &lb_rec, &nn_lb_id, &nn_lb_rec); cBYE(status); chk_range(lb_id, 0, g_n_fld); status = get_data(lb_rec, &lb_X, &lb_nX, 0); cBYE(status); if ( nn_lb_id >= 0 ) { go_BYE(-1); } if ( lb_rec.fldtype != I4 ) { cBYE(-1); } //-------------------------------------------------------- status = is_fld(NULL, t2_id, ub, &ub_id, &ub_rec, &nn_ub_id, &nn_ub_rec); cBYE(status); chk_range(ub_id, 0, g_n_fld); status = get_data(ub_rec, &ub_X, &ub_nX, 0); cBYE(status); if ( nn_ub_id >= 0 ) { go_BYE(-1); } if ( ub_rec.fldtype != I4 ) { cBYE(-1); } //-------------------------------------------------------- /*-- Current implementation assumes that numbers sorted in particular * manner. 
This is checked below */ status = f_to_s(t1, f1, "is_sorted", f1_buf, BUFLEN); cBYE(status); if ( ( strcmp(f1_buf, "ascending") != 0 ) && ( strcmp(f1_buf, "descending") != 0 ) ) { fprintf(stderr , "fld [%s] in tbl [%s] should be sorted\n", f1, t1); go_BYE(-1); } status = f_to_s(t2, lb, "is_sorted", lb_ub_buf, BUFLEN); cBYE(status); if ( strcmp(lb_ub_buf, f1_buf) != 0 ) { fprintf(stderr , "Expect fld %s in tbl %s to be sorted ", lb, t2); fprintf(stderr , "same as fld %s in tbl %s\n", f1, t2); go_BYE(-1); } status = f_to_s(t2, ub, "is_sorted", lb_ub_buf, BUFLEN); cBYE(status); if ( strcmp(lb_ub_buf, f1_buf) != 0 ) { fprintf(stderr , "Expect fld %s in tbl %s to be sorted ", ub, t2); fprintf(stderr , "same as fld %s in tbl %s\n", f1, t2); go_BYE(-1); } /* Make space for output */ cnt_fldtype = I4; status = get_fld_sz(cnt_fldtype, &fldsz); cBYE(status); filesz = fldsz * nR2; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &cnt_X, &cnt_nX, 1); int *cntptr = (int *)cnt_X; assign_const_I4(cntptr, nR2, 0); long long chk_nR1 = 0; //----------------------------------------------------------- int *inptr = (int *)f1_X; int *lbptr = (int *)lb_X; int *ubptr = (int *)ub_X; int t2idx = 0; int lbval = lbptr[0]; int ubval = ubptr[0]; if ( strcmp(f1_buf, "ascending") == 0 ) { for ( long long i = 0; i < nR1; i++ ) { int valI4 = inptr[i]; if ( valI4 < lbval ) { continue; } for ( ; t2idx < nR2 ; ) { if ( valI4 < ubval ) { cntptr[t2idx]++; chk_nR1++; break; } t2idx++; if ( t2idx >= nR2 ) { break; } lbval = lbptr[t2idx]; ubval = ubptr[t2idx]; if ( valI4 < lbval ) { break; } } if ( t2idx == nR2 ) { break; } } } else if ( strcmp(f1_buf, "descending") == 0 ) { for ( long long i = 0; i < nR1; i++ ) { int valI4 = inptr[i]; if ( valI4 >= ubval ) { continue; } for ( ; t2idx < nR2 ; ) { if ( valI4 >= lbval ) { cntptr[t2idx]++; chk_nR1++; break; } t2idx++; if ( t2idx >= nR2 ) { break; } lbval = lbptr[t2idx]; ubval = ubptr[t2idx]; if ( valI4 >= ubval 
) { break; } } if ( t2idx == nR2 ) { break; } } } else { go_BYE(-1); } sprintf(rslt_buf, "%lld", chk_nR1); // return number of matches zero_fld_rec(&cnt_rec); cnt_rec.fldtype = cnt_fldtype; status = add_fld(t2_id, cnt, ddir_id, fileno, &cnt_id, &cnt_rec); cBYE(status); BYE: rs_munmap(f1_X, f1_nX); rs_munmap(lb_X, lb_nX); rs_munmap(ub_X, ub_nX); rs_munmap(cnt_X, cnt_nX); free_if_non_null(offsets); return(status); }
/* START FUNC DECL */ int lkp_sort( char *t1, char *f1, char *t2, char *cnt, /* t2[i] is the number of times t1.f1 has value i */ /* Hence, t1.f1 < num_rows(t2) */ char *idx_f1, char *srt_f1 ) /* STOP FUNC DECL */ { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *srt_f1_X = NULL; size_t srt_f1_nX = 0; char *idx_f1_X = NULL; size_t idx_f1_nX = 0; char *cnt_X = NULL; size_t cnt_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN; int f1_id = INT_MIN, nn_f1_id = INT_MIN; int cnt_id = INT_MIN, nn_cnt_id = INT_MIN; int srt_f1_id = INT_MIN; int idx_f1_id = INT_MIN; TBL_REC_TYPE t1_rec, t2_rec; FLD_REC_TYPE f1_rec, nn_f1_rec; FLD_REC_TYPE cnt_rec, nn_cnt_rec; FLD_REC_TYPE srt_f1_rec, idx_f1_rec; FLD_TYPE srt_fldtype, idx_fldtype; long long nR1, nR2; long long *offsets = NULL; bool mk_idx; long long *idx_f1I8 = NULL; /*----------------------------------------------------------------*/ if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( cnt == NULL ) || ( *cnt == '\0' ) ) { go_BYE(-1); } if ( ( srt_f1 == NULL ) || ( *srt_f1 == '\0' ) ) { go_BYE(-1); } if ( idx_f1 == NULL ) { go_BYE(-1); } if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); } if ( strcmp(f1, srt_f1) == 0 ) { go_BYE(-1); } if ( strcmp(f1, idx_f1) == 0 ) { go_BYE(-1); } if ( strcmp(srt_f1, idx_f1) == 0 ) { go_BYE(-1); } /*--------------------------------------------------------*/ status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); if ( t1_id < 0 ) { go_BYE(-1); } nR1 = t1_rec.nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t1_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); if ( f1_id < 0 ) { go_BYE(-1); } status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); /* Restrictions of current implementation */ if ( f1_rec.fldtype != I4 ) { go_BYE(-1); } if ( nn_f1_id >= 0 ) { go_BYE(-1); } int *f1I4 = (int *)f1_X; 
/*--------------------------------------------------------*/ status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); if ( t2_id < 0 ) { go_BYE(-1); } nR2 = t2_rec.nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t2_id, cnt, &cnt_id, &cnt_rec, &nn_cnt_id, &nn_cnt_rec); cBYE(status); if ( cnt_id < 0 ) { go_BYE(-1); } status = get_data(cnt_rec, &cnt_X, &cnt_nX, false); cBYE(status); if ( cnt_rec.fldtype != I8 ) { go_BYE(-1); } if ( nn_cnt_id >= 0 ) { go_BYE(-1); } long long *cntI8 = (long long *)cnt_X; /*--------------------------------------------------------*/ /* Make output storage */ int fldsz = 0; size_t filesz = 0; int srt_ddir_id = -1, srt_fileno = -1; int idx_ddir_id = -1, idx_fileno = -1; srt_fldtype = f1_rec.fldtype; status = get_fld_sz(srt_fldtype, &fldsz); cBYE(status); filesz = nR1 * fldsz; status = mk_temp_file(filesz, &srt_ddir_id, &srt_fileno); cBYE(status); status = q_mmap(srt_ddir_id, srt_fileno, &srt_f1_X, &srt_f1_nX, true); cBYE(status); int *srt_f1I4 = (int *)srt_f1_X; if ( *idx_f1 != '\0' ) { mk_idx = true; idx_fldtype = I8; status = get_fld_sz(idx_fldtype, &fldsz); cBYE(status); filesz = nR1 * fldsz; status = mk_temp_file(filesz, &idx_ddir_id, &idx_fileno); cBYE(status); status = q_mmap(idx_ddir_id, idx_fileno, &idx_f1_X, &idx_f1_nX, true); cBYE(status); idx_f1I8 = (long long *)idx_f1_X; } else { mk_idx = false; } /*--------------------------------------------------------*/ offsets = malloc(nR2 * sizeof(long long)); return_if_malloc_failed(offsets); offsets[0] = 0; for ( int i = 1; i < nR2; i++ ) { offsets[i] = offsets[i-1] + cntI8[i-1] ; } //-------------------------------------------------------------- for ( long long i = 0; i < nR1; i++ ) { int I4val = f1I4[i]; if ( ( I4val < 0 ) || ( I4val >= nR2 ) ) { go_BYE(-1); } int offset = offsets[I4val]; if ( ( offset < 0 ) || ( offset >= nR1 ) ) { go_BYE(-1); } srt_f1I4[offset] = I4val; if ( mk_idx == true ) { idx_f1I8[offset] = i; } offsets[I4val] = offset + 1; } 
/*-----------------------------------------------------------*/ rs_munmap(srt_f1_X, srt_f1_nX); rs_munmap(idx_f1_X, idx_f1_nX); zero_fld_rec(&srt_f1_rec); srt_f1_rec.fldtype = srt_fldtype; status = add_fld(t1_id, srt_f1, srt_ddir_id, srt_fileno, &srt_f1_id, &srt_f1_rec); cBYE(status); if ( mk_idx == true ) { zero_fld_rec(&idx_f1_rec); idx_f1_rec.fldtype = idx_fldtype; status = add_fld(t1_id, idx_f1, idx_ddir_id, idx_fileno, &idx_f1_id, &idx_f1_rec); cBYE(status); } /*-----------------------------------------------------------*/ BYE: free_if_non_null(offsets); rs_munmap(f1_X, f1_nX); rs_munmap(srt_f1_X, srt_f1_nX); rs_munmap(idx_f1_X, idx_f1_nX); rs_munmap(cnt_X, cnt_nX); return(status); }
// last review 9/5/2013 //--------------------------------------------------------------- // START FUNC DECL int xfer( char *src_tbl, char *src_fld, char *dst_tbl, char *dst_idx, // this is an index into the source table char *dst_fld ) // STOP FUNC DECL { int status = 0; TBL_REC_TYPE src_tbl_rec, dst_tbl_rec; FLD_REC_TYPE src_fld_rec, dst_idx_meta, dst_fld_rec; FLD_REC_TYPE nn_src_fld_rec, nn_dst_idx_meta, nn_dst_fld_rec; char strbuf[32]; int dict_tbl_id; long long src_nR, dst_nR; char *src_fld_X = NULL; size_t src_fld_nX = 0; char *dst_fld_X = NULL; size_t dst_fld_nX = 0; char *dst_idx_X = NULL; size_t dst_idx_nX = 0; char *nn_src_fld_X = NULL; size_t nn_src_fld_nX = 0; char *nn_dst_idx_X = NULL; size_t nn_dst_idx_nX = 0; char *nn_dst_fld_X = NULL; size_t nn_dst_fld_nX = 0; int src_tbl_id = INT_MIN, dst_tbl_id = INT_MIN; int src_fld_id = INT_MIN, nn_src_fld_id = INT_MIN; int dst_idx_id = INT_MIN, nn_dst_idx_id = INT_MIN; int dst_fld_id = INT_MIN, nn_dst_fld_id = INT_MIN; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_idx == NULL ) || ( *dst_idx == '\0' ) ) { go_BYE(-1); } if ( ( dst_fld == NULL ) || ( *dst_fld == '\0' ) ) { go_BYE(-1); } if ( strcmp(dst_idx, dst_fld) == 0 ) { go_BYE(-1); } //---------------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); src_nR = g_tbls[src_tbl_id].nR; status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, &nn_src_fld_id, &nn_src_fld_rec); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); status = get_data(src_fld_rec, &src_fld_X, &src_fld_nX, false); cBYE(status); status = is_tbl(dst_tbl, &dst_tbl_id, &dst_tbl_rec); cBYE(status); chk_range(dst_tbl_id, 0, g_n_tbl); dst_nR = 
g_tbls[dst_tbl_id].nR; status = is_fld(NULL, dst_tbl_id, dst_idx, &dst_idx_id, &dst_idx_meta, &nn_dst_idx_id, &nn_dst_idx_meta); cBYE(status); chk_range(dst_idx_id, 0, g_n_fld); status = get_data(dst_idx_meta, &dst_idx_X, &dst_idx_nX, false); cBYE(status); if ( nn_src_fld_id >= 0 ) { status = get_data(nn_src_fld_rec, &nn_src_fld_X, &nn_src_fld_nX, false); cBYE(status); } if ( nn_dst_idx_id >= 0 ) { status = get_data(nn_dst_idx_meta, &nn_dst_idx_X, &nn_dst_idx_nX, false); cBYE(status); } //---------------------------------------------------------------- // allocate space for output int fldsz = 0; size_t filesz = 0; int ddir_id = -1, fileno = -1; int nn_ddir_id = -1, nn_fileno = -1; if ( src_fld_rec.fldtype == SC ) { int len = src_fld_rec.len; if ( len <= 0 ) { go_BYE(-1); } filesz = (len+1) * dst_nR; } else if ( ( src_fld_rec.fldtype == SV ) ||( src_fld_rec.fldtype == B ) ) { fprintf(stderr, "NOT IMPLEMENTED\n"); go_BYE(-1); } else { status = get_fld_sz(src_fld_rec.fldtype, &fldsz); cBYE(status); filesz = fldsz * dst_nR; } status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &dst_fld_X, &dst_fld_nX, true); cBYE(status); if ( nn_src_fld_id < 0 ) { // No nulls in source means no nulls possible in destination } else { // may not need the nn field but will discover this only later status = get_fld_sz(I1, &fldsz); cBYE(status); filesz = fldsz * dst_nR; status = mk_temp_file(filesz, &nn_ddir_id, &nn_fileno); cBYE(status); status = q_mmap(nn_ddir_id, nn_fileno, &nn_dst_fld_X, &nn_dst_fld_nX,true); cBYE(status); } //---------------------------------------------------------------- switch ( src_fld_rec.fldtype ) { #include "incl_xfer_I1.c" #include "incl_xfer_I2.c" #include "incl_xfer_I4.c" #include "incl_xfer_I8.c" #include "incl_xfer_F4.c" #include "incl_xfer_F8.c" case SC : switch ( dst_idx_meta.fldtype ) { case I4 : xfer_SC_I4(src_fld_X, src_fld_rec.len, src_nR, dst_idx_X, nn_dst_idx_X, dst_nR, dst_fld_X); break; default : 
go_BYE(-1); break; } break; default : go_BYE(-1); break; } #define BUFLEN 32 char buf[BUFLEN], buf2[BUFLEN]; long long nn_dst_nR; zero_fld_rec(&dst_fld_rec); dst_fld_rec.fldtype = src_fld_rec.fldtype; status = add_fld(dst_tbl_id, dst_fld, ddir_id, fileno, &dst_fld_id, &dst_fld_rec); cBYE(status); // Find out whether an nn field is really needed status = vec_f_to_s(nn_dst_fld_X, I1, NULL, dst_nR, "sum", buf, BUFLEN); cBYE(status); status = read_nth_val(buf, ":", 0, buf2, BUFLEN); cBYE(status); status = stoI8(buf2, &nn_dst_nR); cBYE(status); if ( dst_nR == nn_dst_nR ) { // nn field not needed status = q_delete(nn_ddir_id, nn_fileno); cBYE(status); } else { zero_fld_rec(&nn_dst_fld_rec); nn_dst_fld_rec.fldtype = I1; status = add_aux_fld(NULL, dst_tbl_id, NULL, dst_fld_id, nn_ddir_id, nn_fileno, nn, &nn_dst_fld_id, &nn_dst_fld_rec); } status = int_get_meta(src_tbl_id, src_fld_id, "dict_tbl_id", strbuf); cBYE(status); status = stoI4(strbuf, &dict_tbl_id); cBYE(status); if ( dict_tbl_id >= 0 ) { status = int_set_meta(dst_tbl_id, dst_fld_id, "dict_tbl_id", strbuf, true); cBYE(status); } BYE: rs_munmap(src_fld_X, src_fld_nX); rs_munmap(dst_idx_X, dst_idx_nX); rs_munmap(dst_fld_X, dst_fld_nX); rs_munmap(nn_src_fld_X, nn_src_fld_nX); rs_munmap(nn_dst_idx_X, nn_dst_idx_nX); rs_munmap(nn_dst_fld_X, nn_dst_fld_nX); return status ; }
// START FUNC DECL int ext_add_fld( const char *tbl, const char *fld, const char *fldspec ) // STOP FUNC DECL { int status = 0; TBL_REC_TYPE tbl_rec; int tbl_id = -1; FLD_REC_TYPE fld_rec; int fld_id = -1; FLD_TYPE fldtype = undef_fldtype; bool is_null; int fileno = -1; int ddir_id = -1; size_t filesz = 0; int len = 0; char *inX = NULL; size_t in_nX = 0; char *opX = NULL; size_t op_nX = 0; char from_dir[MAX_LEN_DIR_NAME+1]; zero_string(from_dir, MAX_LEN_DIR_NAME+1); char ext_filename[MAX_LEN_EXT_FILE_NAME+1]; zero_string(ext_filename, MAX_LEN_EXT_FILE_NAME+1); char str_fldtype[32]; zero_string(str_fldtype, 32); status = is_tbl(tbl, &tbl_id, &tbl_rec); chk_range(tbl_id, 0, g_n_tbl); status = del_fld(NULL, tbl_id, fld, -1); cBYE(status); status = chk_aux_info(fldspec); cBYE(status); status = extract_S(fldspec, "file=[", "]", ext_filename, MAX_LEN_EXT_FILE_NAME, &is_null); if ( is_null ){ go_BYE(-1); } // Check that file name is not underscore followed by digits bool is_first_char_uscore = false; bool is_other_char_digit = true; if ( ext_filename[0] == '_' ) { is_first_char_uscore = true; } for ( char *cptr = ext_filename + 1; *cptr != '\0'; cptr++ ) { if ( !isdigit(*cptr) ) { is_other_char_digit = false; break; } } if ( ( is_first_char_uscore ) && ( is_other_char_digit ) ) { fprintf(stderr, "File name is not valid\n"); go_BYE(-1); } //---------------------------------------------- status = extract_S(fldspec, "fldtype=[", "]", str_fldtype, 32, &is_null); if ( is_null ){ go_BYE(-1); } status = unstr_fldtype(str_fldtype, &fldtype); cBYE(status); if ( fldtype == SC ) { status = extract_I4(fldspec, "len=[", "]", &len, &is_null); if ( is_null ){ go_BYE(-1); } } status = unstr_fldtype(str_fldtype, &fldtype); cBYE(status); // Import of field of type SV not implemented if ( fldtype == SV ) { go_BYE(-1); } status = extract_S(fldspec, "dir=[", "]", from_dir, MAX_LEN_DIR_NAME, &is_null); if ( is_null ){ strcpy(from_dir, g_cwd); } /* determine whether you have to "cp" or "mv" 
original file */ char buffer[8]; bool is_keep_original = true; status = extract_S(fldspec, "keep_original=[", "]", buffer, 8, &is_null); if ( is_null ) { is_keep_original = true; } else { if ( strcasecmp(buffer, "true") == 0 ) { is_keep_original = true; } else if ( strcasecmp(buffer, "false") == 0 ) { is_keep_original = false; } else { go_BYE(-1); } } // Move to directory where file exists status = chdir(from_dir); if ( status < 0 ) { fprintf(stderr, "Unable to cd to %s \n", from_dir); } cBYE(status); if ( file_exists(ext_filename) == false ) { fprintf(stderr, "File not found [%s] in [%s] \n", ext_filename, from_dir); go_BYE(-1); } // "mv" or "cp" input file to data directory and rename status = rs_mmap(ext_filename, &inX, &in_nX, 0); cBYE(status); status = mk_temp_file(in_nX, &ddir_id, &fileno); cBYE(status); if ( is_keep_original == true ) { status = q_mmap(ddir_id, fileno, &opX, &op_nX, 1); cBYE(status); memcpy(opX, inX, in_nX); } else { char newfile[MAX_LEN_DIR_NAME+1+MAX_LEN_FILE_NAME+1]; char *q_data_dir = g_ddirs[ddir_id].name; sprintf(newfile, "%s/_%d", q_data_dir, fileno); rename(ext_filename, newfile); } status = chdir(g_cwd); cBYE(status); zero_fld_rec(&fld_rec); fld_rec.fldtype = fldtype; fld_rec.fileno = fileno; fld_rec.ddir_id = ddir_id; fld_rec.filesz = filesz; fld_rec.len = len; // Check that file exists and is of correct size status = chk_file_size(tbl_rec.nR, fld_rec, &filesz); cBYE(status); status = add_fld(tbl_id, fld, ddir_id, fileno, &fld_id, &fld_rec); cBYE(status); BYE: rs_munmap(inX, in_nX); rs_munmap(opX, op_nX); chdir(g_cwd); return status ; }
//--------------------------------------------------------------- // START FUNC DECL int wisifxthenyelsez( char *tbl, char *w, char *x, char *y, char *z ) // STOP FUNC DECL { int status = 0; char *W = NULL; size_t nW = 0; char *X = NULL; size_t nX = 0; char *Y = NULL; size_t nY = 0; char *Z = NULL; size_t nZ = 0; char *nn_X = NULL; size_t nn_nX = 0; char *nn_W = NULL; size_t nn_nW = 0; char *nn_Y = NULL; size_t nn_nY = 0; char *nn_Z = NULL; size_t nn_nZ = 0; TBL_REC_TYPE tbl_rec; FLD_REC_TYPE x_rec, nn_x_rec; FLD_REC_TYPE y_rec, nn_y_rec; FLD_REC_TYPE z_rec, nn_z_rec; FLD_REC_TYPE w_rec, nn_w_rec; char opfile[MAX_LEN_FILE_NAME+1]; char nn_opfile[MAX_LEN_FILE_NAME+1]; int tbl_id = INT_MIN; int x_id = INT_MIN, nn_x_id = INT_MIN; int y_id = INT_MIN, nn_y_id = INT_MIN; int z_id = INT_MIN, nn_z_id = INT_MIN; int w_id = INT_MIN, nn_w_id = INT_MIN; long long nR; bool is_some_null = false; char *endptr; FLD_TYPE w_fldtype; int yvalI4 = INT_MAX; long long yvalI8 = LLONG_MAX; char yvalI1 = SCHAR_MAX; float yvalF4 = FLT_MAX; int zvalI4 = INT_MAX; long long zvalI8 = LLONG_MAX; char zvalI1 = SCHAR_MAX; float zvalF4 = FLT_MAX; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( w == NULL ) || ( *w == '\0' ) ) { go_BYE(-1); } if ( ( x == NULL ) || ( *x == '\0' ) ) { go_BYE(-1); } if ( ( y == NULL ) || ( *y == '\0' ) ) { go_BYE(-1); } if ( ( z == NULL ) || ( *z == '\0' ) ) { go_BYE(-1); } if ( strcmp(w, x) == 0 ) { go_BYE(-1); } if ( strcmp(x, y) == 0 ) { go_BYE(-1); } if ( strcmp(y, z) == 0 ) { go_BYE(-1); } if ( strcmp(z, w) == 0 ) { go_BYE(-1); } if ( strcmp(y, w) == 0 ) { go_BYE(-1); } zero_string(opfile, (MAX_LEN_FILE_NAME+1)); zero_string(nn_opfile, (MAX_LEN_FILE_NAME+1)); zero_fld_rec(&y_rec); zero_fld_rec(&nn_y_rec); zero_fld_rec(&z_rec); zero_fld_rec(&nn_z_rec); //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id, &tbl_rec); cBYE(status); chk_range(tbl_id, 
0, g_n_tbl); nR = tbl_rec.nR; //-------------------------------------------------------- // Get info about field x status = is_fld(NULL, tbl_id, x, &x_id, &x_rec, &nn_x_id, &nn_x_rec); cBYE(status); chk_range(x_id, 0, g_n_fld); if ( x_rec.fldtype != I1 ) { go_BYE(-1); } status = get_data(x_rec, &X, &nX, 0); cBYE(status); if ( nn_x_id >= 0 ) { status = get_data(nn_x_rec, &nn_X, &nn_nX, 0); cBYE(status); } //-======================================================= // Get info about field y status = is_fld(NULL, tbl_id, y, &y_id, &y_rec, &nn_y_id, &nn_y_rec); cBYE(status); // Get info about field z status = is_fld(NULL, tbl_id, z, &z_id, &z_rec, &nn_z_id, &nn_z_rec); cBYE(status); if ( y_id < 0 ) { /* Then y is a scalar */ switch ( z_rec.fldtype ) { case I1 : yvalI8 = strtoll(y, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( ( yvalI8 < SCHAR_MIN ) || ( yvalI8 > SCHAR_MAX ) ) { go_BYE(-1); } yvalI1 = yvalI8; break; case I4 : yvalI8 = strtoll(y, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( ( yvalI8 < INT_MIN ) || ( yvalI8 > INT_MAX ) ) { go_BYE(-1); } yvalI4 = yvalI8; break; case I8 : yvalI8 = strtoll(y, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } break; case F4 : yvalI4 = strtod(y, &endptr); if ( *endptr != '\0' ) { go_BYE(-1); } break; case I2 : case clob : case F8 : default : go_BYE(-1); break; } } else { chk_range(y_id, 0, g_n_fld); status = get_data(y_rec, &Y, &nY, 0); cBYE(status); if ( nn_y_id >= 0 ) { status = get_data(nn_y_rec, &nn_Y, &nn_nY, 0); cBYE(status); } } //-======================================================= // Get info about field z status = is_fld(NULL, tbl_id, z, &z_id, &z_rec, &nn_z_id, &nn_z_rec); cBYE(status); if ( z_id < 0 ) { /* Then z is a scalar */ switch ( y_rec.fldtype ) { case I1 : zvalI8 = strtoll(z, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } if ( ( zvalI8 < SCHAR_MIN ) || ( zvalI8 > SCHAR_MAX ) ) { go_BYE(-1); } zvalI1 = zvalI8; break; case I4 : zvalI8 = strtoll(z, &endptr, 10); if ( 
*endptr != '\0' ) { go_BYE(-1); } if ( ( zvalI8 < INT_MIN ) || ( zvalI8 > INT_MAX ) ) { go_BYE(-1); } zvalI4 = zvalI8; break; case I8 : zvalI8 = strtoll(z, &endptr, 10); if ( *endptr != '\0' ) { go_BYE(-1); } break; case F4 : zvalI4 = strtod(z, &endptr); if ( *endptr != '\0' ) { go_BYE(-1); } break; case I2 : case clob : case F8 : default : go_BYE(-1); break; } } else { chk_range(z_id, 0, g_n_fld); status = get_data(z_rec, &Z, &nZ, 0); cBYE(status); if ( nn_z_id >= 0 ) { status = get_data(nn_z_rec, &nn_Z, &nn_nZ, 0); cBYE(status); } } if ( ( y_id >= 0 ) && ( z_id >= 0 ) ) { if ( y_rec.fldtype != z_rec.fldtype ) { go_BYE(-1); } } w_fldtype = xunknown; if ( y_id >= 0 ) { w_fldtype = y_rec.fldtype; } if ( z_id >= 0 ) { w_fldtype = z_rec.fldtype; } if ( ( y_id < 0 ) && ( z_id < 0 ) ) { go_BYE(-1); } //-------------------------------------------------------- int fldsz, ddir_id = INT_MAX, nn_ddir_id = INT_MAX; status = get_fld_sz(w_fldtype, &fldsz); status = mk_temp_file(opfile, (nR * fldsz), &ddir_id); status = q_mmap(ddir_id, opfile, &W, &nW, 1); cBYE(status); status = mk_temp_file(nn_opfile, (nR * sizeof(char)), &nn_ddir_id); status = q_mmap(nn_ddir_id, nn_opfile, &nn_W, &nn_nW, 1); cBYE(status); switch ( w_fldtype ) { case I4 : core_wisifxthenyelsez_I4(X, nn_X, nR, yvalI4, Y, nn_Y, zvalI4, Z, nn_Z, W, nn_W, &is_some_null); break; case I8 : core_wisifxthenyelsez_I8(X, nn_X, nR, yvalI8, Y, nn_Y, zvalI8, Z, nn_Z, W, nn_W, &is_some_null); break; case I1 : core_wisifxthenyelsez_I1(X, nn_X, nR, yvalI1, Y, nn_Y, zvalI1, Z, nn_Z, W, nn_W, &is_some_null); break; case F4 : core_wisifxthenyelsez_F4(X, nn_X, nR, yvalF4, Y, nn_Y, zvalF4, Z, nn_Z, W, nn_W, &is_some_null); break; case I2 : case F8 : default : go_BYE(-1); break; } status = add_fld(tbl_id, w, ddir_id, opfile, w_fldtype, -1, &w_id, &w_rec); cBYE(status); if ( is_some_null ) { status = add_aux_fld(NULL, tbl_id, NULL, w_id, nn_ddir_id, nn_opfile, "nn", &nn_w_id, &nn_w_rec); cBYE(status); } else { unlink(nn_opfile); } 
BYE: rs_munmap(X, nX); rs_munmap(Y, nY); rs_munmap(Z, nZ); rs_munmap(W, nW); rs_munmap(nn_Y, nn_nY); rs_munmap(nn_Z, nn_nZ); rs_munmap(nn_W, nn_nW); return(status); }
// TODO: What about duplicating strings? Does it work? //--------------------------------------------------------------- // START FUNC DECL int dup_fld( char *tbl, char *f1, char *f2 ) // STOP FUNC DECL { int status = 0; char *Y = NULL; size_t nY = 0; char *X = NULL; size_t nX = 0; char *nn_X = NULL; size_t nn_nX = 0; int tbl_id, f1_id, f2_id, nn_f1_id, nn_f2_id; int ddir_id = -1, fileno = -1; int nn_ddir_id = -1, nn_fileno = -1; TBL_REC_TYPE tbl_rec; FLD_REC_TYPE f1_rec, nn_f1_rec, f2_rec, nn_f2_rec; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( strcmp(f1, f2) == 0 ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id, &tbl_rec); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); status = is_fld(NULL, tbl_id, f2, &f2_id, &f2_rec, &nn_f2_id, &nn_f2_rec); cBYE(status); if ( f2_id >= 0 ) { status = del_fld(NULL, tbl_id, NULL, f2_id); cBYE(status); } status = is_fld(NULL, tbl_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); chk_range(f1_id, 0, g_n_fld); // Open input file status = get_data(g_flds[f1_id], &X, &nX, false); cBYE(status); // Create a copy of the data status = mk_temp_file(nX, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &Y, &nY, 1); cBYE(status); memcpy(Y, X, nX); // Get empty spot for f2 status = get_empty_fld(tbl_id, f2, g_flds, g_n_fld, &f2_id); cBYE(status); // f2 is same as f1 except for data and "is_external" g_flds[f2_id] = g_flds[f1_id]; g_flds[f2_id].is_external = false; g_flds[f2_id].nn_fld_id = -1; status = chk_fld_name(f2); zero_string(g_flds[f2_id].name, MAX_LEN_FLD_NAME+1); g_flds[f2_id].fileno = -1; strcpy(g_flds[f2_id].name, f2); g_flds[f2_id].fileno = fileno; g_flds[f2_id].ddir_id = ddir_id; // Add to hash table 
//-------------------------------------------------------- // Add auxiliary field nn if if it exists nn_f1_id = g_flds[f1_id].nn_fld_id; if ( nn_f1_id >= 0 ) { // open input file status = get_data(g_flds[nn_f1_id], &nn_X, &nn_nX, false); cBYE(status); // create copy of data status = mk_temp_file(nn_nX, &nn_ddir_id, &nn_fileno); cBYE(status); status = q_mmap(nn_ddir_id, nn_fileno, &Y, &nY, true); memcpy(Y, nn_X, nn_nX); rs_munmap(Y, nY); rs_munmap(nn_X, nn_nX); nn_f1_id = INT_MIN; zero_fld_rec(&nn_f2_rec); nn_f2_rec.fldtype = nn_f1_rec.fldtype; status = add_aux_fld(NULL, tbl_id, NULL, f2_id, nn_ddir_id, nn_fileno, nn, &nn_f2_id, &nn_f2_rec); cBYE(status); } //-------------------------------------------------------- BYE: rs_munmap(X, nX); rs_munmap(nn_X, nn_nX); rs_munmap(Y, nY); return status ; }
/* START FUNC DECL */ int t1f1t2f2opt3f3( char *t1, char *f1, char *t2, char *f2, char *op, char *options, char *t3, char *f3 ) /* STOP FUNC DECL */ { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *f3_X = NULL; size_t f3_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN, t3_id = INT_MIN; int f1_id = INT_MIN, f2_id = INT_MIN, f3_id = INT_MIN; int nn_f1_id = INT_MIN, nn_f2_id = INT_MIN; TBL_REC_TYPE t1_rec, t2_rec, t3_rec; FLD_REC_TYPE f1_rec, nn_f1_rec; FLD_REC_TYPE f2_rec, nn_f2_rec; FLD_REC_TYPE f3_rec; FLD_TYPE f3_type = undef_fldtype; long long nR1 = INT_MIN, nR2 = INT_MIN; long long nR3 = INT_MIN, nR3_allocated = INT_MIN; int f3_fldsz = INT_MAX; size_t filesz; int ddir_id = -1, fileno = -1; /*----------------------------------------------------------------*/ if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( op == NULL ) || ( *op == '\0' ) ) { go_BYE(-1); } if ( ( f3 == NULL ) || ( *f3 == '\0' ) ) { go_BYE(-1); } if ( strcmp(t1, t3) == 0 ) { go_BYE(-1); } if ( ( t3 == NULL ) || ( *t3 == '\0' ) ) { go_BYE(-1); } if ( strcmp(t2, t3) == 0 ) { go_BYE(-1); } /*--------------------------------------------------------*/ status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); if ( t1_id < 0 ) { go_BYE(-1); } nR1 = t1_rec.nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t1_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); cBYE(status); if ( f1_id < 0 ) { go_BYE(-1); } status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); /*--------------------------------------------------------*/ status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); if ( t2_id < 0 ) { go_BYE(-1); } nR2 = t2_rec.nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t2_id, f2, &f2_id, &f2_rec, &nn_f2_id, 
&nn_f2_rec); cBYE(status); if ( f2_id < 0 ) { go_BYE(-1); } status = get_data(f2_rec, &f2_X, &f2_nX, false); cBYE(status); /*--------------------------------------------------------*/ /* f1, f2 must be sorted ascending */ if ( f1_rec.srttype != ascending ) { go_BYE(-1); } if ( f2_rec.srttype != ascending ) { go_BYE(-1); } /* Have not implemented case where f1 has null field */ if ( nn_f1_id >= 0 ) { go_BYE(-1); } if ( nn_f2_id >= 0 ) { go_BYE(-1); } if ( f1_rec.fldtype != f2_rec.fldtype ) { go_BYE(-1); } switch ( f1_rec.fldtype ) { case I4 : case I8 : break; default : go_BYE(-1); break; } f3_type = f1_rec.fldtype; status = get_fld_sz(f3_type, &f3_fldsz); cBYE(status); if ( strcmp(op, "intersection") == 0 ) { nR3_allocated = min(nR1 , nR2); } else if ( strcmp(op, "a_minus_b") == 0 ) { nR3_allocated = nR1; /* upper bound. Truncate this later */ } else if ( ( strcmp(op, "union") == 0 ) || ( strcmp(op, "pvalcalc") == 0 ) ) { nR3_allocated = nR1 + nR2; /* nR1 + nR2 is upper bound. Reduce later */ } else { go_BYE(-1); } filesz = f3_fldsz * nR3_allocated; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &f3_X, &f3_nX, true); cBYE(status); if ( strcmp(op, "intersection") == 0 ) { switch ( f1_rec.fldtype ) { case I4 : status = intersection_I4((int *)f1_X, nR1, (int *)f2_X, nR2, (int *)f3_X, &nR3); break; case I8 : status = intersection_I8((long long *)f1_X, nR1, (long long *)f2_X, nR2, (long long *)f3_X, &nR3); break; default : go_BYE(-1); break; } cBYE(status); } else if ( strcmp(op, "a_minus_b") == 0 ) { switch ( f1_rec.fldtype ) { case I4 : a_minus_b_I4((int *)f1_X, nR1, (int *)f2_X, nR2, (int *)f3_X, &nR3); break; case I8 : a_minus_b_I8((long long *)f1_X, nR1, (long long *)f2_X, nR2, (long long *)f3_X, &nR3); break; default : go_BYE(-1); break; } cBYE(status); } else if ( ( strcmp(op, "union") == 0 ) || ( strcmp(op, "pvalcalc") == 0 ) ) { unsigned long long mask = 0; if ( strcmp(op, "pvalcalc") == 0 ) { bool is_null = 
false; status = extract_UI8(options, "mask=[", "]", &mask, &is_null); if ( is_null ) { go_BYE(-1); } } switch ( f1_rec.fldtype ) { case I4 : status = union_I4((int *)f1_X, nR1, (int *)f2_X, nR2, (int *)f3_X, &nR3, (unsigned int)mask); cBYE(status); break; case I8 : status = union_I8((long long *)f1_X, nR1, (long long *)f2_X, nR2, (long long *)f3_X, &nR3, (unsigned long long)mask); cBYE(status); break; default : go_BYE(-1); break; } } else { go_BYE(-1); } rs_munmap(f3_X, f3_nX); if ( nR3 == 0 ) { status = q_delete(ddir_id, fileno); cBYE(status); ddir_id = fileno = -1; go_BYE(0); } else if ( nR3 < nR3_allocated ) { status = q_trunc(ddir_id, fileno, (nR3 * f3_fldsz)); cBYE(status); } else if ( nR3 > nR3_allocated ) { go_BYE(-1); } char str_nR3[32]; sprintf(str_nR3, "%lld", nR3); status = add_tbl(t3, str_nR3, &t3_id, &t3_rec); cBYE(status); zero_fld_rec(&f3_rec); f3_rec.fldtype = f3_type; status = add_fld(t3_id, f3, ddir_id, fileno, &f3_id, &f3_rec); cBYE(status); /* Since f1 and f2 are sorted ascending, so must f3 be */ if ( ( strcmp(op, "union") == 0 ) || ( strcmp(op, "intersection") == 0 ) || ( strcmp(op, "a_minus_b") == 0 ) ) { status = int_set_meta(t3_id, f3_id, "srttype", "ascending", true); cBYE(status); } /*-----------------------------------------------------------*/ BYE: rs_munmap(f1_X, f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(f3_X, f3_nX); return status ; }
// last review 9/7/2013 //--------------------------------------------------------------- // START FUNC DECL int f1s1opf2( char *tbl, char *f1, char *str_scalar, char *op, char *f2 ) // STOP FUNC DECL { int status = 0; char *f1_X = NULL; size_t f1_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *nn_f1_X = NULL; size_t nn_f1_nX = 0; char *nn_f2_X = NULL; size_t nn_f2_nX = 0; long long nR; TBL_REC_TYPE tbl_rec; int tbl_id; FLD_REC_TYPE f1_rec, nn_f1_rec; int f1_id, nn_f1_id; FLD_REC_TYPE f2_rec, nn_f2_rec; int f2_id, nn_f2_id; int ddir_id = -1, fileno = -1; int nn_ddir_id = -1, nn_fileno = -1; FLD_TYPE f1_fldtype = undef_fldtype, f2_fldtype = undef_fldtype; //---------------------------------------------------------------- if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); } if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); } if ( ( str_scalar == NULL ) || ( *str_scalar == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( op == NULL ) || ( *op == '\0' ) ) { go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(tbl, &tbl_id, &tbl_rec); cBYE(status); chk_range(tbl_id, 0, g_n_tbl); nR = tbl_rec.nR; status = is_fld(NULL, tbl_id, f1, &f1_id, &f1_rec, &nn_f1_id, &nn_f1_rec); chk_range(f1_id, 0, g_n_fld); status = get_data(f1_rec, &f1_X, &f1_nX, false); cBYE(status); // Get nn field for f1 if if it exists if ( nn_f1_id >= 0 ) { status = get_data(nn_f1_rec, &nn_f1_X, &nn_f1_nX, false); cBYE(status); } f1_fldtype = f1_rec.fldtype; status = get_type_op_fld("f1s1opf2", op, f1_fldtype, f1_fldtype, "", &f2_fldtype); cBYE(status); int fldsz = INT_MAX; size_t filesz = LLONG_MAX; /*------------------------------------------ */ status = get_fld_sz(f2_fldtype, &fldsz); cBYE(status); filesz = fldsz * nR; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &f2_X, &f2_nX, true); cBYE(status); /*--------------------------------------------*/ if ( nn_f1_X == 
NULL ) { /* No need for nn file */ } else { status = get_fld_sz(I1, &fldsz); filesz = fldsz * nR; status = mk_temp_file(filesz, &nn_ddir_id, &nn_fileno); cBYE(status); status = q_mmap(nn_ddir_id, nn_fileno, &nn_f2_X, &nn_f2_nX, 1); cBYE(status); } //-------------------------------------------------------- if ( ( strcmp(op, "+") == 0 ) || ( strcmp(op, "-") == 0 ) || ( strcmp(op, "*") == 0 ) || ( strcmp(op, "/") == 0 ) || ( strcmp(op, "%") == 0 ) || ( strcmp(op, "&") == 0 ) || ( strcmp(op, "|") == 0 ) || ( strcmp(op, "^") == 0 ) || ( strcmp(op, ">") == 0 ) || ( strcmp(op, "<") == 0 ) || ( strcmp(op, ">=") == 0 ) || ( strcmp(op, "<=") == 0 ) || ( strcmp(op, "!=") == 0 ) || ( strcmp(op, "==") == 0 ) || ( strcmp(op, "<<") == 0 ) || ( strcmp(op, ">>") == 0 ) || ( strcmp(op, "<||>") == 0 ) || ( strcmp(op, "<=||>=") == 0 ) || ( strcmp(op, ">&&<") == 0 ) || ( strcmp(op, ">=&&<=") == 0 ) ) { // all is well status = vec_f1s1opf2(nR, f1_rec.fldtype, f1_X, nn_f1_X, str_scalar, op, f2_X, nn_f2_X, f2_fldtype); cBYE(status); zero_fld_rec(&f2_rec); f2_rec.fldtype = f2_fldtype; status = add_fld(tbl_id, f2, ddir_id, fileno, &f2_id, &f2_rec); cBYE(status); if ( nn_fileno > 0 ) { /* there is an nn file */ zero_fld_rec(&nn_f2_rec); nn_f2_rec.fldtype = I1; status = add_aux_fld(NULL, tbl_id, NULL, f2_id, nn_ddir_id, nn_fileno, nn, &nn_f2_id, &nn_f2_rec); cBYE(status); } } else { fprintf(stderr, "Invalid op = [%s] \n", op); go_BYE(-1); } BYE: rs_munmap(f1_X, f1_nX); rs_munmap(nn_f1_X, nn_f1_nX); rs_munmap(f2_X, f2_nX); rs_munmap(nn_f2_X, nn_f2_nX); return status ; }
//--------------------------------------------------------------- // START FUNC DECL int countf( char *src_tbl, char *src_fld, char *fk_dst, char *cfld, char *dst_tbl, char *cnt_fld, char *options ) // STOP FUNC DECL { int status = 0; char *fk_dst_X = NULL; size_t fk_dst_nX = 0; char *op_X = NULL; size_t op_nX = 0; char *cfld_X = NULL; size_t cfld_nX = 0; char *src_fld_X = NULL; size_t src_fld_nX = 0; TBL_REC_TYPE src_tbl_rec; int src_tbl_id = -1; TBL_REC_TYPE dst_tbl_rec; int dst_tbl_id = -1; FLD_REC_TYPE fk_dst_rec; int fk_dst_id = -1; FLD_REC_TYPE nn_fk_dst_rec; int nn_fk_dst_id; FLD_REC_TYPE src_fld_rec; int src_fld_id = -1; FLD_REC_TYPE nn_src_fld_rec; int nn_src_fld_id; FLD_REC_TYPE cfld_rec; int cfld_id = -1; FLD_REC_TYPE nn_cfld_rec; int nn_cfld_id; FLD_REC_TYPE cnt_fld_rec; int cnt_fld_id; bool is_safe = true; long long block_size = 0; int nT = 0; long long **partial_cntI8 = NULL; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( fk_dst == NULL ) || ( *fk_dst == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( ( cnt_fld == NULL ) || ( *cnt_fld == '\0' ) ) { go_BYE(-1); } //-------------------------------------------------------- if ( ( options != NULL ) && ( *options != '\0' ) ) { #define BUFLEN 8 bool is_null; char buffer[BUFLEN]; zero_string(buffer, BUFLEN); status = extract_S(options, "safe_mode=[", "]", buffer, BUFLEN, &is_null); if ( is_null ) { go_BYE(-1); } if ( strcasecmp(buffer, "true") == 0 ) { is_safe = true; } else if ( strcasecmp(buffer, "false") == 0 ) { is_safe = false; } else { go_BYE(-1); } } status = is_tbl(dst_tbl, &dst_tbl_id, &dst_tbl_rec); cBYE(status); chk_range(dst_tbl_id, 0, g_n_tbl); long long dst_nR = g_tbls[dst_tbl_id].nR; if ( dst_nR >= INT_MAX ) { go_BYE(-1); } status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); 
cBYE(status); chk_range(src_tbl_id, 0, g_n_tbl); long long src_nR = g_tbls[src_tbl_id].nR; if ( src_nR >= INT_MAX ) { go_BYE(-1); } status = is_fld(NULL, src_tbl_id, fk_dst, &fk_dst_id, &fk_dst_rec, &nn_fk_dst_id, &nn_fk_dst_rec); cBYE(status); chk_range(fk_dst_id, 0, g_n_fld); status = get_data(fk_dst_rec, &fk_dst_X, &fk_dst_nX, 0); cBYE(status); if ( nn_fk_dst_id >= 0 ) { go_BYE(-1); } status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, &nn_src_fld_id, &nn_src_fld_rec); cBYE(status); chk_range(src_fld_id, 0, g_n_fld); status = get_data(src_fld_rec, &src_fld_X, &src_fld_nX, 0); cBYE(status); /* if src_fld has a nn field, then cannot have a cfld */ if ( nn_src_fld_id >= 0 ) { if ( ( cfld != NULL ) && ( *cfld != '\0' ) ) { go_BYE(-1); } status = get_data(nn_src_fld_rec, &cfld_X, &cfld_nX, 0); cBYE(status); } if ( ( src_fld_rec.fldtype != I4 ) && ( src_fld_rec.fldtype != I8 ) ) { go_BYE(-1); } int *I4_src_ptr = (int *)src_fld_X; if ( ( cfld != NULL ) && ( *cfld != '\0' ) ) { status = is_fld(NULL, src_tbl_id, cfld, &cfld_id, &cfld_rec, &nn_cfld_id, &nn_cfld_rec); if ( cfld_id < 0 ) { go_BYE(-1); } if ( cfld_rec.fldtype != I1 ) { go_BYE(-1); } if ( nn_cfld_id >= 0 ) { go_BYE(-1); } status = get_data(cfld_rec, &cfld_X, &cfld_nX, 0); cBYE(status); } //------------------------------------------------------ FLD_TYPE cnt_fldtype = I8; int ddir_id = -1, fileno = -1; int cnt_fldsz = -1; size_t filesz = 0; status = get_fld_sz(cnt_fldtype, &cnt_fldsz); cBYE(status); filesz = dst_nR * cnt_fldsz; status = mk_temp_file(filesz, &ddir_id, &fileno); cBYE(status); status = q_mmap(ddir_id, fileno, &op_X, &op_nX, true); cBYE(status); // START: Initialize destination count to 0 block_size = 0; long long *cntI8 = (long long *)op_X; status = partition(dst_nR, 4096, -1, &block_size, &nT); cBYE(status); if ( nT > 1 ) { #pragma omp parallel for for ( int tid = 0; tid < nT; tid++ ) { long long lb = tid * block_size; long long ub = lb + block_size; if ( tid == (nT-1) ) { ub = 
dst_nR; } #ifdef IPP ippsZero_64s(cntI8+lb, (ub-lb)); #else assign_const_I8(cntI8+lb, (ub-lb), 0); #endif } } else { #ifdef IPP ippsZero_64s(cntI8, dst_nR); #else assign_const_I8(cntI8, dst_nR, 0); #endif } // STOP: Initialize destination count to 0 nT = 0; // reset necessary for free of partial_cntI8 block_size = 0; //------------------------------------------------------ bool is_serial = true; if ( ( src_nR > 1048576 ) && ( ( src_nR / dst_nR ) > 64 ) ) { is_serial = false; nT = 2; for ( ; ; nT *= 2 ) { if ( nT >= (1*g_num_cores) ) { break; } if ( ( dst_nR * nT * 32 ) > src_nR ) { break; } } partial_cntI8 = malloc(nT * sizeof(long long *)); return_if_malloc_failed(partial_cntI8); for ( int i = 0; i < nT; i++ ) { partial_cntI8[i] = NULL; partial_cntI8[i] = malloc(dst_nR * sizeof(long long)); return_if_malloc_failed(partial_cntI8[i]); } block_size = src_nR / nT; } if ( is_serial == true ) { if ( cfld_id >= 0 ) { switch ( fk_dst_rec.fldtype ) { case I1 : status = countf_nn_I1(I4_src_ptr, (char *)fk_dst_X, src_nR, cfld_X, cntI8, dst_nR, is_safe); cBYE(status); break; case I2 : status = countf_nn_I2(I4_src_ptr, (short *)fk_dst_X, src_nR, cfld_X, cntI8, dst_nR, is_safe); cBYE(status); break; case I4 : status = countf_nn_I4(I4_src_ptr, (int *)fk_dst_X, src_nR, cfld_X, cntI8, dst_nR, is_safe); cBYE(status); break; case I8 : status = countf_nn_I8(I4_src_ptr, (long long *)fk_dst_X, src_nR, cfld_X, cntI8, dst_nR, is_safe); cBYE(status); break; default : go_BYE(-1); break; } } else { switch ( fk_dst_rec.fldtype ) { case I1 : status = countf_I1(I4_src_ptr, (char *)fk_dst_X, src_nR, cntI8, dst_nR, is_safe); cBYE(status); break; case I2 : status = countf_I2(I4_src_ptr, (short *)fk_dst_X, src_nR, cntI8, dst_nR, is_safe); cBYE(status); break; case I4 : status = countf_I4(I4_src_ptr, (int *)fk_dst_X, src_nR, cntI8, dst_nR, is_safe); cBYE(status); break; case I8 : status = countf_I8(I4_src_ptr, (long long *)fk_dst_X, src_nR, cntI8, dst_nR, is_safe); cBYE(status); break; default : 
go_BYE(-1); break; } } } else { char *inI1 = (char *) fk_dst_X; short *inI2 = (short *) fk_dst_X; int *inI4 = (int *) fk_dst_X; long long *inI8 = (long long *) fk_dst_X; #pragma omp parallel for for ( int tid = 0; tid < nT; tid++ ) { assign_const_I8(partial_cntI8[tid], dst_nR, 0); long long lb = tid * block_size; long long ub = lb + block_size; if ( tid == (nT-1) ) { ub = src_nR; } char *t_inI1 = inI1 + lb; short *t_inI2 = inI2 + lb; int *t_inI4 = inI4 + lb; long long *t_inI8 = inI8 + lb; int *t_src_I4 = I4_src_ptr + lb; long long t_src_nR = ub - lb; if ( status == -1 ) { continue; } if ( cfld_id >= 0 ) { char *t_cfld = cfld_X + lb; switch ( fk_dst_rec.fldtype ) { case I1 : status = countf_nn_I1(t_src_I4, t_inI1, t_src_nR, t_cfld, partial_cntI8[tid], dst_nR, is_safe); break; case I2 : status = countf_nn_I2(t_src_I4, t_inI2, t_src_nR, t_cfld, partial_cntI8[tid], dst_nR, is_safe); break; case I4 : status = countf_nn_I4(t_src_I4, t_inI4, t_src_nR, t_cfld, partial_cntI8[tid], dst_nR, is_safe); break; case I8 : status = countf_nn_I8(t_src_I4, t_inI8, t_src_nR, t_cfld, partial_cntI8[tid], dst_nR, is_safe); break; default : status = -1; break; } } else { switch ( fk_dst_rec.fldtype ) { case I1 : status = countf_I1(t_src_I4, t_inI1, t_src_nR, partial_cntI8[tid], dst_nR, is_safe); break; case I2 : status = countf_I2(t_src_I4, t_inI2, t_src_nR, partial_cntI8[tid], dst_nR, is_safe); break; case I4 : status = countf_I4(t_src_I4, t_inI4, t_src_nR, partial_cntI8[tid], dst_nR, is_safe); break; case I8 : status = countf_I8(t_src_I4, t_inI8, t_src_nR, partial_cntI8[tid], dst_nR, is_safe); break; default : status = -1; break; } } } for ( int tid = 0; tid < nT; tid++ ) { long long *partial_cntI8_tid = partial_cntI8[tid]; for ( int j = 0; j < dst_nR; j++ ) { cntI8[j] += partial_cntI8_tid[j]; } } } rs_munmap(op_X, op_nX); zero_fld_rec(&cnt_fld_rec); cnt_fld_rec.fldtype = cnt_fldtype; status = add_fld(dst_tbl_id, cnt_fld, ddir_id, fileno, &cnt_fld_id, &cnt_fld_rec); cBYE(status); BYE: 
if ( nT > 0 ) { for ( int i = 0; i < nT; i++ ) { free_if_non_null(partial_cntI8[i]); } free_if_non_null(partial_cntI8); } rs_munmap(fk_dst_X, fk_dst_nX); rs_munmap(op_X, op_nX); rs_munmap(cfld_X, cfld_nX); rs_munmap(src_fld, src_fld_nX); return(status); }
/* START FUNC DECL */ int find_conns( char *t1, char *flb, char *fub, char *t2, char *f2, char *t3, char *f3 ) /* STOP FUNC DECL */ { int status = 0; char *flb_X = NULL; size_t flb_nX = 0; char *fub_X = NULL; size_t fub_nX = 0; char *nn_flb_X = NULL; size_t nn_flb_nX = 0; char *nn_fub_X = NULL; size_t nn_fub_nX = 0; char *f2_X = NULL; size_t f2_nX = 0; char *f3_X = NULL; size_t f3_nX = 0; int t1_id = INT_MIN, t2_id = INT_MIN, t3_id = INT_MIN; int flb_id = INT_MIN, fub_id = INT_MIN, f2_id = INT_MIN, f3_id = INT_MIN; int nn_flb_id = INT_MIN, nn_fub_id = INT_MIN, nn_f2_id = INT_MIN, nn_f3_id = INT_MIN; TBL_REC_TYPE t1_rec, t2_rec, t3_rec; FLD_REC_TYPE flb_rec, nn_flb_rec; FLD_REC_TYPE fub_rec, nn_fub_rec; FLD_REC_TYPE f2_rec, nn_f2_rec; FLD_REC_TYPE f3_rec, nn_f3_rec; long long nR3; long long nR1 = INT_MIN, nR2 = INT_MIN; char opfile[MAX_LEN_FILE_NAME+1]; char nn_opfile[MAX_LEN_FILE_NAME+1]; int ddir_id = INT_MAX; /*----------------------------------------------------------------*/ if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); } if ( ( flb == NULL ) || ( *flb == '\0' ) ) { go_BYE(-1); } if ( ( fub == NULL ) || ( *fub == '\0' ) ) { go_BYE(-1); } if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); } if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); } if ( ( t3 == NULL ) || ( *t3 == '\0' ) ) { go_BYE(-1); } if ( ( f3 == NULL ) || ( *f3 == '\0' ) ) { go_BYE(-1); } if ( strcmp(flb, fub) == 0 ) { go_BYE(-1); } if ( strcmp(t1, t3) == 0 ) { go_BYE(-1); } if ( strcmp(t2, t3) == 0 ) { go_BYE(-1); } zero_string(opfile, (MAX_LEN_FILE_NAME+1)); zero_string(nn_opfile, (MAX_LEN_FILE_NAME+1)); /*--------------------------------------------------------*/ status = is_tbl(t1, &t1_id, &t1_rec); cBYE(status); chk_range(t1_id, 0, g_n_tbl); nR1 = g_tbls[t1_id].nR; /*--------------------------------------------------------*/ status=is_fld(NULL, t1_id, flb, &flb_id, &flb_rec, &nn_flb_id, &nn_flb_rec); cBYE(status); chk_range(flb_id, 0, g_n_fld); status = get_data(flb_rec, 
&flb_X, &flb_nX, false); cBYE(status); if ( nn_flb_id >= 0 ) { status = get_data(nn_flb_rec, &nn_flb_X, &nn_flb_nX, false); cBYE(status); } if ( flb_rec.fldtype != I8 ) { go_BYE(-1); } long long *flbI8 = (long long *)flb_X; /*--------------------------------------------------------*/ status=is_fld(NULL, t1_id, fub, &fub_id, &fub_rec, &nn_fub_id, &nn_fub_rec); cBYE(status); chk_range(fub_id, 0, g_n_fld); status = get_data(fub_rec, &fub_X, &fub_nX, false); cBYE(status); if ( nn_fub_id >= 0 ) { status = get_data(nn_fub_rec, &nn_fub_X, &nn_fub_nX, false); cBYE(status); } if ( fub_rec.fldtype != I8 ) { go_BYE(-1); } long long *fubI8 = (long long *)fub_X; /*--------------------------------------------------------*/ status = is_tbl(t2, &t2_id, &t2_rec); cBYE(status); chk_range(t2_id, 0, g_n_tbl); nR2 = g_tbls[t2_id].nR; /*--------------------------------------------------------*/ status = is_fld(NULL, t2_id, f2, &f2_id, &f2_rec, &nn_f2_id, &nn_f2_rec); cBYE(status); chk_range(f2_id, 0, g_n_fld); status = get_data(f2_rec, &f2_X, &f2_nX, false); cBYE(status); if ( nn_f2_id >= 0 ) { go_BYE(-1); } if ( f2_rec.fldtype != I4 ) { go_BYE(-1); } int *f2I4 = (int *)f2_X; /*--------------------------------------------------------*/ status = is_tbl(t3, &t3_id, &t3_rec); cBYE(status); chk_range(t3_id, 0, g_n_tbl); nR3 = g_tbls[t3_id].nR; /*--------------------------------------------------------*/ /* Make space for output if necessary */ int *f3I4 = NULL; status = is_fld(NULL, t3_id, f3, &f3_id, &f3_rec, &nn_f3_id, &nn_f3_rec); cBYE(status); if ( f3_id >= 0 ) { status = get_data(f3_rec, &f3_X, &f3_nX, true); cBYE(status); if ( nn_f3_id >= 0 ) { go_BYE(-1); } f3I4 = (int *)f3_X; } else { status = mk_temp_file(opfile, nR3 * sizeof(int), &ddir_id); cBYE(status); status = q_mmap(ddir_id, opfile, &f3_X, &f3_nX, true); cBYE(status); f3I4 = (int *)f3_X; #ifdef IPP ippsSet_32s(0, f3I4, nR3); // TODO: P3: cilkfor? 
#else assign_const_I4(f3I4, nR3, 0); #endif status = add_fld(t3_id, f3, ddir_id, opfile, I4, -1, &f3_id, &f3_rec); cBYE(status); } core_find_conns(flbI8, fubI8, nR1, nn_flb_X, nn_fub_X, f2I4, nR2, f3I4, nR3); rs_munmap(f3_X, f3_nX); /*-----------------------------------------------------------*/ BYE: rs_munmap(flb_X, flb_nX); rs_munmap(fub_X, fub_nX); rs_munmap(nn_flb_X, nn_flb_nX); rs_munmap(nn_fub_X, nn_fub_nX); rs_munmap(f2_X, f2_nX); rs_munmap(f3_X, f3_nX); return(status); }
//--------------------------------------------------------------- // START FUNC DECL int percentiles( char *src_tbl, char *src_fld, char *dst_tbl, char *str_n_out ) // STOP FUNC DECL { int status = 0; char *src_fld_X = NULL; size_t src_fld_nX = 0; TBL_REC_TYPE src_tbl_rec; int src_tbl_id = -1; long long src_nR = -1; TBL_REC_TYPE dst_tbl_rec; int dst_tbl_id = -1; long long dst_nR = -1; FLD_REC_TYPE src_fld_rec; int src_fld_id = -1; FLD_REC_TYPE nn_src_fld_rec; int nn_src_fld_id = -1; FLD_REC_TYPE min_rec; int min_id = -1; FLD_REC_TYPE max_rec; int max_id = -1; FLD_REC_TYPE avg_rec; int avg_id = -1; FLD_REC_TYPE cnt_rec; int cnt_id = -1; char *minX = NULL; size_t min_nX = 0; int min_ddir_id = -1, min_fileno = 0; char *maxX = NULL; size_t max_nX = 0; int max_ddir_id = -1, max_fileno = 0; char *cntX = NULL; size_t cnt_nX = 0; int cnt_ddir_id = -1, cnt_fileno = 0; char *avgX = NULL; size_t avg_nX = 0; int avg_ddir_id = -1, avg_fileno = 0; int fldsz = 0; long long filesz = 0; int bin_size; //---------------------------------------------------------------- if ( ( src_tbl == NULL ) || ( *src_tbl == '\0' ) ) { go_BYE(-1); } if ( ( src_fld == NULL ) || ( *src_fld == '\0' ) ) { go_BYE(-1); } if ( ( dst_tbl == NULL ) || ( *dst_tbl == '\0' ) ) { go_BYE(-1); } if ( strcmp(src_tbl, dst_tbl) == 0 ) { go_BYE(-1); } //-------------------------------------------------------- status = stoI8(str_n_out, &dst_nR); cBYE(status); if ( ( dst_nR >= MAX_BINS_FOR_PERCENTILE ) || ( dst_nR <= 1 ) ) { fprintf(stderr, "num_bins = %s not in valid range [2, %d] \n", str_n_out, MAX_BINS_FOR_PERCENTILE); go_BYE(-1); } //-------------------------------------------------------- status = is_tbl(src_tbl, &src_tbl_id, &src_tbl_rec); cBYE(status); if (src_tbl_id < 0 ) { go_BYE(-1); } src_nR = src_tbl_rec.nR; if ( dst_nR >= src_nR ) { fprintf(stderr, "Source Table [%s] has insufficient rows [%lld]\n", src_tbl, src_nR); go_BYE(-1); } status = is_fld(NULL, src_tbl_id, src_fld, &src_fld_id, &src_fld_rec, 
&nn_src_fld_id, &nn_src_fld_rec); if ( src_fld_id < 0 ) { go_BYE(-1); } switch ( src_fld_rec.fldtype ) { case I4 : case I8 : case F4 : case F8 : break; default : go_BYE(-1); break; } if ( nn_src_fld_id >= 0 ) { go_BYE(-1); } /* Make sure src_fld field is sorted ascending */ char srttype[32]; zero_string(srttype, 32); status = f_to_s(src_tbl, src_fld, "is_sorted", srttype, 32); cBYE(status); if ( strcmp(srttype, "ascending") != 0 ) { fprintf(stderr, "Field [%s] in Table [%s] not sorted ascending\n", src_fld, src_tbl); go_BYE(-1); } //-------------------------------------------------------- status = get_data(src_fld_rec, &src_fld_X, &src_fld_nX, 0); cBYE(status); //-------------------------------------------------------- // Create 4 files for the 4 fields to be created // min, max, cnt, avg int n1, n2; status = get_fld_sz(I8, &n1); cBYE(status); status = get_fld_sz(F8, &n2); cBYE(status); if ( n1 != n2 ) { go_BYE(-1); } status = get_fld_sz(I8, &fldsz); cBYE(status); filesz = fldsz * dst_nR; status = mk_temp_file(filesz, &min_ddir_id, &min_fileno); cBYE(status); status = mk_temp_file(filesz, &max_ddir_id, &max_fileno); cBYE(status); status = mk_temp_file(filesz, &cnt_ddir_id, &cnt_fileno); cBYE(status); status = get_fld_sz(F8, &fldsz); cBYE(status); filesz = fldsz * dst_nR; status = mk_temp_file(filesz, &avg_ddir_id, &avg_fileno); cBYE(status); status = q_mmap(min_ddir_id, min_fileno, &minX, &min_nX, 1); cBYE(status); status = q_mmap(max_ddir_id, max_fileno, &maxX, &max_nX, 1); cBYE(status); status = q_mmap(cnt_ddir_id, cnt_fileno, &cntX, &cnt_nX, 1); cBYE(status); status = q_mmap(avg_ddir_id, avg_fileno, &avgX, &avg_nX, 1); cBYE(status); //------------------------------------------------------ /* Delete table if it exists. 
Create brand new table */ status = is_tbl(dst_tbl, &dst_tbl_id , &dst_tbl_rec); cBYE(status); if ( dst_tbl_id >= 0 ) { status = del_tbl(NULL, dst_tbl_id); cBYE(status); } zero_tbl_rec(&dst_tbl_rec); status = add_tbl(dst_tbl, str_n_out, &dst_tbl_id, &dst_tbl_rec); cBYE(status); //----------------------------------------------------------- // START: Here starts the processing /* b is the bin number */ long long *minI8 = (long long *)minX; long long *maxI8 = (long long *)maxX; double *minF8 = (double *)minX; double *maxF8 = (double *)maxX; long long *cntI8 = (long long *)cntX; double *avgF8 = (double *)avgX; bin_size = src_nR / dst_nR; cilkfor ( int b = 0; b < dst_nR; b++ ) { long long lb = b * bin_size; long long ub = lb + bin_size; if ( b == ( dst_nR -1 ) ) { ub = src_nR; } int *valsI4 = (int *) src_fld_X; valsI4 += lb; float *valsF4 = (float *) src_fld_X; valsF4 += lb; long long *valsI8 = (long long *) src_fld_X; valsI8 += lb; double *valsF8 = (double *) src_fld_X; valsF8 += lb; //----------------------------------------------- double lvalF8 = 0; long long lvalI8 = 0; double lsumF8 = 0; long long lsumI8 = 0; long long lminI8 = LLONG_MAX, lmaxI8 = LLONG_MIN; double lminF8 = DBL_MIN, lmaxF8 = DBL_MIN; //----------------------------------------------- for ( long long i = 0; i < (ub - lb); i++ ) { switch ( src_fld_rec.fldtype ) { case I4 : lvalI8 = valsI4[i]; break; case I8 : lvalI8 = valsI8[i]; break; case F4 : lvalF8 = valsI4[i]; break; case F8 : lvalF8 = valsI8[i]; break; default : status = -1; continue; break; } switch ( src_fld_rec.fldtype ) { case I4 : case I8 : lminI8 = min(lminI8 , lvalI8); lmaxI8 = max(lmaxI8 , lvalI8); lsumI8 += lvalI8; break; case F4 : case F8 : lminF8 = min(lminF8 , lvalF8); lmaxF8 = max(lmaxF8 , lvalF8); lsumF8 += lvalF8; break; default : status = -1; continue; break; } } // Write out the values cntI8[b] = (ub - lb); double n = ub - lb; switch ( src_fld_rec.fldtype ) { case I4 : case I8 : minI8[b] = lminI8; maxI8[b] = lmaxI8; avgF8[b] = 
(double)lsumI8 / n; break; case F4 : case F8 : minF8[b] = lminF8; maxF8[b] = lmaxF8; avgF8[b] = (double)lsumF8 / n; break; default : status = -1; continue; break; } } //----------------------------------------------------------- status = add_tbl(dst_tbl, str_n_out, &dst_tbl_id, &dst_tbl_rec); cBYE(status); // Add output field(s) to meta data zero_fld_rec(&min_rec); zero_fld_rec(&max_rec); switch ( src_fld_rec.fldtype ) { case I4 : case I8 : min_rec.fldtype = I8; max_rec.fldtype = I8; break; case F4 : case F8 : min_rec.fldtype = F8; max_rec.fldtype = F8; break; default : go_BYE(-1); break; } status = add_fld(dst_tbl_id, "min", min_ddir_id, min_fileno, &min_id, &min_rec); cBYE(status); status = add_fld(dst_tbl_id, "max", max_ddir_id, max_fileno, &max_id, &max_rec); cBYE(status); zero_fld_rec(&avg_rec); avg_rec.fldtype = F8; status = add_fld(dst_tbl_id, "avg", avg_ddir_id, avg_fileno, &avg_id, &avg_rec); cBYE(status); zero_fld_rec(&cnt_rec); cnt_rec.fldtype = I8; status = add_fld(dst_tbl_id, "cnt", cnt_ddir_id, cnt_fileno, &cnt_id, &cnt_rec); cBYE(status); //----------------------------------------------------------- BYE: rs_munmap(src_fld_X, src_fld_nX); rs_munmap(minX, min_nX); rs_munmap(maxX, max_nX); rs_munmap(cntX, cnt_nX); rs_munmap(avgX, avg_nX); return(status); }