static str renderTerm(MalBlkPtr mb, MalStkPtr stk, InstrPtr p, int idx, int flg) { char *buf =0; char *nme =0; int nameused = 0; size_t len = 0, maxlen = BUFSIZ; ValRecord *val = 0; char *cv =0; str tpe; int showtype = 0, closequote=0; int varid = getArg(p,idx); buf = GDKzalloc(maxlen); if( buf == NULL) { addMalException(mb, "renderTerm:Failed to allocate"); return NULL; } // show the name when required or is used if ((flg & LIST_MAL_NAME) && !isVarConstant(mb,varid) && !isVarTypedef(mb,varid)) { nme = getVarName(mb,varid); len +=snprintf(buf, maxlen, "%s", nme); nameused =1; } // show the value when required or being a constant if( ((flg & LIST_MAL_VALUE) && stk != 0) || isVarConstant(mb,varid) ){ if (nameused){ strcat(buf + len,"="); len++; } // locate value record if (isVarConstant(mb,varid)){ val = &getVarConstant(mb, varid); showtype= getVarType(mb,varid) != TYPE_str && getVarType(mb,varid) != TYPE_bit; } else if( stk) val = &stk->stk[varid]; if ((cv = VALformat(val)) == NULL) { addMalException(mb, "renderTerm:Failed to allocate"); GDKfree(buf); return NULL; } if (len + strlen(cv) >= maxlen) { char *nbuf= GDKrealloc(buf, maxlen =len + strlen(cv) + BUFSIZ); if( nbuf == 0){ GDKfree(buf); GDKfree(cv); addMalException(mb,"renderTerm:Failed to allocate"); return NULL; } buf = nbuf; } if( strcmp(cv,"nil") == 0){ strcat(buf+len,cv); len += strlen(buf+len); GDKfree(cv); showtype = showtype || getBatType(getVarType(mb,varid)) > TYPE_str || ((isVarUDFtype(mb,varid) || isVarTypedef(mb,varid)) && isVarConstant(mb,varid)) || isaBatType(getVarType(mb,varid)); } else{ if ( !isaBatType(getVarType(mb,varid)) && getBatType(getVarType(mb,varid)) > TYPE_str ){ closequote = 1; strcat(buf+len,"\""); len++; } strcat(buf+len,cv); len += strlen(buf+len); GDKfree(cv); if( closequote ){ strcat(buf+len,"\""); len++; } showtype = showtype || closequote > TYPE_str || ((isVarUDFtype(mb,varid) || isVarTypedef(mb,varid) || (flg & (LIST_MAL_REMOTE | LIST_MAL_TYPE))) && isVarConstant(mb,varid)) || (isaBatType(getVarType(mb,varid)) && idx < p->retc); if (stk && isaBatType(getVarType(mb,varid)) && stk->stk[varid].val.bval ){ BAT *d= BBPquickdesc(stk->stk[varid].val.bval, true); if( d) len += snprintf(buf+len,maxlen-len,"[" BUNFMT "]", BATcount(d)); } } } // show the type when required or frozen by the user // special care should be taken with constants, they may have been casted if ((flg & LIST_MAL_TYPE) || (isVarUDFtype(mb, varid) && idx < p->retc) || isVarTypedef(mb,varid) || showtype){ strcat(buf + len,":"); len++; tpe = getTypeName(getVarType(mb, varid)); len += snprintf(buf+len,maxlen-len,"%s",tpe); GDKfree(tpe); } if( len >= maxlen) addMalException(mb,"renderTerm:Value representation too large"); return buf; }
str RAPIeval(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bit grouped) { sql_func * sqlfun = NULL; str exprStr = *getArgReference_str(stk, pci, pci->retc + 1); SEXP x, env, retval; SEXP varname = R_NilValue; SEXP varvalue = R_NilValue; ParseStatus status; int i = 0; char argbuf[64]; char *argnames = NULL; size_t argnameslen; size_t pos; char* rcall = NULL; size_t rcalllen; int ret_cols = 0; /* int because pci->retc is int, too*/ str *args; int evalErr; char *msg = MAL_SUCCEED; BAT *b; node * argnode; int seengrp = FALSE; rapiClient = cntxt; if (!RAPIEnabled()) { throw(MAL, "rapi.eval", "Embedded R has not been enabled. Start server with --set %s=true", rapi_enableflag); } if (!rapiInitialized) { throw(MAL, "rapi.eval", "Embedded R initialization has failed"); } if (!grouped) { sql_subfunc *sqlmorefun = (*(sql_subfunc**) getArgReference(stk, pci, pci->retc)); if (sqlmorefun) sqlfun = (*(sql_subfunc**) getArgReference(stk, pci, pci->retc))->func; } else { sqlfun = *(sql_func**) getArgReference(stk, pci, pci->retc); } args = (str*) GDKzalloc(sizeof(str) * pci->argc); if (args == NULL) { throw(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); } // get the lock even before initialization of the R interpreter, as this can take a second and must be done only once. MT_lock_set(&rapiLock); env = PROTECT(eval(lang1(install("new.env")), R_GlobalEnv)); assert(env != NULL); // first argument after the return contains the pointer to the sql_func structure // NEW macro temporarily renamed to MNEW to allow including sql_catalog.h if (sqlfun != NULL && sqlfun->ops->cnt > 0) { int carg = pci->retc + 2; argnode = sqlfun->ops->h; while (argnode) { char* argname = ((sql_arg*) argnode->data)->name; args[carg] = GDKstrdup(argname); carg++; argnode = argnode->next; } } // the first unknown argument is the group, we don't really care for the rest. argnameslen = 2; for (i = pci->retc + 2; i < pci->argc; i++) { if (args[i] == NULL) { if (!seengrp && grouped) { args[i] = GDKstrdup("aggr_group"); seengrp = TRUE; } else { snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - 1); args[i] = GDKstrdup(argbuf); } } argnameslen += strlen(args[i]) + 2; /* extra for ", " */ } // install the MAL variables into the R environment // we can basically map values to int ("INTEGER") or double ("REAL") for (i = pci->retc + 2; i < pci->argc; i++) { int bat_type = getBatType(getArgType(mb,pci,i)); // check for BAT or scalar first, keep code left if (!isaBatType(getArgType(mb,pci,i))) { b = COLnew(0, getArgType(mb, pci, i), 0, TRANSIENT); if (b == NULL) { msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } if ( getArgType(mb,pci,i) == TYPE_str) { if (BUNappend(b, *getArgReference_str(stk, pci, i), false) != GDK_SUCCEED) { BBPreclaim(b); b = NULL; msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } } else { if (BUNappend(b, getArgReference(stk, pci, i), false) != GDK_SUCCEED) { BBPreclaim(b); b = NULL; msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } } } else { b = BATdescriptor(*getArgReference_bat(stk, pci, i)); if (b == NULL) { msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } } // check the BAT count, if it is bigger than RAPI_MAX_TUPLES, fail if (BATcount(b) > RAPI_MAX_TUPLES) { msg = createException(MAL, "rapi.eval", "Got "BUNFMT" rows, but can only handle "LLFMT". Sorry.", BATcount(b), (lng) RAPI_MAX_TUPLES); BBPunfix(b->batCacheid); goto wrapup; } varname = PROTECT(Rf_install(args[i])); varvalue = bat_to_sexp(b, bat_type); if (varvalue == NULL) { msg = createException(MAL, "rapi.eval", "unknown argument type "); goto wrapup; } BBPunfix(b->batCacheid); // install vector into R environment Rf_defineVar(varname, varvalue, env); UNPROTECT(2); } /* we are going to evaluate the user function within an anonymous function call: * ret <- (function(arg1){return(arg1*2)})(42) * the user code is put inside the {}, this keeps our environment clean (TM) and gives * a clear path for return values, namely using the builtin return() function * this is also compatible with PL/R */ pos = 0; argnames = malloc(argnameslen); if (argnames == NULL) { msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } argnames[0] = '\0'; for (i = pci->retc + 2; i < pci->argc; i++) { pos += snprintf(argnames + pos, argnameslen - pos, "%s%s", args[i], i < pci->argc - 1 ? ", " : ""); } rcalllen = 2 * pos + strlen(exprStr) + 100; rcall = malloc(rcalllen); if (rcall == NULL) { msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } snprintf(rcall, rcalllen, "ret <- as.data.frame((function(%s){%s})(%s), nm=NA, stringsAsFactors=F)\n", argnames, exprStr, argnames); free(argnames); argnames = NULL; #ifdef _RAPI_DEBUG_ printf("# R call %s\n",rcall); #endif x = R_ParseVector(mkString(rcall), 1, &status, R_NilValue); if (LENGTH(x) != 1 || status != PARSE_OK) { msg = createException(MAL, "rapi.eval", "Error parsing R expression '%s'. ", exprStr); goto wrapup; } retval = R_tryEval(VECTOR_ELT(x, 0), env, &evalErr); if (evalErr != FALSE) { char* errormsg = strdup(R_curErrorBuf()); size_t c; if (errormsg == NULL) { msg = createException(MAL, "rapi.eval", "Error running R expression."); goto wrapup; } // remove newlines from error message so it fits into a MAPI error (lol) for (c = 0; c < strlen(errormsg); c++) { if (errormsg[c] == '\r' || errormsg[c] == '\n') { errormsg[c] = ' '; } } msg = createException(MAL, "rapi.eval", "Error running R expression: %s", errormsg); free(errormsg); goto wrapup; } // ret should be a data frame with exactly as many columns as we need from retc ret_cols = LENGTH(retval); if (ret_cols != pci->retc) { msg = createException(MAL, "rapi.eval", "Expected result of %d columns, got %d", pci->retc, ret_cols); goto wrapup; } // collect the return values for (i = 0; i < pci->retc; i++) { SEXP ret_col = VECTOR_ELT(retval, i); int bat_type = getBatType(getArgType(mb,pci,i)); if (bat_type == TYPE_any || bat_type == TYPE_void) { getArgType(mb,pci,i) = bat_type; msg = createException(MAL, "rapi.eval", "Unknown return value, possibly projecting with no parameters."); goto wrapup; } // hand over the vector into a BAT b = sexp_to_bat(ret_col, bat_type); if (b == NULL) { msg = createException(MAL, "rapi.eval", "Failed to convert column %i", i); goto wrapup; } // bat return if (isaBatType(getArgType(mb,pci,i))) { *getArgReference_bat(stk, pci, i) = b->batCacheid; } else { // single value return, only for non-grouped aggregations BATiter li = bat_iterator(b); if (VALinit(&stk->stk[pci->argv[i]], bat_type, BUNtail(li, 0)) == NULL) { // TODO BUNtail here msg = createException(MAL, "rapi.eval", SQLSTATE(HY001) MAL_MALLOC_FAIL); goto wrapup; } } msg = MAL_SUCCEED; } /* unprotect environment, so it will be eaten by the GC. */ UNPROTECT(1); wrapup: MT_lock_unset(&rapiLock); if (argnames) free(argnames); if (rcall) free(rcall); for (i = 0; i < pci->argc; i++) GDKfree(args[i]); GDKfree(args); return msg; }
/* * The generic solution to the multiplex operators is to translate * them to a MAL loop. * The call optimizer.multiplex(MOD,FCN,A1,...An) introduces the following code * structure: * * resB:= bat.new(A1); * barrier (h,t1):= iterator.new(A1); * t2:= algebra.fetch(A2,h) * ... * cr:= MOD.FCN(t1,...,tn); * bat.append(resB,cr); * redo (h,t):= iterator.next(A1); * end h; * * The algorithm consists of two phases: phase one deals with * collecting the relevant information, phase two is the actual * code construction. */ static str OPTexpandMultiplex(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { int i = 2, iter = 0; int hvar, tvar; str mod, fcn; int *alias, *resB; InstrPtr q; int tt; int bat = (getModuleId(pci) == batmalRef) ; //if ( optimizerIsApplied(mb,"multiplex")) //return 0; (void) cntxt; (void) stk; for (i = 0; i < pci->retc; i++) { tt = getBatType(getArgType(mb, pci, i)); if (tt== TYPE_any) throw(MAL, "optimizer.multiplex", SQLSTATE(HY002) "Target tail type is missing"); if (isAnyExpression(getArgType(mb, pci, i))) throw(MAL, "optimizer.multiplex", SQLSTATE(HY002) "Target type is missing"); } mod = VALget(&getVar(mb, getArg(pci, pci->retc))->value); mod = putName(mod); fcn = VALget(&getVar(mb, getArg(pci, pci->retc+1))->value); fcn = putName(fcn); if(mod == NULL || fcn == NULL) throw(MAL, "optimizer.multiplex", SQLSTATE(HY001) MAL_MALLOC_FAIL); #ifndef NDEBUG fprintf(stderr,"#WARNING To speedup %s.%s a bulk operator implementation is needed\n#", mod,fcn); fprintInstruction(stderr, mb, stk, pci, LIST_MAL_DEBUG); #endif /* search the iterator bat */ for (i = pci->retc+2; i < pci->argc; i++) if (isaBatType(getArgType(mb, pci, i))) { iter = getArg(pci, i); break; } if( i == pci->argc) throw(MAL, "optimizer.multiplex", SQLSTATE(HY002) "Iterator BAT type is missing"); #ifdef DEBUG_OPT_MULTIPLEX { char *tpenme; fprintf(stderr,"#calling the optimize multiplex script routine\n"); fprintFunction(stderr,mb, 0, LIST_MAL_ALL ); tpenme = getTypeName(getVarType(mb,iter)); fprintf(stderr,"#multiplex against operator %d %s\n",iter, tpenme); GDKfree(tpenme); fprintInstruction(stderr,mb, 0, pci,LIST_MAL_ALL); } #endif /* * Beware, the operator constant (arg=1) is passed along as well, * because in the end we issue a recursive function call that should * find the actual arguments at the proper place of the callee. */ alias= (int*) GDKmalloc(sizeof(int) * pci->maxarg); resB = (int*) GDKmalloc(sizeof(int) * pci->retc); if (alias == NULL || resB == NULL) { GDKfree(alias); GDKfree(resB); return NULL; } /* resB := new(refBat) */ for (i = 0; i < pci->retc; i++) { q = newFcnCall(mb, batRef, newRef); resB[i] = getArg(q, 0); tt = getBatType(getArgType(mb, pci, i)); setVarType(mb, getArg(q, 0), newBatType(tt)); q = pushType(mb, q, tt); } /* barrier (h,r) := iterator.new(refBat); */ q = newFcnCall(mb, iteratorRef, newRef); q->barrier = BARRIERsymbol; hvar = newTmpVariable(mb, TYPE_any); getArg(q,0) = hvar; tvar = newTmpVariable(mb, TYPE_any); q= pushReturn(mb, q, tvar); (void) pushArgument(mb,q,iter); /* $1:= algebra.fetch(Ai,h) or constant */ for (i = pci->retc+2; i < pci->argc; i++) { if (getArg(pci, i) != iter && isaBatType(getArgType(mb, pci, i))) { q = newFcnCall(mb, algebraRef, "fetch"); alias[i] = newTmpVariable(mb, getBatType(getArgType(mb, pci, i))); getArg(q, 0) = alias[i]; q= pushArgument(mb, q, getArg(pci, i)); (void) pushArgument(mb, q, hvar); } } /* cr:= mod.CMD($1,...,$n); */ q = newFcnCall(mb, mod, fcn); for (i = 0; i < pci->retc; i++) { int nvar = 0; if (bat) { tt = getBatType(getArgType(mb, pci, i)); nvar = newTmpVariable(mb, newBatType(tt)); } else { nvar = newTmpVariable(mb, TYPE_any); } if (i) q = pushReturn(mb, q, nvar); else getArg(q, 0) = nvar; } for (i = pci->retc+2; i < pci->argc; i++) { if (getArg(pci, i) == iter) { q = pushArgument(mb, q, tvar); } else if (isaBatType(getArgType(mb, pci, i))) { q = pushArgument(mb, q, alias[i]); } else { q = pushArgument(mb, q, getArg(pci, i)); } } for (i = 0; i < pci->retc; i++) { InstrPtr a = newFcnCall(mb, batRef, appendRef); a = pushArgument(mb, a, resB[i]); (void) pushArgument(mb, a, getArg(q,i)); } /* redo (h,r):= iterator.next(refBat); */ q = newFcnCall(mb, iteratorRef, nextRef); q->barrier = REDOsymbol; getArg(q,0) = hvar; q= pushReturn(mb, q, tvar); (void) pushArgument(mb,q,iter); q = newAssignment(mb); q->barrier = EXITsymbol; getArg(q,0) = hvar; (void) pushReturn(mb, q, tvar); for (i = 0; i < pci->retc; i++) { q = newAssignment(mb); getArg(q, 0) = getArg(pci, i); (void) pushArgument(mb, q, resB[i]); } GDKfree(alias); GDKfree(resB); return MAL_SUCCEED; }
static str CMDbatBINARY2(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, BAT *(*batfunc)(BAT *, BAT *, BAT *, int, int), BAT *(batfunc1)(BAT *, const ValRecord *, BAT *, int, int), BAT *(batfunc2)(const ValRecord *, BAT *, BAT *, int, int), int (*typefunc)(int, int), int abort_on_error, const char *malfunc) { bat *bid; BAT *bn, *b, *s = NULL; int tp1, tp2, tp3; tp1 = stk->stk[getArg(pci, 1)].vtype; tp2 = stk->stk[getArg(pci, 2)].vtype; tp3 = getArgType(mb, pci, 0); assert(isaBatType(tp3)); tp3 = getBatType(tp3); if (pci->argc == 4) { bat *sid = getArgReference_bat(stk, pci, 3); if (*sid && (s = BATdescriptor(*sid)) == NULL) throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } if (tp1 == TYPE_bat || isaBatType(tp1)) { BAT *b2 = NULL; bid = getArgReference_bat(stk, pci, 1); b = BATdescriptor(*bid); if (b == NULL) { if (s) BBPunfix(s->batCacheid); throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } if (tp2 == TYPE_bat || isaBatType(tp2)) { bid = getArgReference_bat(stk, pci, 2); b2 = BATdescriptor(*bid); if (b2 == NULL) { BBPunfix(b->batCacheid); if (s) BBPunfix(s->batCacheid); throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } } if (b2) { if (tp3 == TYPE_any) tp3 = (*typefunc)(b->ttype, b2->ttype); bn = (*batfunc)(b, b2, s, tp3, abort_on_error); BBPunfix(b2->batCacheid); } else { if (tp3 == TYPE_any) tp3 = (*typefunc)(b->ttype, tp2); bn = (*batfunc1)(b, &stk->stk[getArg(pci, 2)], s, tp3, abort_on_error); } } else { assert(tp1 != TYPE_bat && !isaBatType(tp1)); assert(tp2 == TYPE_bat || isaBatType(tp2)); bid = getArgReference_bat(stk, pci, 2); b = BATdescriptor(*bid); if (b == NULL) { if (s) BBPunfix(s->batCacheid); throw(MAL, malfunc, SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); } if (tp3 == TYPE_any) tp3 = (*typefunc)(tp1, b->ttype); bn = (*batfunc2)(&stk->stk[getArg(pci, 1)], b, s, tp3, abort_on_error); } BBPunfix(b->batCacheid); if (bn == NULL) { return mythrow(MAL, malfunc, OPERATION_FAILED); } bid = getArgReference_bat(stk, pci, 0); BBPkeepref(*bid = bn->batCacheid); return MAL_SUCCEED; }
str SQLwindow_bound(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { str msg = MAL_SUCCEED; bool preceding; lng first_half; int unit, bound, excl, part_offset = (pci->argc > 6); if ((pci->argc != 6 && pci->argc != 7) || getArgType(mb, pci, part_offset + 2) != TYPE_int || getArgType(mb, pci, part_offset + 3) != TYPE_int || getArgType(mb, pci, part_offset + 4) != TYPE_int) { throw(SQL, "sql.window_bound", SQLSTATE(42000) "Invalid arguments"); } unit = *getArgReference_int(stk, pci, part_offset + 2); bound = *getArgReference_int(stk, pci, part_offset + 3); excl = *getArgReference_int(stk, pci, part_offset + 4); assert(unit >= 0 && unit <= 3); assert(bound >= 0 && bound <= 5); assert(excl >= 0 && excl <= 2); preceding = (bound % 2 == 0); first_half = (bound < 2 || bound == 4); (void)cntxt; if (isaBatType(getArgType(mb, pci, 1))) { bat *res = getArgReference_bat(stk, pci, 0); BAT *b = BATdescriptor(*getArgReference_bat(stk, pci, part_offset + 1)), *p = NULL, *r, *l = NULL; int tp1 = getBatType(getArgType(mb, pci, part_offset + 1)), tp2 = getArgType(mb, pci, part_offset + 5); void* limit = NULL; bool is_negative = false, is_null = false, is_a_bat; gdk_return gdk_code; if (!b) throw(SQL, "sql.window_bound", SQLSTATE(HY005) "Cannot access column descriptor"); if (excl != 0) { BBPunfix(b->batCacheid); throw(SQL, "sql.window_bound", SQLSTATE(42000) "Only EXCLUDE NO OTHERS exclusion is currently implemented"); } is_a_bat = isaBatType(tp2); if(is_a_bat) tp2 = getBatType(tp2); voidresultBAT(r, TYPE_lng, BATcount(b), b, "sql.window_bound"); if(is_a_bat) { //SQL_CURRENT_ROW shall never fall in limit validation l = BATdescriptor(*getArgReference_bat(stk, pci, part_offset + 5)); if (!l) { BBPunfix(b->batCacheid); throw(SQL, "sql.window_bound", SQLSTATE(HY005) "Cannot access column descriptor"); } switch (tp2) { case TYPE_bte: CHECK_NULLS_AND_NEGATIVES_COLUMN(bte) break; case TYPE_sht: CHECK_NULLS_AND_NEGATIVES_COLUMN(sht) break; case TYPE_int: CHECK_NULLS_AND_NEGATIVES_COLUMN(int) break; case TYPE_lng: CHECK_NULLS_AND_NEGATIVES_COLUMN(lng) break; case TYPE_flt: CHECK_NULLS_AND_NEGATIVES_COLUMN(flt) break; case TYPE_dbl: CHECK_NULLS_AND_NEGATIVES_COLUMN(dbl) break; #ifdef HAVE_HGE case TYPE_hge: CHECK_NULLS_AND_NEGATIVES_COLUMN(hge) break; #endif default: { BBPunfix(b->batCacheid); BBPunfix(l->batCacheid); throw(SQL, "sql.window_bound", SQLSTATE(42000) "%s limit not available for %s", "sql.window_bound", ATOMname(tp2)); } } if(is_null || is_negative) { BBPunfix(b->batCacheid); BBPunfix(l->batCacheid); if(is_null) throw(SQL, "sql.window_bound", SQLSTATE(HY005) "All values on %s boundary must be non-null", preceding ? "PRECEDING" : "FOLLOWING"); throw(SQL, "sql.window_bound", SQLSTATE(HY005) "All values on %s boundary must be non-negative", preceding ? "PRECEDING" : "FOLLOWING"); } } else {