// 与えられた入力ベクトルを用いて、inputとhiddenとoutputを更新する void nn_compute_with_ts(nn_sys_t *nnins, int index) { int i, j; // 入力層が隠れ層に出力した値を覚えておく。 for (i = 0; i < nnins->input_n; i++) nnins->input[i] = getts(nnins, index, i); // 入力層からの出力を元に隠れ層の出力を計算する。 for (i = 0; i < nnins->hidden_n; i++) { float sum = 0.0; for (j = 0; j < nnins->input_n; j++) sum += nnins->ih_w[i][j] * nnins->input[j]; sum = sigmoid(sum); // 隠れ層が出力層に出力した値を覚えておく。 nnins->hidden[i] = sum; } // 隠れ層からの出力を元に出力層の出力を計算する。 for (i = 0; i < nnins->output_n; i++) { float sum = 0.0; for (j = 0; j < nnins->hidden_n; j++) sum += nnins->ho_w[i][j] * nnins->hidden[j]; sum = sigmoid(sum); // 出力層に現れた値を覚えておく。 nnins->output[i] = sum; } }
/*
 * Lua binding: hand the string given as argument 2 to teel_addhistory().
 *
 * History is only available in edit mode; in any other mode the call is a
 * no-op and argument 2 is not even checked. Always returns 0.
 */
static int l_addhistory(lua_State *L)
{
    TelnetState *state = getts(L);

    if (state->editmode == TELNET_EDIT_MODE) {
        size_t entrylen;
        const char *entry = luaL_checklstring(L, 2, &entrylen);

        teel_addhistory(state->teel, entry, entrylen);
    }

    return 0;
}
/*
 * Lua binding: emit the prompt string given as argument 2.
 *
 * Argument 2 is validated as a string before the mode is examined. In edit
 * mode the prompt is passed to teel_showprompt(); otherwise its bytes are
 * appended to the output buffer with WriteBytes(). Always returns 0.
 */
static int l_showprompt(lua_State *L)
{
    TelnetState *state = getts(L);
    size_t promptlen;
    const char *prompt = luaL_checklstring(L, 2, &promptlen);

    if (state->editmode != TELNET_EDIT_MODE) {
        WriteBytes(&state->out, prompt, promptlen);
        return 0;
    }

    teel_showprompt(state->teel, prompt, promptlen);
    return 0;
}
/*
 * Lua binding: release every resource held by the telnet state.
 *
 * Releases the registry reference to the autocomplete function, destroys
 * the line editor (if any), and frees the line-mode, output and input
 * buffers. Every released field is reset so that calling this function a
 * second time on the same state is harmless. Always returns 0.
 */
static int l_close(lua_State *L)
{
    TelnetState *ts = getts(L);

    // Erase the entry for that user data in the registry. The reference is
    // reset to LUA_NOREF afterwards: luaL_unref() ignores LUA_NOREF, so a
    // repeated close cannot release the same registry slot twice.
    luaL_unref(L, LUA_REGISTRYINDEX, ts->autocompletefuncref);
    ts->autocompletefuncref = LUA_NOREF;

    if (ts->teel) {
        teel_destroy(ts->teel);
        ts->teel = 0;
    }

    FreeBuffer(&ts->linemodebuffer);
    FreeBuffer(&ts->out);

    // Free the input buffer and clear the read cursor as well, so no
    // pointer into the freed block (or a stale length) survives the close.
    ReadBuffer *in = &ts->in;
    MEM_FREE(in->buffer);
    in->buffer = 0;
    in->p = 0;
    in->mark = 0;
    in->len = 0;

    return 0;
}
/*
 * Lua binding: forward to output() for the telnet state obtained from
 * getts(L) and return whatever output() returns.
 */
static int l_output(lua_State *L)
{
    return output(getts(L));
}
// call: interpret(telnetstate, readbytes) // return: boolean: again, string: towritebytes, string: command, [string: arg] static int l_interpret(lua_State *L) { TelnetState* ts = getts(L); size_t inputlen = 0; const char* inputbuffer = 0; if (lua_type(L, 2) == LUA_TSTRING) inputbuffer = luaL_checklstring(L, 2, &inputlen); ReadBuffer* in = &ts->in; if (in->len && inputbuffer) // make the two buffers (old + new data) only one { int len = in->len + inputlen; char* buf = MEM_ALLOC(len); memcpy(buf, in->p, in->len); memcpy(buf+in->len, inputbuffer, inputlen); MEM_FREE(in->buffer); in->p = in->mark = in->buffer = buf; in->len = len; } else if (inputbuffer) // we only have new data { in->buffer = 0; // this buffer is not allocated in->p = in->mark = (char*) inputbuffer; in->len = inputlen; } InterpretCommand cmd; if (ts->editmode == TELNET_EDIT_MODE) cmd = processeditmode(ts); else cmd = processlinemode(ts); if (cmd != IC_EOS) ReadMark(in); else cmd = IC_NOP; // EOS command is equivalent to a NOP for the caller // Again ? 
if (in->len) // Only need to be called again if we did not exhaust the input buffers lua_pushboolean(L, 1); else lua_pushboolean(L, 0); // Store the unprocessed bytes, if any and if they are not already stored if (in->mark < in->p && !in->buffer) { int len = in->p-in->mark+in->len; in->buffer = MEM_ALLOC(len); memcpy(in->buffer, in->mark, len); in->p = in->mark = in->buffer; in->len = len; } // free the input buffer if it was allocated and is empty else if (in->mark == in->p && !in->len && in->buffer) { MEM_FREE(in->buffer); in->buffer = in->p = in->mark = 0; in->len = 0; } // Byte to send if (ts->out.buffer) { lua_pushlstring(L, ts->out.buffer, ts->out.len); FreeBuffer(&ts->out); } else lua_pushnil(L); // Command assert(cmd < IC_NB_OF_CMD); lua_pushstring(L, ic_names[cmd]); // push command // Optional argument int opt = 0; if (cmd == IC_LINE) { opt++; if (ts->editmode == TELNET_EDIT_MODE) { char* line; int len; teel_getline(ts->teel, &line, &len, 1); lua_pushlstring(L, line, len); MEM_FREE(line); } else // TELNET_LINE_MODE { lua_pushlstring(L, ts->linemodebuffer.buffer, ts->linemodebuffer.len); FreeBuffer(&ts->linemodebuffer); } } return 3 + opt; }
/*
 * Resample the region `roi_in` of `in` into the region `roi_out` of `out`.
 *
 * Pixels are handled as groups of 4 floats (one __m128 per pixel), and both
 * strides are applied as byte offsets (the row pointers are computed through
 * char* arithmetic).
 *
 * itor        interpolator handed to prepare_resampling_plan()
 * out         destination buffer
 * roi_out     output region; its `scale` selects the code path
 * out_stride  byte distance between two output rows
 * in          source buffer
 * roi_in      input region
 * in_stride   byte distance between two input rows
 *
 * When roi_out->scale is exactly 1 the rows are simply copied (crop only).
 * Otherwise a horizontal and a vertical resampling plan are prepared and
 * each output pixel is the vertically weighted sum of horizontally weighted
 * sums of input pixels. If preparing either plan reports a non-zero result
 * the function jumps to the cleanup and `out` is left unwritten.
 *
 * NOTE(review): the __m128 dereference of the input and _mm_stream_ps() on
 * the output require 16-byte aligned addresses -- confirm that the buffers
 * and strides supplied by callers guarantee this.
 */
void
dt_interpolation_resample(
  const struct dt_interpolation* itor,
  float *out,
  const dt_iop_roi_t* const roi_out,
  const int32_t out_stride,
  const float* const in,
  const dt_iop_roi_t* const roi_in,
  const int32_t in_stride)
{
  // Resampling plans, filled in by prepare_resampling_plan()
  int* hindex = NULL;
  int* hlength = NULL;
  float* hkernel = NULL;
  int* vindex = NULL;
  int* vlength = NULL;
  float* vkernel = NULL;
  int* vmeta = NULL;

  int r;

  debug_info(
    "resampling %p (%dx%d@%dx%d scale %f) -> %p (%dx%d@%dx%d scale %f)\n",
    in,
    roi_in->width, roi_in->height, roi_in->x, roi_in->y, roi_in->scale,
    out,
    roi_out->width, roi_out->height, roi_out->x, roi_out->y, roi_out->scale);

  // Fast code path for 1:1 copy, only cropping area can change
  if (roi_out->scale == 1.f)
  {
    // Byte offset of the first copied pixel in an input row, and number of
    // bytes copied per row (4 floats per pixel)
    const int x0 = roi_out->x*4*sizeof(float);
    const int l = roi_out->width*4*sizeof(float);
#if DEBUG_RESAMPLING_TIMING
    int64_t ts_resampling = getts();
#endif
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(out)
#endif
    for (int y=0; y<roi_out->height; y++)
    {
      // Input row is shifted down by roi_out->y rows and right by x0 bytes
      float* i = (float*)((char*)in + in_stride*(y + roi_out->y) + x0);
      float* o = (float*)((char*)out + out_stride*y);
      memcpy(o, i, l);
    }
#if DEBUG_RESAMPLING_TIMING
    ts_resampling = getts() - ts_resampling;
    fprintf(stderr, "resampling %p plan:0us resampling:%"PRId64"us\n", in, ts_resampling);
#endif
    // All done, so easy case
    return;
  }

  // Generic non 1:1 case... much more complicated :D
#if DEBUG_RESAMPLING_TIMING
  int64_t ts_plan = getts();
#endif

  // Prepare resampling plans once and for all
  // Horizontal plan: no meta array is requested (last argument is NULL)
  r = prepare_resampling_plan(itor, roi_in->width, roi_in->x, roi_out->width, roi_out->x, roi_out->scale, &hlength, &hkernel, &hindex, NULL);
  if (r)
  {
    goto exit;
  }

  // Vertical plan: also requests vmeta, which supplies the per-row starting
  // positions read at the top of the row loop below
  r = prepare_resampling_plan(itor, roi_in->height, roi_in->y, roi_out->height, roi_out->y, roi_out->scale, &vlength, &vkernel, &vindex, &vmeta);
  if (r)
  {
    goto exit;
  }

#if DEBUG_RESAMPLING_TIMING
  ts_plan = getts() - ts_plan;
#endif

#if DEBUG_RESAMPLING_TIMING
  int64_t ts_resampling = getts();
#endif

  // Process each output line
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(out, hindex, hlength, hkernel, vindex, vlength, vkernel, vmeta)
#endif
  for (int oy=0; oy<roi_out->height; oy++)
  {
    // Initialize column resampling indexes
    // vmeta holds three ints per output row: the starting positions in
    // vlength, vkernel and vindex (presumably so that each row can be
    // processed independently by the parallel loop)
    int vlidx = vmeta[3*oy + 0]; // V(ertical) L(ength) I(n)d(e)x
    int vkidx = vmeta[3*oy + 1]; // V(ertical) K(ernel) I(n)d(e)x
    int viidx = vmeta[3*oy + 2]; // V(ertical) I(ndex) I(n)d(e)x

    // Initialize row resampling indexes
    int hlidx = 0; // H(orizontal) L(ength) I(n)d(e)x
    int hkidx = 0; // H(orizontal) K(ernel) I(n)d(e)x
    int hiidx = 0; // H(orizontal) I(ndex) I(n)d(e)x

    // Number of lines contributing to the output line
    int vl = vlength[vlidx++]; // V(ertical) L(ength)

    // Process each output column
    for (int ox=0; ox < roi_out->width; ox++)
    {
      debug_extra("output %p [% 4d % 4d]\n", out, ox, oy);

      // This will hold the resulting pixel
      __m128 vs = _mm_setzero_ps();

      // Number of horizontal samples contributing to the output
      int hl = hlength[hlidx++]; // H(orizontal) L(ength)

      for (int iy=0; iy < vl; iy++)
      {
        // This is our input line
        const float* i = (float*)((char*)in + in_stride*vindex[viidx++]);

        // Horizontally filtered pixel for this input line
        __m128 vhs = _mm_setzero_ps();

        for (int ix=0; ix< hl; ix++)
        {
          // Apply the precomputed filter kernel
          // hindex is a pixel index; times 4 gives the float offset
          int baseidx = hindex[hiidx++]*4;
          float htap = hkernel[hkidx++];
          __m128 vhtap = _mm_set_ps1(htap);
          vhs = _mm_add_ps(vhs, _mm_mul_ps(*(__m128*)&i[baseidx], vhtap));
        }

        // Accumulate contribution from this line
        float vtap = vkernel[vkidx++];
        __m128 vvtap = _mm_set_ps1(vtap);
        vs = _mm_add_ps(vs, _mm_mul_ps(vhs, vvtap));

        // Reset horizontal resampling context
        // (the same hl horizontal taps are reused for the next input line)
        hkidx -= hl;
        hiidx -= hl;
      }

      // Output pixel is ready
      float* o = (float*)((char*)out + oy*out_stride + ox*4*sizeof(float));
      _mm_stream_ps(o, vs);

      // Reset vertical resampling context
      // (the same vl vertical taps are reused for the next output column)
      viidx -= vl;
      vkidx -= vl;

      // Progress in horizontal context
      hiidx += hl;
      hkidx += hl;
    }

    // Progress in vertical context
    viidx += vl;
    vkidx += vl;
  }
  // Issued once after all the _mm_stream_ps() stores above
  _mm_sfence();

#if DEBUG_RESAMPLING_TIMING
  ts_resampling = getts() - ts_resampling;
  fprintf(stderr, "resampling %p plan:%"PRId64"us resampling:%"PRId64"us\n", in, ts_plan, ts_resampling);
#endif

exit:
  /* Free the resampling plans. It's nasty to optimize allocs like that, but
   * it simplifies the code :-D. The length array is in fact the only memory
   * allocated. Both pointers start out NULL, so reaching this label after a
   * failed plan preparation is safe as long as the failed call left its
   * output pointer untouched or valid -- TODO confirm against
   * prepare_resampling_plan(). */
  free(hlength);
  free(vlength);
}