inline T join( const T& x, const T& y )
{
    ZI_ASSERT( x < size_ && x >= 0 );
    ZI_ASSERT( y < size_ && y >= 0 );

    if ( x == y )
    {
        return x;
    }

    --sets_;

    if ( r_[ x ] >= r_[ y ] )
    {
        p_[ y ] = x;
        if ( r_[ x ] == r_[ y ] )
        {
            ++r_[ x ];
        }
        return x;
    }

    p_[ x ] = y;
    return y;
}
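// The join above performs union by rank and assumes its arguments are set
// representatives (roots); the root-finding side is not shown in this snippet.
// Below is a minimal, self-contained sketch of how such a structure is
// typically paired with a path-compressing find. The class name and the
// find_root/merge helpers are illustrative assumptions, not the library's API.
#include <cstdint>
#include <numeric>
#include <vector>

struct toy_disjoint_sets
{
    std::vector<uint32_t> p_;   // parent of each element
    std::vector<uint8_t>  r_;   // rank (approximate tree height)

    explicit toy_disjoint_sets(uint32_t n) : p_(n), r_(n, 0)
    {
        std::iota(p_.begin(), p_.end(), 0);   // every element is its own root
    }

    uint32_t find_root(uint32_t x)
    {
        while (p_[x] != x)
        {
            p_[x] = p_[p_[x]];   // path halving keeps trees shallow
            x = p_[x];
        }
        return x;
    }

    // attach the shallower tree under the deeper one, as in join() above
    uint32_t join_roots(uint32_t x, uint32_t y)
    {
        if (x == y) return x;
        if (r_[x] >= r_[y])
        {
            p_[y] = x;
            if (r_[x] == r_[y]) ++r_[x];
            return x;
        }
        p_[x] = y;
        return y;
    }

    // convenience: find the roots first, then join them
    uint32_t merge(uint32_t a, uint32_t b)
    {
        return join_roots(find_root(a), find_root(b));
    }
};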
inline bool write_tensor( std::string const & fname,
                          std::vector<cube_p<T>> vols )
{
    ZI_ASSERT(vols.size()>0);

    FILE* fvol = fopen(fname.c_str(), "w");
    STRONG_ASSERT(fvol);

    F v;
    vec3i const & sz = size(*vols[0]);

    for ( auto& vol: vols )
    {
        ZI_ASSERT(size(*vol)==sz);
        for ( long_t z = 0; z < sz[0]; ++z )
            for ( long_t y = 0; y < sz[1]; ++y )
                for ( long_t x = 0; x < sz[2]; ++x )
                {
                    // convert each voxel to the export type F before writing
                    v = static_cast<F>((*vol)[z][y][x]);
                    static_cast<void>(fwrite(&v, sizeof(F), 1, fvol));
                }
    }

    fclose(fvol);

    return export_size_info(fname, sz, vols.size());
}
condition_variable()
    : spinlock_()
    , semaphore_( win32::CreateSemaphore( NULL, 0, 0x7FFFFFFF, NULL ) )
    , last_event_( win32::CreateSemaphore( NULL, 0, 0x7FFFFFFF, NULL ) )
    , broadcasting_( false )
    , waiters_( 0 )
{
    ZI_ASSERT( semaphore_ );
    ZI_ASSERT( last_event_ );
}
void backward( ccube_p<real> const & g )
{
    ZI_ASSERT(indices);
    ZI_ASSERT(insize==size(*g)+(filter_size-vec3i::one)*filter_stride);

    if ( in_nodes->is_input() )
    {
        in_nodes->backward(in_num, cube_p<real>());
    }
    else
    {
        in_nodes->backward(in_num,
                           pooling_backprop(insize, *g, *indices));
    }
}
inline void convolve_sparse_inverse_add( cube<T> const & a,
                                         cube<T> const & b,
                                         vec3i const & s,
                                         cube<T> & r ) noexcept
{
    if ( s == vec3i::one )
    {
        convolve_inverse_add(a,b,r);
        return;
    }

    size_t ax = a.shape()[0];
    size_t ay = a.shape()[1];
    size_t az = a.shape()[2];

    size_t bx = b.shape()[0];
    size_t by = b.shape()[1];
    size_t bz = b.shape()[2];

# ifndef NDEBUG
    // expected output size: full ("inverse") convolution with the sparse
    // (dilated) filter, whose extent is (b-1)*s + 1 along each dimension
    size_t rbx = (bx-1) * s[0] + 1;
    size_t rby = (by-1) * s[1] + 1;
    size_t rbz = (bz-1) * s[2] + 1;

    size_t rx = ax + rbx - 1;
    size_t ry = ay + rby - 1;
    size_t rz = az + rbz - 1;

    ZI_ASSERT(r.shape()[0]==rx);
    ZI_ASSERT(r.shape()[1]==ry);
    ZI_ASSERT(r.shape()[2]==rz);
# endif

    for ( size_t wx = 0; wx < bx; ++wx )
        for ( size_t wy = 0; wy < by; ++wy )
            for ( size_t wz = 0; wz < bz; ++wz )
            {
                size_t fx = bx - 1 - wx;
                size_t fy = by - 1 - wy;
                size_t fz = bz - 1 - wz;

                size_t ox = fx * s[0];
                size_t oy = fy * s[1];
                size_t oz = fz * s[2];

                for ( size_t x = 0; x < ax; ++x )
                    for ( size_t y = 0; y < ay; ++y )
                        for ( size_t z = 0; z < az; ++z )
                        {
                            r[x+ox][y+oy][z+oz] += a[x][y][z] * b[wx][wy][wz];
                        }
            }
}
inline std::vector<cube_p<real>>
constrain_affinity( std::vector<cube_p<real>> const & true_affs,
                    std::vector<cube_p<real>> const & affs,
                    zalis_phase phase,
                    real threshold = 0.5 )
{
    ZI_ASSERT(true_affs.size()==affs.size());
    ZI_ASSERT(phase!=zalis_phase::BOTH);

    std::vector<cube_p<real>> constrained_affs;

    for ( size_t i = 0; i < true_affs.size(); ++i )
    {
        cube<real> const & taff = *true_affs[i];
        vec3i s = size(taff);

        constrained_affs.push_back(get_cube<real>(s));
        cube<real>& aff = *constrained_affs.back();
        aff = *affs[i];

        ZI_ASSERT(size(taff)==size(aff));

        for ( size_t z = 0; z < s[0]; ++z )
            for ( size_t y = 0; y < s[1]; ++y )
                for ( size_t x = 0; x < s[2]; ++x )
                {
                    // constrain merger to boundary
                    if ( phase == zalis_phase::MERGER )
                    {
                        if ( taff[z][y][x] > threshold )
                        {
                            aff[z][y][x] = taff[z][y][x];
                        }
                    }

                    // constrain splitter to non-boundary
                    if ( phase == zalis_phase::SPLITTER )
                    {
                        if ( taff[z][y][x] < threshold )
                        {
                            aff[z][y][x] = taff[z][y][x];
                        }
                    }
                }
    }

    return constrained_affs;
}
inline edges::edges( nodes * in,
                     nodes * out,
                     options const & opts,
                     vec3i const & in_size,
                     task_manager & tm,
                     real_pooling_tag )
    : options_(opts)
    , size_(in_size)
    , tm_(tm)
{
    ZI_ASSERT(in->num_out_nodes()==out->num_in_nodes());

    size_t n = in->num_out_nodes();
    edges_.resize(n);
    waiter_.set(n);

    auto sz = opts.require_as<ovec3i>("size");

    for ( size_t i = 0; i < n; ++i )
    {
        edges_[i] = std::make_unique<real_pooling_edge>
            (in, i, out, i, tm_, sz);
    }
}
void do_update( ccube_p<complex> const & g )
{
    ZI_ASSERT(enabled_);

    auto dEdW_fft = *last_input * *g;
    auto dEdW     = fftw_.backward(std::move(dEdW_fft));
    real norm     = dEdW->num_elements();

    if ( fftw_.size() != fftw_.actual_size() )
    {
        dEdW = crop_left(*dEdW, fftw_.size());
    }

    flip(*dEdW);
    // TODO(zlateski): WTH was happening with sparse_implode before
    //                 when I had to use sparse_implode_slow
    //                 only happened on my laptop
    dEdW = sparse_implode_slow(*dEdW, filter_stride, size(filter_.W()));
    *dEdW /= norm;

    filter_.update(*dEdW, patch_sz_);

#ifndef ZNN_DONT_CACHE_FFTS
    initialize();
#endif
}
// dropout
inline edges::edges( nodes * in,
                     nodes * out,
                     options const & opts,
                     vec3i const & in_size,
                     task_manager & tm,
                     phase phs,
                     dropout_tag )
    : options_(opts)
    , size_(in_size)
    , tm_(tm)
{
    ZI_ASSERT(in->num_out_nodes()==out->num_in_nodes());

    size_t n = in->num_out_nodes();
    edges_.resize(n);
    waiter_.set(n);

    auto ratio = opts.optional_as<real>("ratio", 0.5);

    for ( size_t i = 0; i < n; ++i )
    {
        edges_[i] = std::make_unique<dropout_edge>
            (in, i, out, i, tm_, ratio, phs);
    }
}
maxout_nodes( size_t s,
              vec3i const & fsize,
              options const & op,
              task_manager & tm,
              size_t fwd_p,
              size_t bwd_p,
              bool is_out )
    : nodes(s,fsize,op,tm,fwd_p,bwd_p,false,is_out)
    , fwd_dispatch_(s)
    , bwd_dispatch_(s)
    , fwd_accumulators_(s)
    , bwd_accumulators_(s)
    , fs_(s)
    , is_(s)
    , fwd_done_(s)
    , waiter_(s)
{
    for ( size_t i = 0; i < nodes::size(); ++i )
    {
        fwd_accumulators_[i] = std::make_unique<max_accumulator>();
        bwd_accumulators_[i] = std::make_unique<backward_accumulator>(fsize);
    }

    auto type = op.require_as<std::string>("type");
    ZI_ASSERT(type=="maxout");
}
void backward(size_t n, size_t b, cube_p<complex>&& g) override
{
    ZI_ASSERT((n<nodes::size())&&(!nodes::is_output()));
    if ( bwd_accumulators_[n]->add(b,std::move(g)) )
    {
        do_backward(n,bwd_accumulators_[n]->reset());
    }
}
void forward(size_t n, size_t b, cube_p<complex>&& f) override
{
    ZI_ASSERT(n<nodes::size());
    if ( fwd_accumulators_[n]->add(b,std::move(f)) )
    {
        do_forward(n);
    }
}
void forward(size_t n, cube_p<real>&& f, int idx)
{
    ZI_ASSERT(n<nodes::size());
    if ( fwd_accumulators_[n]->add(std::move(f),idx) )
    {
        do_forward(n);
    }
}
void backward(size_t n,
              ccube_p<real> const & g,
              ccube_p<real> const & w,
              vec3i const & stride) override
{
    ZI_ASSERT((n<nodes::size())&&(!nodes::is_output()));
    if ( bwd_accumulators_[n]->add(g,w,stride) )
    {
        do_backward(n,bwd_accumulators_[n]->reset());
    }
}
inline void pairwise_div( cube<T>& a, const cube<T>& b )
{
    ZI_ASSERT(a.n_elem==b.n_elem);
    T* ap       = a.memptr();
    const T* bp = b.memptr();
    for ( size_t i = 0; i < a.n_elem; ++i )
        ap[i] /= bp[i];
}
inline void convolve_flipped( cube<T> const & a,
                              cube<T> const & b,
                              cube<T> & r ) noexcept
{
    ZI_ASSERT(size(r)==(vec3i::one+size(a)-size(b)));
    auto tmp = get_copy(a);
    flip(*tmp);
    return convolve(*tmp,b,r);
}
void wait()
{
    std::unique_lock<std::mutex> g(mutex_);
    while ( current_ < required_ )
    {
        cv_.wait(g);
    }

    ZI_ASSERT(current_==required_);
    current_ = 0;
}
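// wait() above blocks until a counter reaches a preset target and then resets
// it; the signalling side is not part of this snippet. A minimal standalone
// sketch of the full pattern might look like the class below. The names
// (simple_waiter, set, one_done) are assumptions for illustration only.
#include <condition_variable>
#include <cstddef>
#include <mutex>

class simple_waiter
{
private:
    std::mutex              mutex_;
    std::condition_variable cv_;
    std::size_t             current_  = 0;
    std::size_t             required_ = 0;

public:
    // choose how many one_done() calls a single wait() should absorb
    void set(std::size_t n)
    {
        std::lock_guard<std::mutex> g(mutex_);
        required_ = n;
    }

    // called by each worker when its share of the work is finished
    void one_done()
    {
        std::lock_guard<std::mutex> g(mutex_);
        if ( ++current_ >= required_ )
        {
            cv_.notify_all();
        }
    }

    // block until all workers have reported, then reset for reuse
    void wait()
    {
        std::unique_lock<std::mutex> g(mutex_);
        while ( current_ < required_ )
        {
            cv_.wait(g);
        }
        current_ = 0;
    }
};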
void forward( ccube_p<real> const & f ) override
{
    ZI_ASSERT(size(*f)==insize);
    auto r = pooling_filter(get_copy(*f),
                            [](real a, real b){ return a>b; },
                            filter_size,
                            filter_stride);
    indices = r.second;
    out_nodes->forward(out_num,std::move(r.first));
}
void backward(size_t n, size_t b,
              ccube_p<complex> const & g,
              ccube_p<complex> const & w) override
{
    ZI_ASSERT((n<nodes::size())&&(!nodes::is_output()));
    if ( bwd_accumulators_[n]->add(b,g,w) )
    {
        do_backward(n,bwd_accumulators_[n]->reset());
    }
}
void forward(size_t n, size_t b,
             ccube_p<complex> const & f,
             ccube_p<complex> const & w ) override
{
    ZI_ASSERT(n<nodes::size());
    if ( fwd_accumulators_[n]->add(b,f,w) )
    {
        do_forward(n);
    }
}
cube_p<real> reset()
{
    ZI_ASSERT(current_==required_);
    cube_p<real> r = fftw_->backward(std::move(sum_));
    sum_.reset();
    current_ = 0;
    return r;
}
void backward( ccube_p<real> const & g )
{
    guard gg(m);

    ZI_ASSERT(last_input);
    in_nodes->backward(in_num,
                       convolve_sparse_inverse(*g, filter_.W(), filter_stride));

    pending_ = manager.schedule_unprivileged(&filter_ds_edge::do_update,
                                             this, g);
}
void forward(size_t n,
             ccube_p<real> const & f,
             ccube_p<real> const & w,
             vec3i const & stride) override
{
    ZI_ASSERT(n<nodes::size());
    if ( fwd_accumulators_[n]->add(f,w,stride) )
    {
        do_forward(n);
    }
}
inline void convolve_constant( cube<T> const & a,
                               identity_t<T> b,
                               cube<T> & r ) noexcept
{
    ZI_ASSERT(size(a)==size(r));
    T const * ap = a.data();
    T       * rp = r.data();

    for ( size_t i = 0; i < r.num_elements(); ++i )
        rp[i] = ap[i] * b;
}
void forward( ccube_p<real> const & f ) override
{
    ZI_ASSERT(size(*f)==insize);

    auto fmap = get_copy(*f);
    if ( phase_ == phase::TRAIN )
    {
        dropout_forward(*fmap);
    }

    out_nodes->forward(out_num, std::move(fmap));
}
void forward( cube<real>& in, cube<complex>& out )
{
    ZI_ASSERT(size(out)==fft_complex_size(in));
    ZI_ASSERT(size(in)==sz);

    fft_plan plan = fft_plans.get_forward(
        vec3i(in.shape()[0],in.shape()[1],in.shape()[2]));

    MKL_LONG status;

# ifdef MEASURE_FFT_RUNTIME
    zi::wall_timer wt;
# endif

    status = DftiComputeForward(*plan,
                                reinterpret_cast<real*>(in.data()),
                                reinterpret_cast<real*>(out.data()));

# ifdef MEASURE_FFT_RUNTIME
    fft_stats.add(wt.elapsed<real>());
# endif
}
void forward( ccube_p<real> const & f ) override
{
    ZI_ASSERT(size(*f)==insize);
    auto r = pooling_filter(get_copy(*f),
                            [](real a, real b){ return a>b; },
                            filter_size,
                            vec3i::one);
    indices = sparse_implode_slow(*r.second,filter_size,outsize);
    out_nodes->forward(out_num,
                       sparse_implode_slow(*r.first,filter_size,outsize));
}
inline void convolve_sparse_add( cube<T> const & a,
                                 cube<T> const & b,
                                 vec3i const & s,
                                 cube<T> & r ) noexcept
{
    if ( s == vec3i::one )
    {
        convolve_add(a,b,r);
        return;
    }

    size_t ax = a.shape()[0];
    size_t ay = a.shape()[1];
    size_t az = a.shape()[2];

    size_t bx = b.shape()[0];
    size_t by = b.shape()[1];
    size_t bz = b.shape()[2];

    // extent of the sparse (dilated) filter along each dimension
    size_t rbx = (bx-1) * s[0] + 1;
    size_t rby = (by-1) * s[1] + 1;
    size_t rbz = (bz-1) * s[2] + 1;

    // "valid" output size
    size_t rx = ax - rbx + 1;
    size_t ry = ay - rby + 1;
    size_t rz = az - rbz + 1;

    ZI_ASSERT(r.shape()[0]==rx);
    ZI_ASSERT(r.shape()[1]==ry);
    ZI_ASSERT(r.shape()[2]==rz);

    for ( size_t x = 0; x < rx; ++x )
        for ( size_t y = 0; y < ry; ++y )
            for ( size_t z = 0; z < rz; ++z )
                for ( size_t dx = x, wx = bx-1; dx < rbx + x; dx += s[0], --wx )
                    for ( size_t dy = y, wy = by-1; dy < rby + y; dy += s[1], --wy )
                        for ( size_t dz = z, wz = bz-1; dz < rbz + z; dz += s[2], --wz )
                        {
                            r[x][y][z] += a[dx][dy][dz] * b[wx][wy][wz];
                        }
}
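// The nested loops above implement a "valid" convolution with a sparse
// (dilated) filter: taps are spaced s apart and traversed in flipped order.
// The standalone 1-D analogue below is a sketch of the same indexing scheme
// for a scalar dilation factor s; it is illustrative only and does not use
// the library's cube types.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<double> convolve_sparse_1d(std::vector<double> const& a,
                                       std::vector<double> const& b,
                                       std::size_t s)
{
    std::size_t rb = (b.size() - 1) * s + 1;       // extent of the dilated filter
    std::vector<double> r(a.size() - rb + 1, 0.0); // "valid" output size

    for (std::size_t x = 0; x < r.size(); ++x)
        for (std::size_t k = 0; k < b.size(); ++k)
            // taps are flipped (true convolution) and spaced s samples apart
            r[x] += a[x + k * s] * b[b.size() - 1 - k];

    return r;
}

int main()
{
    std::vector<double> a{1, 2, 3, 4, 5, 6, 7};
    std::vector<double> b{1, 0, -1};
    for (double v : convolve_sparse_1d(a, b, 2)) // dilated extent 5, output 3
        std::cout << v << ' ';                   // prints: 4 4 4
    std::cout << '\n';
}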
static void backward( cube<complex>& in, cube<real>& out )
{
    ZI_ASSERT(in.shape()[0]==out.shape()[0]);
    ZI_ASSERT(in.shape()[1]==out.shape()[1]);
    ZI_ASSERT((out.shape()[2]/2+1)==in.shape()[2]);

    fft_plan plan = fft_plans.get_backward(
        vec3i(out.shape()[0],out.shape()[1],out.shape()[2]));

    MKL_LONG status;

# ifdef MEASURE_FFT_RUNTIME
    zi::wall_timer wt;
# endif

    status = DftiComputeBackward(*plan,
                                 reinterpret_cast<real*>(in.data()),
                                 reinterpret_cast<real*>(out.data()));

# ifdef MEASURE_FFT_RUNTIME
    fft_stats.add(wt.elapsed<real>());
# endif
}
void init( T s )
{
    ZI_ASSERT( s >= 0 );

    p_ = reinterpret_cast< T* >( malloc( s * sizeof( T ) ));
    r_ = reinterpret_cast< uint8_t* >( malloc( s * sizeof( uint8_t ) ));

    for ( T i = 0; i < s; ++i )
    {
        p_[ i ] = i;
        r_[ i ] = 0;
    }

    size_ = sets_ = s;
}