/**
 * Scatters the emission part of the joint parameter vector w into
 * emission_weights.
 *
 * emission_weights is zeroed first. The entries of w are then read
 * sequentially, starting at offset m_num_transmission_params, and written to
 * position state*num_feats*num_obs + feat*num_obs + obs.
 *
 * Legend for state indices:
 *   0 -> start state
 *   1 -> stop state
 *   2 -> negative state (label == 0)
 *   3 -> positive state (label == 1)
 *
 * The start and stop states have no emission scores, which is why the state
 * loop begins at 2 and their entries stay zero.
 *
 * @param emission_weights output vector, overwritten by this call
 * @param w joint parameter vector; emission parameters are read from index
 * m_num_transmission_params onwards
 * @param num_feats number of features
 * @param num_obs number of observations per feature
 */
void CTwoStateModel::reshape_emission_params(SGVector< float64_t >& emission_weights,
		SGVector< float64_t > w, int32_t num_feats, int32_t num_obs)
{
	emission_weights.zero();

	/* read position in w, the transmission parameters come first */
	index_t src = m_num_transmission_params;

	for ( int32_t state = 2 ; state < m_num_states ; ++state )
	{
		for ( int32_t feat = 0 ; feat < num_feats ; ++feat )
		{
			for ( int32_t obs = 0 ; obs < num_obs ; ++obs )
			{
				const index_t dst = state*num_feats*num_obs + feat*num_obs + obs;
				emission_weights[dst] = w[src];
				++src;
			}
		}
	}
}
void CLinearTimeMMD::compute_statistic_and_variance( SGVector<float64_t>& statistic, SGVector<float64_t>& variance, bool multiple_kernels) { SG_DEBUG("entering %s::compute_statistic_and_variance()\n", get_name()) REQUIRE(m_streaming_p, "%s::compute_statistic_and_variance: streaming " "features p required!\n", get_name()); REQUIRE(m_streaming_q, "%s::compute_statistic_and_variance: streaming " "features q required!\n", get_name()); REQUIRE(m_kernel, "%s::compute_statistic_and_variance: kernel needed!\n", get_name()); /* make sure multiple_kernels flag is used only with a combined kernel */ REQUIRE(!multiple_kernels || m_kernel->get_kernel_type()==K_COMBINED, "%s::compute_statistic_and_variance: multiple kernels specified," "but underlying kernel is not of type K_COMBINED\n", get_name()); /* m is number of samples from each distribution, m_2 is half of it * using names from JLMR paper (see class documentation) */ index_t m_2=m_m/2; SG_DEBUG("m_m=%d\n", m_m) /* find out whether single or multiple kernels (cast is safe, check above) */ index_t num_kernels=1; if (multiple_kernels) { num_kernels=((CCombinedKernel*)m_kernel)->get_num_subkernels(); SG_DEBUG("computing MMD and variance for %d sub-kernels\n", num_kernels); } /* allocate memory for results if vectors are empty */ if (!statistic.vector) statistic=SGVector<float64_t>(num_kernels); if (!variance.vector) variance=SGVector<float64_t>(num_kernels); /* ensure right dimensions */ REQUIRE(statistic.vlen==num_kernels, "%s::compute_statistic_and_variance: " "statistic vector size (%d) does not match number of kernels (%d)\n", get_name(), statistic.vlen, num_kernels); REQUIRE(variance.vlen==num_kernels, "%s::compute_statistic_and_variance: " "variance vector size (%d) does not match number of kernels (%d)\n", get_name(), variance.vlen, num_kernels); /* temp variable in the algorithm */ float64_t current; float64_t delta; /* initialise statistic and variance since they are cumulative */ statistic.zero(); variance.zero(); 
/* needed for online mean and variance */ SGVector<index_t> term_counters(num_kernels); term_counters.set_const(1); /* term counter to compute online mean and variance */ index_t num_examples_processed=0; while (num_examples_processed<m_2) { /* number of example to look at in this iteration */ index_t num_this_run=CMath::min(m_blocksize, CMath::max(0, m_2-num_examples_processed)); SG_DEBUG("processing %d more examples. %d so far processed. Blocksize " "is %d\n", num_this_run, num_examples_processed, m_blocksize); /* stream data from both distributions */ CFeatures* p1=m_streaming_p->get_streamed_features(num_this_run); CFeatures* p2=m_streaming_p->get_streamed_features(num_this_run); CFeatures* q1=m_streaming_q->get_streamed_features(num_this_run); CFeatures* q2=m_streaming_q->get_streamed_features(num_this_run); /* check whether h0 should be simulated and permute if so */ if (m_simulate_h0) { /* create merged copy of all feature instances to permute */ CList* list=new CList(); list->append_element(p2); list->append_element(q1); list->append_element(q2); CFeatures* merged=p1->create_merged_copy(list); SG_UNREF(list); /* permute */ SGVector<index_t> inds(merged->get_num_vectors()); inds.range_fill(); inds.permute(); merged->add_subset(inds); /* copy back, replacing old features */ SG_UNREF(p1); SG_UNREF(p2); SG_UNREF(q1); SG_UNREF(q2); SGVector<index_t> copy(num_this_run); copy.range_fill(); p1=merged->copy_subset(copy); copy.add(num_this_run); p2=merged->copy_subset(copy); copy.add(num_this_run); q1=merged->copy_subset(copy); copy.add(num_this_run); q2=merged->copy_subset(copy); /* clean up and note that copy_subset does a SG_REF */ SG_UNREF(merged); } else { /* reference produced features (only if copy_subset was not used) */ SG_REF(p1); SG_REF(p2); SG_REF(q1); SG_REF(q2); } /* if multiple kernels are used, compute all of them on streamed data, * if multiple kernels flag is false, the above loop will be executed * only once */ CKernel* kernel=m_kernel; if 
(multiple_kernels) { SG_DEBUG("using multiple kernels\n"); } /* iterate through all kernels for this data */ for (index_t i=0; i<num_kernels; ++i) { /* if multiple kernels should be computed, set next kernel */ if (multiple_kernels) { kernel=((CCombinedKernel*)m_kernel)->get_kernel(i); } /* compute kernel matrix diagonals */ kernel->init(p1, p2); SGVector<float64_t> pp=kernel->get_kernel_diagonal(); kernel->init(q1, q2); SGVector<float64_t> qq=kernel->get_kernel_diagonal(); kernel->init(p1, q2); SGVector<float64_t> pq=kernel->get_kernel_diagonal(); kernel->init(q1, p2); SGVector<float64_t> qp=kernel->get_kernel_diagonal(); /* single variances for all kernels. Update mean and variance * using Knuth's online variance algorithm. * C.f. for example Wikipedia */ for (index_t j=0; j<num_this_run; ++j) { /* compute sum of current h terms for current kernel */ current=pp[j]+qq[j]-pq[j]-qp[j]; /* D. Knuth's online variance algorithm for current kernel */ delta=current-statistic[i]; statistic[i]+=delta/term_counters[i]++; variance[i]+=delta*(current-statistic[i]); SG_DEBUG("burst: current=%f, delta=%f, statistic=%f, " "variance=%f, kernel_idx=%d\n", current, delta, statistic[i], variance[i], i); } if (multiple_kernels) { SG_UNREF(kernel); } } /* clean up streamed data */ SG_UNREF(p1); SG_UNREF(p2); SG_UNREF(q1); SG_UNREF(q2); /* add number of processed examples for this run */ num_examples_processed+=num_this_run; } SG_DEBUG("Done compouting statistic, processed 2*%d examples.\n", num_examples_processed); /* mean of sum all traces is linear time mmd, copy entries for all kernels */ if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG) statistic.display_vector("statistics"); /* variance of terms can be computed using mean (statistic). 
* Note that the variance needs to be divided by m_2 in order to get * variance of null-distribution */ for (index_t i=0; i<num_kernels; ++i) variance[i]=variance[i]/(m_2-1)/m_2; if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG) variance.display_vector("variances"); SG_DEBUG("leaving %s::compute_statistic_and_variance()\n", get_name()) }
void CLinearTimeMMD::compute_statistic_and_Q( SGVector<float64_t>& statistic, SGMatrix<float64_t>& Q) { SG_DEBUG("entering %s::compute_statistic_and_Q()\n", get_name()) REQUIRE(m_streaming_p, "%s::compute_statistic_and_Q: streaming " "features p required!\n", get_name()); REQUIRE(m_streaming_q, "%s::compute_statistic_and_Q: streaming " "features q required!\n", get_name()); REQUIRE(m_kernel, "%s::compute_statistic_and_Q: kernel needed!\n", get_name()); /* make sure multiple_kernels flag is used only with a combined kernel */ REQUIRE(m_kernel->get_kernel_type()==K_COMBINED, "%s::compute_statistic_and_Q: underlying kernel is not of " "type K_COMBINED\n", get_name()); /* cast combined kernel */ CCombinedKernel* combined=(CCombinedKernel*)m_kernel; /* m is number of samples from each distribution, m_4 is quarter of it */ REQUIRE(m_m>=4, "%s::compute_statistic_and_Q: Need at least m>=4\n", get_name()); index_t m_4=m_m/4; SG_DEBUG("m_m=%d\n", m_m) /* find out whether single or multiple kernels (cast is safe, check above) */ index_t num_kernels=combined->get_num_subkernels(); REQUIRE(num_kernels>0, "%s::compute_statistic_and_Q: At least one kernel " "is needed\n", get_name()); /* allocate memory for results if vectors are empty */ if (!statistic.vector) statistic=SGVector<float64_t>(num_kernels); if (!Q.matrix) Q=SGMatrix<float64_t>(num_kernels, num_kernels); /* ensure right dimensions */ REQUIRE(statistic.vlen==num_kernels, "%s::compute_statistic_and_variance: " "statistic vector size (%d) does not match number of kernels (%d)\n", get_name(), statistic.vlen, num_kernels); REQUIRE(Q.num_rows==num_kernels, "%s::compute_statistic_and_variance: " "Q number of rows does (%d) not match number of kernels (%d)\n", get_name(), Q.num_rows, num_kernels); REQUIRE(Q.num_cols==num_kernels, "%s::compute_statistic_and_variance: " "Q number of columns (%d) does not match number of kernels (%d)\n", get_name(), Q.num_cols, num_kernels); /* initialise statistic and variance since they are 
cumulative */ statistic.zero(); Q.zero(); /* produce two kernel lists to iterate doubly nested */ CList* list_i=new CList(); CList* list_j=new CList(); for (index_t k_idx=0; k_idx<combined->get_num_kernels(); k_idx++) { CKernel* kernel = combined->get_kernel(k_idx); list_i->append_element(kernel); list_j->append_element(kernel); SG_UNREF(kernel); } /* needed for online mean and variance */ SGVector<index_t> term_counters_statistic(num_kernels); SGMatrix<index_t> term_counters_Q(num_kernels, num_kernels); term_counters_statistic.set_const(1); term_counters_Q.set_const(1); index_t num_examples_processed=0; while (num_examples_processed<m_4) { /* number of example to look at in this iteration */ index_t num_this_run=CMath::min(m_blocksize, CMath::max(0, m_4-num_examples_processed)); SG_DEBUG("processing %d more examples. %d so far processed. Blocksize " "is %d\n", num_this_run, num_examples_processed, m_blocksize); /* stream data from both distributions */ CFeatures* p1a=m_streaming_p->get_streamed_features(num_this_run); CFeatures* p1b=m_streaming_p->get_streamed_features(num_this_run); CFeatures* p2a=m_streaming_p->get_streamed_features(num_this_run); CFeatures* p2b=m_streaming_p->get_streamed_features(num_this_run); CFeatures* q1a=m_streaming_q->get_streamed_features(num_this_run); CFeatures* q1b=m_streaming_q->get_streamed_features(num_this_run); CFeatures* q2a=m_streaming_q->get_streamed_features(num_this_run); CFeatures* q2b=m_streaming_q->get_streamed_features(num_this_run); /* check whether h0 should be simulated and permute if so */ if (m_simulate_h0) { /* create merged copy of all feature instances to permute */ CList* list=new CList(); list->append_element(p1b); list->append_element(p2a); list->append_element(p2b); list->append_element(q1a); list->append_element(q1b); list->append_element(q2a); list->append_element(q2b); CFeatures* merged=p1a->create_merged_copy(list); SG_UNREF(list); /* permute */ SGVector<index_t> inds(merged->get_num_vectors()); 
inds.range_fill(); inds.permute(); merged->add_subset(inds); /* copy back, replacing old features */ SG_UNREF(p1a); SG_UNREF(p1b); SG_UNREF(p2a); SG_UNREF(p2b); SG_UNREF(q1a); SG_UNREF(q1b); SG_UNREF(q2a); SG_UNREF(q2b); SGVector<index_t> copy(num_this_run); copy.range_fill(); p1a=merged->copy_subset(copy); copy.add(num_this_run); p1b=merged->copy_subset(copy); copy.add(num_this_run); p2a=merged->copy_subset(copy); copy.add(num_this_run); p2b=merged->copy_subset(copy); copy.add(num_this_run); q1a=merged->copy_subset(copy); copy.add(num_this_run); q1b=merged->copy_subset(copy); copy.add(num_this_run); q2a=merged->copy_subset(copy); copy.add(num_this_run); q2b=merged->copy_subset(copy); /* clean up and note that copy_subset does a SG_REF */ SG_UNREF(merged); } else { /* reference the produced features (only if copy subset was not used) */ SG_REF(p1a); SG_REF(p1b); SG_REF(p2a); SG_REF(p2b); SG_REF(q1a); SG_REF(q1b); SG_REF(q2a); SG_REF(q2b); } /* now for each of these streamed data instances, iterate through all * kernels and update Q matrix while also computing MMD statistic */ /* preallocate some memory for faster processing */ SGVector<float64_t> pp(num_this_run); SGVector<float64_t> qq(num_this_run); SGVector<float64_t> pq(num_this_run); SGVector<float64_t> qp(num_this_run); SGVector<float64_t> h_i_a(num_this_run); SGVector<float64_t> h_i_b(num_this_run); SGVector<float64_t> h_j_a(num_this_run); SGVector<float64_t> h_j_b(num_this_run); /* iterate through Q matrix and update values, compute mmd */ CKernel* kernel_i=(CKernel*)list_i->get_first_element(); for (index_t i=0; i<num_kernels; ++i) { /* compute all necessary 8 h-vectors for this burst. 
* h_delta-terms for each kernel, expression 7 of NIPS paper * first kernel */ /* first kernel, a-part */ kernel_i->init(p1a, p2a); pp=kernel_i->get_kernel_diagonal(pp); kernel_i->init(q1a, q2a); qq=kernel_i->get_kernel_diagonal(qq); kernel_i->init(p1a, q2a); pq=kernel_i->get_kernel_diagonal(pq); kernel_i->init(q1a, p2a); qp=kernel_i->get_kernel_diagonal(qp); for (index_t it=0; it<num_this_run; ++it) h_i_a[it]=pp[it]+qq[it]-pq[it]-qp[it]; /* first kernel, b-part */ kernel_i->init(p1b, p2b); pp=kernel_i->get_kernel_diagonal(pp); kernel_i->init(q1b, q2b); qq=kernel_i->get_kernel_diagonal(qq); kernel_i->init(p1b, q2b); pq=kernel_i->get_kernel_diagonal(pq); kernel_i->init(q1b, p2b); qp=kernel_i->get_kernel_diagonal(qp); for (index_t it=0; it<num_this_run; ++it) h_i_b[it]=pp[it]+qq[it]-pq[it]-qp[it]; /* iterate through j, but use symmetry in order to save half of the * computations */ CKernel* kernel_j=(CKernel*)list_j->get_first_element(); for (index_t j=0; j<=i; ++j) { /* compute all necessary 8 h-vectors for this burst. 
* h_delta-terms for each kernel, expression 7 of NIPS paper * second kernel */ /* second kernel, a-part */ kernel_j->init(p1a, p2a); pp=kernel_j->get_kernel_diagonal(pp); kernel_j->init(q1a, q2a); qq=kernel_j->get_kernel_diagonal(qq); kernel_j->init(p1a, q2a); pq=kernel_j->get_kernel_diagonal(pq); kernel_j->init(q1a, p2a); qp=kernel_j->get_kernel_diagonal(qp); for (index_t it=0; it<num_this_run; ++it) h_j_a[it]=pp[it]+qq[it]-pq[it]-qp[it]; /* second kernel, b-part */ kernel_j->init(p1b, p2b); pp=kernel_j->get_kernel_diagonal(pp); kernel_j->init(q1b, q2b); qq=kernel_j->get_kernel_diagonal(qq); kernel_j->init(p1b, q2b); pq=kernel_j->get_kernel_diagonal(pq); kernel_j->init(q1b, p2b); qp=kernel_j->get_kernel_diagonal(qp); for (index_t it=0; it<num_this_run; ++it) h_j_b[it]=pp[it]+qq[it]-pq[it]-qp[it]; float64_t term; for (index_t it=0; it<num_this_run; ++it) { /* current term of expression 7 of NIPS paper */ term=(h_i_a[it]-h_i_b[it])*(h_j_a[it]-h_j_b[it]); /* update covariance element for the current burst. This is a * running average of the product of the h_delta terms of each * kernel */ Q(i, j)+=(term-Q(i, j))/term_counters_Q(i, j)++; } /* use symmetry */ Q(j, i)=Q(i, j); /* next kernel j */ kernel_j=(CKernel*)list_j->get_next_element(); } /* update MMD statistic online computation for kernel i, using * vectors that were computed above */ SGVector<float64_t> h(num_this_run*2); for (index_t it=0; it<num_this_run; ++it) { /* update statistic for kernel i (outer loop) and update using * all elements of the h_i_a, h_i_b vectors (iterate over it) */ statistic[i]=statistic[i]+ (h_i_a[it]-statistic[i])/term_counters_statistic[i]++; /* Make sure to use all data, i.e. 
part a and b */ statistic[i]=statistic[i]+ (h_i_b[it]-statistic[i])/(term_counters_statistic[i]++); } /* next kernel i */ kernel_i=(CKernel*)list_i->get_next_element(); } /* clean up streamed data */ SG_UNREF(p1a); SG_UNREF(p1b); SG_UNREF(p2a); SG_UNREF(p2b); SG_UNREF(q1a); SG_UNREF(q1b); SG_UNREF(q2a); SG_UNREF(q2b); /* add number of processed examples for this run */ num_examples_processed+=num_this_run; } /* clean up */ SG_UNREF(list_i); SG_UNREF(list_j); SG_DEBUG("Done compouting statistic, processed 4*%d examples.\n", num_examples_processed); SG_DEBUG("leaving %s::compute_statistic_and_Q()\n", get_name()) }