/*---------------------------------------------------------------------------------- sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following: new_a = sum(data_X(:,find(new_cut ~=0 )),2); new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a]; sparse_A(:,nSel+1) = new_a; ---------------------------------------------------------------------------------*/ void* CWDSVMOcas::add_new_cut_helper( void* ptr) { wdocas_thread_params_add* p = (wdocas_thread_params_add*) ptr; CWDSVMOcas* o = p->wdocas; int32_t start = p->start; int32_t end = p->end; int32_t string_length = o->string_length; //uint32_t nDim=(uint32_t) o->w_dim; uint32_t cut_length=p->cut_length; uint32_t* new_cut=p->new_cut; int32_t* w_offsets = o->w_offsets; float64_t* y = o->lab; int32_t alphabet_size = o->alphabet_size; float32_t* wd_weights = o->wd_weights; int32_t degree = o->degree; CStringFeatures<uint8_t>* f = o->features; float64_t normalization_const = o->normalization_const; // temporary vector float32_t* new_a = p->new_a; //float32_t* new_a = SG_MALLOC(float32_t, nDim); //memset(new_a, 0, sizeof(float32_t)*nDim); int32_t* val=SG_MALLOC(int32_t, cut_length); for (int32_t j=start; j<end; j++) { int32_t offs=o->w_dim_single_char*j; memset(val,0,sizeof(int32_t)*cut_length); int32_t lim=CMath::min(degree, string_length-j); int32_t len; for (int32_t k=0; k<lim; k++) { bool free_vec; uint8_t* vec = f->get_feature_vector(j+k, len, free_vec); float32_t wd = wd_weights[k]/normalization_const; for(uint32_t i=0; i < cut_length; i++) { val[i]=val[i]*alphabet_size + vec[new_cut[i]]; new_a[offs+val[i]]+=wd * y[new_cut[i]]; } offs+=w_offsets[k]; f->free_feature_vector(vec, j+k, free_vec); } } //p->new_a=new_a; SG_FREE(val); return NULL; }
/**
 * Compute the kernel value of left-hand-side example idx_a with itself.
 *
 * The symbols of the example are counted in dict_diagonal_optimization
 * (which is overwritten). The result is the number of distinct symbols
 * when use_sign is set, and the sum of squared symbol counts otherwise.
 *
 * Requires lhs == rhs, fewer than 2^16 symbols in the example, and
 * get_num_symbols() <= dictionary_size (all enforced via ASSERT).
 *
 * @param idx_a index of the example in lhs
 * @return diagonal kernel entry for idx_a
 */
float64_t CCommWordStringKernel::compute_diag(int32_t idx_a)
{
    CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;
    CStringFeatures<uint16_t>* r = (CStringFeatures<uint16_t>*) rhs;

    // These checks do not depend on the feature vector, so run them before
    // fetching it; a failing assertion then has nothing to release.
    ASSERT(l==r)
    ASSERT(sizeof(uint16_t)<=sizeof(float64_t))

    int32_t alen;
    bool free_av;
    uint16_t* av=l->get_feature_vector(idx_a, alen, free_av);

    float64_t result=0.0;

    ASSERT((1<<(sizeof(uint16_t)*8)) > alen)

    int32_t num_symbols=(int32_t) l->get_num_symbols();
    ASSERT(num_symbols<=dictionary_size)

    // histogram of the symbols occurring in this example
    // NOTE(review): assumes every av[i] < num_symbols - not checked here.
    int32_t* dic = dict_diagonal_optimization;
    memset(dic, 0, num_symbols*sizeof(int32_t));

    for (int32_t i=0; i<alen; i++)
        dic[av[i]]++;

    if (use_sign)
    {
        // each distinct symbol contributes exactly one
        for (int32_t i=0; i<num_symbols; i++)
        {
            if (dic[i]!=0)
                result++;
        }
    }
    else
    {
        for (int32_t i=0; i<num_symbols; i++)
        {
            if (dic[i]!=0)
            {
                // Square in floating point: a count can be as large as 65535
                // and 65535*65535 does not fit into int32_t.
                const float64_t count=(float64_t) dic[i];
                result+=count*count;
            }
        }
    }

    l->free_feature_vector(av, idx_a, free_av);

    return result;
}
/**
 * Compute the kernel value between lhs example idx_a and rhs example idx_b.
 *
 * Both symbol sequences are walked in a single merge-style pass. Every symbol
 * present in both contributes 1 when use_sign is set, and otherwise the
 * product of its number of occurrences in the two sequences.
 *
 * @param idx_a   index of the example in lhs
 * @param idx_b   index of the example in rhs
 * @param do_sort if true, sorted private copies of both sequences are made;
 *                if false, the sequences are used as stored and an error is
 *                raised when not all preprocessors have been applied
 * @return kernel value
 *
 * NOTE(review): the merge pass only pairs up equal symbols if both sequences
 * are in ascending order; with do_sort == false that is presumably ensured by
 * a preprocessor - confirm.
 */
float64_t CCommWordStringKernel::compute_helper(
    int32_t idx_a, int32_t idx_b, bool do_sort)
{
    CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;
    CStringFeatures<uint16_t>* r = (CStringFeatures<uint16_t>*) rhs;

    int32_t alen, blen;
    bool free_av, free_bv;
    uint16_t* av=l->get_feature_vector(idx_a, alen, free_av);
    uint16_t* bv=r->get_feature_vector(idx_b, blen, free_bv);

    // sequences actually walked below: the stored ones unless sorting is requested
    uint16_t* avec=av;
    uint16_t* bvec=bv;

    if (!do_sort)
    {
        const bool all_applied=
            (l->get_num_preprocessors() == l->get_num_preprocessed()) &&
            (r->get_num_preprocessors() == r->get_num_preprocessed());

        if (!all_applied)
        {
            SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
                    " or test (%d/%d) data\n", l->get_num_preprocessed(),
                    l->get_num_preprocessors(), r->get_num_preprocessed(),
                    r->get_num_preprocessors());
        }
    }
    else
    {
        // work on sorted copies; an empty sequence gets no copy at all
        avec=NULL;
        if (alen>0)
        {
            avec=SG_MALLOC(uint16_t, alen);
            memcpy(avec, av, sizeof(uint16_t)*alen);
            CMath::radix_sort(avec, alen);
        }

        bvec=NULL;
        if (blen>0)
        {
            bvec=SG_MALLOC(uint16_t, blen);
            memcpy(bvec, bv, sizeof(uint16_t)*blen);
            CMath::radix_sort(bvec, blen);
        }
    }

    float64_t result=0;
    int32_t left_idx=0;
    int32_t right_idx=0;

    while (left_idx<alen && right_idx<blen)
    {
        const uint16_t lsym=avec[left_idx];
        const uint16_t rsym=bvec[right_idx];

        // advance whichever side currently holds the smaller symbol
        if (lsym<rsym)
        {
            left_idx++;
            continue;
        }
        if (rsym<lsym)
        {
            right_idx++;
            continue;
        }

        // common symbol: skip over its complete run on both sides
        const int32_t left_run_start=left_idx;
        const int32_t right_run_start=right_idx;

        while (left_idx<alen && avec[left_idx]==lsym)
            left_idx++;

        while (right_idx<blen && bvec[right_idx]==lsym)
            right_idx++;

        if (use_sign)
            result++;
        else
        {
            result+=((float64_t) (left_idx-left_run_start))*
                ((float64_t) (right_idx-right_run_start));
        }
    }

    if (do_sort)
    {
        SG_FREE(avec);
        SG_FREE(bvec);
    }

    l->free_feature_vector(av, idx_a, free_av);
    r->free_feature_vector(bv, idx_b, free_bv);

    return result;
}
/**
 * Compute the weighted kernel value between lhs example idx_a and rhs
 * example idx_b.
 *
 * For every d in [0, degree) a symbol mask is obtained from
 * get_masked_symbols() and both sequences are walked in a merge-style pass
 * comparing masked symbols. Each run of equal masked symbols adds
 * weights[d]^2 * (run length in a) * (run length in b) to the result.
 *
 * @param idx_a   index of the example in lhs
 * @param idx_b   index of the example in rhs
 * @param do_sort if true, sorted private copies of both sequences are made;
 *                if false, the sequences are used as stored and an error is
 *                raised when not all preprocessors have been applied
 * @return kernel value
 *
 * NOTE(review): the merge pass only pairs up equal symbols if the masked
 * sequences are in ascending order - confirm this holds for every mask.
 * NOTE(review): the bit mask is a uint8_t, so degree is presumably at most 8 -
 * confirm.
 */
float64_t CWeightedCommWordStringKernel::compute_helper(
    int32_t idx_a, int32_t idx_b, bool do_sort)
{
    CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;
    CStringFeatures<uint16_t>* r = (CStringFeatures<uint16_t>*) rhs;

    int32_t alen, blen;
    bool free_avec, free_bvec;
    uint16_t* av=l->get_feature_vector(idx_a, alen, free_avec);
    uint16_t* bv=r->get_feature_vector(idx_b, blen, free_bvec);

    // sequences actually walked below: the stored ones unless sorting is requested
    uint16_t* avec=av;
    uint16_t* bvec=bv;

    if (!do_sort)
    {
        const bool all_applied=
            (l->get_num_preprocessors() == l->get_num_preprocessed()) &&
            (r->get_num_preprocessors() == r->get_num_preprocessed());

        if (!all_applied)
        {
            SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
                    " or test (%d/%d) data\n", l->get_num_preprocessed(),
                    l->get_num_preprocessors(), r->get_num_preprocessed(),
                    r->get_num_preprocessors());
        }
    }
    else
    {
        // work on sorted copies; an empty sequence gets no copy at all
        avec=NULL;
        if (alen>0)
        {
            avec=new uint16_t[alen];
            memcpy(avec, av, sizeof(uint16_t)*alen);
            CMath::radix_sort(avec, alen);
        }

        bvec=NULL;
        if (blen>0)
        {
            bvec=new uint16_t[blen];
            memcpy(bvec, bv, sizeof(uint16_t)*blen);
            CMath::radix_sort(bvec, blen);
        }
    }

    float64_t result=0;
    uint8_t mask=0;

    for (int32_t d=0; d<degree; d++)
    {
        // each round switches on one more bit, from bit degree-1 downwards
        mask = mask | (1 << (degree-d-1));
        const uint16_t masked=l->get_masked_symbols(0xffff, mask);
        const float64_t weight=weights[d]*weights[d];

        int32_t left_idx=0;
        int32_t right_idx=0;

        while (left_idx<alen && right_idx<blen)
        {
            const uint16_t lsym=avec[left_idx] & masked;
            const uint16_t rsym=bvec[right_idx] & masked;

            // advance whichever side currently holds the smaller masked symbol
            if (lsym<rsym)
            {
                left_idx++;
                continue;
            }
            if (rsym<lsym)
            {
                right_idx++;
                continue;
            }

            // common masked symbol: skip over its complete run on both sides
            const int32_t left_run_start=left_idx;
            const int32_t right_run_start=right_idx;

            while (left_idx<alen && (avec[left_idx] & masked)==lsym)
                left_idx++;

            while (right_idx<blen && (bvec[right_idx] & masked)==lsym)
                right_idx++;

            result+=weight*(left_idx-left_run_start)*(right_idx-right_run_start);
        }
    }

    if (do_sort)
    {
        delete[] avec;
        delete[] bvec;
    }

    l->free_feature_vector(av, idx_a, free_avec);
    r->free_feature_vector(bv, idx_b, free_bvec);

    return result;
}