/** Compute the raw comm-word kernel value between vector idx_a of lhs and
 * vector idx_b of rhs.
 *
 * Both vectors are walked with a two-pointer merge, which only gives the
 * right answer when the words in each vector are in ascending order.
 *
 * @param idx_a   index of the vector taken from lhs
 * @param idx_b   index of the vector taken from rhs
 * @param do_sort if true, sorted private copies of both vectors are made
 *                and used; if false, the vectors are used as returned by
 *                the features and it is an error if not all preprocessors
 *                have been applied (presumably one of them is expected to
 *                sort the words - confirm against the callers)
 * @return if use_sign is set, the number of distinct words present in both
 *         vectors; otherwise the sum over common words of
 *         (occurrences in a) * (occurrences in b)
 */
float64_t CCommWordStringKernel::compute_helper(
	int32_t idx_a, int32_t idx_b, bool do_sort)
{
	CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;
	CStringFeatures<uint16_t>* r = (CStringFeatures<uint16_t>*) rhs;

	// Validate before any feature vector is acquired: if SG_ERROR does not
	// return, nothing is held that would otherwise miss its
	// free_feature_vector() call.
	if (!do_sort &&
		((l->get_num_preprocessors() != l->get_num_preprocessed()) ||
		 (r->get_num_preprocessors() != r->get_num_preprocessed())))
	{
		SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
				" or test (%d/%d) data\n", l->get_num_preprocessed(),
				l->get_num_preprocessors(), r->get_num_preprocessed(),
				r->get_num_preprocessors());
	}

	int32_t alen, blen;
	bool free_av, free_bv;
	uint16_t* av=l->get_feature_vector(idx_a, alen, free_av);
	uint16_t* bv=r->get_feature_vector(idx_b, blen, free_bv);

	// avec/bvec are what the merge reads; they alias av/bv unless sorting
	// was requested, in which case they are private sorted copies.
	uint16_t* avec=av;
	uint16_t* bvec=bv;

	if (do_sort)
	{
		avec=NULL;
		if (alen>0)
		{
			avec=SG_MALLOC(uint16_t, alen);
			memcpy(avec, av, sizeof(uint16_t)*alen);
			CMath::radix_sort(avec, alen);
		}

		bvec=NULL;
		if (blen>0)
		{
			bvec=SG_MALLOC(uint16_t, blen);
			memcpy(bvec, bv, sizeof(uint16_t)*blen);
			CMath::radix_sort(bvec, blen);
		}
	}

	float64_t result=0;
	int32_t left_idx=0;
	int32_t right_idx=0;

	while (left_idx < alen && right_idx < blen)
	{
		const uint16_t lsym=avec[left_idx];
		const uint16_t rsym=bvec[right_idx];

		if (lsym < rsym)
			left_idx++;
		else if (rsym < lsym)
			right_idx++;
		else
		{
			// Same word on both sides: step over its whole run in each
			// vector so that it is accounted for exactly once.
			const int32_t old_left_idx=left_idx;
			const int32_t old_right_idx=right_idx;

			while (left_idx < alen && avec[left_idx]==lsym)
				left_idx++;

			while (right_idx < blen && bvec[right_idx]==lsym)
				right_idx++;

			if (use_sign)
				result++;
			else
			{
				result+=((float64_t) (left_idx-old_left_idx))*
					((float64_t) (right_idx-old_right_idx));
			}
		}
	}

	// Only the sorted copies belong to this function; av/bv go back to
	// the feature objects below.
	if (do_sort)
	{
		SG_FREE(avec);
		SG_FREE(bvec);
	}

	l->free_feature_vector(av, idx_a, free_av);
	r->free_feature_vector(bv, idx_b, free_bv);

	return result;
}
/** Compute the raw weighted comm-word kernel value between vector idx_a of
 * lhs and vector idx_b of rhs.
 *
 * For every d in [0, degree) one more bit is added to a symbol mask, the
 * words of both vectors are compared under the resulting word mask with a
 * two-pointer merge, and each group of equal masked words contributes
 * weights[d]^2 * (occurrences in a) * (occurrences in b) to the result.
 * The merge only gives the right answer when the masked words appear in
 * ascending order in each vector.
 *
 * @param idx_a   index of the vector taken from lhs
 * @param idx_b   index of the vector taken from rhs
 * @param do_sort if true, sorted private copies of both vectors are made
 *                and used; if false, the vectors are used as returned by
 *                the features and it is an error if not all preprocessors
 *                have been applied (presumably one of them is expected to
 *                sort the words - confirm against the callers)
 * @return the weighted sum described above
 */
float64_t CWeightedCommWordStringKernel::compute_helper(
	int32_t idx_a, int32_t idx_b, bool do_sort)
{
	CStringFeatures<uint16_t>* l = (CStringFeatures<uint16_t>*) lhs;
	CStringFeatures<uint16_t>* r = (CStringFeatures<uint16_t>*) rhs;

	// Validate before any feature vector is acquired: if SG_ERROR does not
	// return, nothing is held that would otherwise miss its
	// free_feature_vector() call.
	if (!do_sort &&
		((l->get_num_preprocessors() != l->get_num_preprocessed()) ||
		 (r->get_num_preprocessors() != r->get_num_preprocessed())))
	{
		SG_ERROR("not all preprocessors have been applied to training (%d/%d)"
				" or test (%d/%d) data\n", l->get_num_preprocessed(),
				l->get_num_preprocessors(), r->get_num_preprocessed(),
				r->get_num_preprocessors());
	}

	int32_t alen, blen;
	bool free_avec, free_bvec;
	uint16_t* av=l->get_feature_vector(idx_a, alen, free_avec);
	uint16_t* bv=r->get_feature_vector(idx_b, blen, free_bvec);

	// avec/bvec are what the merge reads; they alias av/bv unless sorting
	// was requested, in which case they are private sorted copies.
	uint16_t* avec=av;
	uint16_t* bvec=bv;

	if (do_sort)
	{
		// SG_MALLOC/SG_FREE are used for the scratch buffers, the same
		// allocation scheme as CCommWordStringKernel::compute_helper.
		avec=NULL;
		if (alen>0)
		{
			avec=SG_MALLOC(uint16_t, alen);
			memcpy(avec, av, sizeof(uint16_t)*alen);
			CMath::radix_sort(avec, alen);
		}

		bvec=NULL;
		if (blen>0)
		{
			bvec=SG_MALLOC(uint16_t, blen);
			memcpy(bvec, bv, sizeof(uint16_t)*blen);
			CMath::radix_sort(bvec, blen);
		}
	}

	float64_t result=0;

	// NOTE(review): mask is only 8 bits wide, so the bit for any d with
	// degree-d-1 >= 8 is dropped by the conversion back to uint8_t.
	uint8_t mask=0;

	for (int32_t d=0; d<degree; d++)
	{
		// Accumulate one further bit per iteration, starting at bit
		// degree-1 and moving downwards.
		mask = mask | (1 << (degree-d-1));

		// Bit pattern applied to every word before comparison; presumably
		// it keeps the symbols selected by mask - see
		// CStringFeatures::get_masked_symbols.
		const uint16_t masked=l->get_masked_symbols(0xffff, mask);
		const float64_t weight=weights[d]*weights[d];

		int32_t left_idx=0;
		int32_t right_idx=0;

		while (left_idx < alen && right_idx < blen)
		{
			const uint16_t lsym=avec[left_idx] & masked;
			const uint16_t rsym=bvec[right_idx] & masked;

			if (lsym < rsym)
				left_idx++;
			else if (rsym < lsym)
				right_idx++;
			else
			{
				// Equal masked words: step over the whole run on each
				// side and weight the product of the two run lengths.
				const int32_t old_left_idx=left_idx;
				const int32_t old_right_idx=right_idx;

				while (left_idx<alen && (avec[left_idx] & masked)==lsym)
					left_idx++;

				while (right_idx<blen && (bvec[right_idx] & masked)==lsym)
					right_idx++;

				result+=weight*(left_idx-old_left_idx)*
					(right_idx-old_right_idx);
			}
		}
	}

	// Only the sorted copies belong to this function; av/bv go back to
	// the feature objects below.
	if (do_sort)
	{
		SG_FREE(avec);
		SG_FREE(bvec);
	}

	l->free_feature_vector(av, idx_a, free_avec);
	r->free_feature_vector(bv, idx_b, free_bvec);

	return result;
}