/**
 * Fit the feature-tree regularized least squares model by calling
 * slep_tree_lsr on the current features and labels.
 *
 * @param data optional training features; when non-NULL they are cast to
 *        CDotFeatures and installed through set_features() before training
 * @return always true; missing features or labels trip an ASSERT instead
 */
bool CFeatureTreeLeastSquaresRegression::train_machine(CFeatures* data)
{
	CDotFeatures* dot_data = (CDotFeatures*)data;
	if (data && dot_data)
		set_features(dot_data);

	ASSERT(features);
	ASSERT(m_labels);

	// copy the regression targets into a plain array for the solver
	int32_t num_examples = features->get_num_vectors();
	float64_t* targets = SG_MALLOC(float64_t, num_examples);
	CRegressionLabels* regression_labels = (CRegressionLabels*)m_labels;
	for (int32_t idx=0; idx<num_examples; ++idx)
		targets[idx] = regression_labels->get_label(idx);

	// solver configuration
	slep_options solver_opts;
	solver_opts.general = false;
	solver_opts.termination = m_termination;
	solver_opts.tolerance = m_tolerance;
	solver_opts.max_iter = m_max_iter;
	solver_opts.restart_num = 10000;
	solver_opts.n_nodes = 1;
	solver_opts.regularization = 0;

	// keep the SGVector alive while its raw buffer is handed to the solver
	SGVector<float64_t> tree_ind = m_feature_tree->get_ind();
	solver_opts.ind = tree_ind.vector;
	solver_opts.G = NULL;
	solver_opts.initial_w = NULL;

	w = slep_tree_lsr(features, targets, m_z, solver_opts);

	SG_FREE(targets);
	return true;
}
/**
 * Construct an N-dimensional array whose extents are copied from a vector.
 *
 * @param dimensions extent of every dimension; the product of all extents
 *        is the number of elements allocated and must be greater than 0
 * @param ref_counting forwarded to SGReferencedData
 */
template<class T> SGNDArray<T>::SGNDArray(const SGVector<index_t> dimensions, bool ref_counting) :
	SGReferencedData(ref_counting)
{
	num_dims = dimensions.size();
	dims = SG_MALLOC(index_t, num_dims);

	// copy each extent and accumulate the total element count
	len_array = 1;
	int32_t axis = 0;
	while (axis<num_dims)
	{
		dims[axis] = dimensions[axis];
		len_array *= dims[axis];
		++axis;
	}

	REQUIRE(len_array>0, "Length of array (%d) must be greater than 0\n", len_array);
	array = SG_MALLOC(T, len_array);
}
/**
 * Construct a list holding nmats default-constructed matrices.
 *
 * @param nmats number of matrices in the list
 * @param ref_counting forwarded to SGReferencedData
 */
SGMatrixList<T>::SGMatrixList(int32_t nmats, bool ref_counting)
	: SGReferencedData(ref_counting), num_matrices(nmats)
{
	matrix_list = SG_MALLOC(SGMatrix<T>, nmats);

	// Run the SGMatrix default constructor in place on every slot
	SGMatrix<T>* slot = matrix_list;
	for (int32_t remaining = nmats; remaining > 0; --remaining)
	{
		new (slot) SGMatrix<T>();
		++slot;
	}
}
/**
 * Allocate a new buffer and copy nrows*ncols elements of a matrix into it.
 *
 * @param matrix source elements
 * @param nrows number of rows
 * @param ncols number of columns
 * @return newly allocated (SG_MALLOC) copy of the elements
 */
T* SGMatrix<T>::clone_matrix(const T* matrix, int32_t nrows, int32_t ncols)
{
	// 64-bit element count so large matrices do not overflow
	const int64_t num_elements = int64_t(nrows)*ncols;
	T* copy = SG_MALLOC(T, num_elements);

	int64_t pos = 0;
	while (pos<num_elements)
	{
		copy[pos] = matrix[pos];
		++pos;
	}

	return copy;
}
/**
 * Invert a square matrix in place: clapack_dgetrf factorizes the data
 * buffer, then clapack_dgetri overwrites it using the pivots.
 *
 * NOTE(review): the return codes of both CLAPACK calls are ignored here.
 *
 * @param matrix square matrix whose data buffer is overwritten
 */
void SGMatrix<T>::inverse(SGMatrix<float64_t> matrix)
{
	ASSERT(matrix.num_cols==matrix.num_rows);

	const index_t order = matrix.num_cols;
	float64_t* data = matrix.matrix;

	// pivot indices shared between the factorization and the inversion
	int32_t* pivots = SG_MALLOC(int32_t, order);

	clapack_dgetrf(CblasColMajor, order, order, data, order, pivots);
	clapack_dgetri(CblasColMajor, order, data, order, pivots);

	SG_FREE(pivots);
}
/**
 * Construct an N-dimensional array from an existing extents buffer.
 *
 * The pointer d is stored as-is (not copied); the data buffer is allocated
 * with as many elements as the product of all extents.
 *
 * @param d extents, one entry per dimension
 * @param nd number of dimensions
 * @param ref_counting forwarded to SGReferencedData
 */
template<class T> SGNDArray<T>::SGNDArray(index_t* d, index_t nd, bool ref_counting) :
	SGReferencedData(ref_counting), dims(d), num_dims(nd)
{
	// total element count is the product of all extents
	len_array = 1;
	int32_t axis = 0;
	while (axis<num_dims)
	{
		len_array *= dims[axis];
		++axis;
	}

	REQUIRE(len_array>0, "Length of array (%d) must be greater than 0\n", len_array);
	array = SG_MALLOC(T, len_array);
}
/**
 * Create an iterator over the entries of a single feature vector.
 *
 * The iterator is allocated with SG_MALLOC and filled with the vector
 * returned by get_feature_vector(), its length and its free flag.
 * NOTE(review): the code that releases the iterator is not visible here.
 *
 * @param vector_index index of the vector to iterate over; must lie in
 *        [0, get_num_vectors()), otherwise SG_ERROR is raised
 * @return opaque pointer to a dense_feature_iterator positioned at entry 0
 */
template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
{
	// negative indices are just as out of bounds as indices past the end
	if (vector_index<0 || vector_index>=get_num_vectors())
	{
		SG_ERROR("Index out of bounds (number of vectors %d, you "
				"requested %d)\n", get_num_vectors(), vector_index);
	}

	dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
	iterator->vec = get_feature_vector(vector_index, iterator->vlen,
			iterator->vfree);
	iterator->vidx = vector_index;
	iterator->index = 0;

	return iterator;
}
/**
 * Transpose a matrix stored in a flat buffer.
 *
 * A new buffer is allocated, filled with the transposed elements, the old
 * buffer is freed and the two dimensions are swapped.
 *
 * All sizes and offsets are computed in 64-bit arithmetic so that matrices
 * with more than 2^31-1 elements do not overflow the 32-bit dimensions.
 *
 * @param matrix in: buffer of num_feat*num_vec elements; out: newly
 *        allocated transposed buffer (the input buffer is freed)
 * @param num_feat in/out: swapped with num_vec on return
 * @param num_vec in/out: swapped with num_feat on return
 */
void SGMatrix<T>::transpose_matrix(
	T*& matrix, int32_t& num_feat, int32_t& num_vec)
{
	/* this should be done in-place! Heiko */
	const int64_t n_feat=num_feat;
	const int64_t n_vec=num_vec;

	T* transposed=SG_MALLOC(T, n_vec*n_feat);
	for (int64_t i=0; i<n_vec; i++)
	{
		for (int64_t j=0; j<n_feat; j++)
			transposed[i+j*n_vec]=matrix[i*n_feat+j];
	}

	SG_FREE(matrix);
	matrix=transposed;

	CMath::swap(num_feat, num_vec);
}
/**
 * Decompress a string whose first two int32 words hold the compressed and
 * the uncompressed size, followed by the compressed payload.
 *
 * @param f compressed string (header + payload)
 * @param len in: length of f in ST elements; out: length of the result
 * @return newly allocated (SG_MALLOC) buffer with the decompressed string
 */
ST* CDecompressString<ST>::apply_to_string(ST* f, int32_t &len)
{
	// header layout: [0] compressed size, [1] uncompressed size
	int32_t* header=(int32_t*) f;
	uint64_t compressed_size=header[0];
	uint64_t uncompressed_size=header[1];

	// number of ST elements occupied by the two header words
	int32_t offs=CMath::ceil(2.0*sizeof(int32_t)/sizeof(ST));
	ASSERT(uint64_t(len)==uint64_t(offs)+compressed_size);

	len=uncompressed_size;
	// from here on the size is expressed in bytes
	uncompressed_size*=sizeof(ST);

	ST* vec=SG_MALLOC(ST, len);
	compressor->decompress(
		(uint8_t*) (&f[offs]), compressed_size,
		(uint8_t*) vec, uncompressed_size);

	ASSERT(uncompressed_size==((uint64_t) len)*sizeof(ST));
	return vec;
}
/**
 * Scan the backing file line by line and record every line as a string
 * (pointer and length) in the feature array.
 *
 * The array starts with room for `granularity` strings and grows by
 * `granularity` whenever it is full; at the end it is resized to the
 * number of strings found. If the alphabet checks fail, cleanup() is
 * called before that final resize.
 *
 * @param granularity initial capacity and growth step of the string
 *        array; must be >= 1
 */
template <class ST> void CStringFileFeatures<ST>::fetch_meta_info_from_file(int32_t granularity)
{
	CStringFileFeatures<ST>::cleanup();
	uint64_t file_size=file->get_size();
	ASSERT(granularity>=1);
	ASSERT(CStringFeatures<ST>::alphabet);

	int64_t capacity=granularity;
	CStringFeatures<ST>::features=SG_MALLOC(SGString<ST>, capacity);

	uint64_t offs=0;
	uint64_t len=0;
	CStringFeatures<ST>::max_string_length=0;
	CStringFeatures<ST>::num_vectors=0;

	for (;;)
	{
		ST* line=get_line(len, offs, CStringFeatures<ST>::num_vectors, file_size);
		if (!line)
			break;

		// grow the array when the string count has exceeded its capacity
		if (CStringFeatures<ST>::num_vectors > capacity)
		{
			CStringFeatures<ST>::features = SG_REALLOC(SGString<ST>,
				CStringFeatures<ST>::features, capacity, capacity+granularity);
			capacity+=granularity;
		}

		// num_vectors-1 is the slot written for this line
		SGString<ST>& slot=CStringFeatures<ST>::features[CStringFeatures<ST>::num_vectors-1];
		slot.string=line;
		slot.slen=len;

		CStringFeatures<ST>::max_string_length=
			CMath::max(CStringFeatures<ST>::max_string_length, (int32_t) len);
	}

	CStringFeatures<ST>::SG_INFO("number of strings:%d\n", CStringFeatures<ST>::num_vectors);
	CStringFeatures<ST>::SG_INFO("maximum string length:%d\n", CStringFeatures<ST>::max_string_length);
	CStringFeatures<ST>::SG_INFO("max_value_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_max_value_in_histogram());
	CStringFeatures<ST>::SG_INFO("num_symbols_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_num_symbols_in_histogram());

	const bool alphabet_ok=
		CStringFeatures<ST>::alphabet->check_alphabet_size() &&
		CStringFeatures<ST>::alphabet->check_alphabet();
	if (!alphabet_ok)
		CStringFileFeatures<ST>::cleanup();

	// shrink (or grow) the array to exactly the number of strings found
	CStringFeatures<ST>::features=SG_REALLOC(SGString<ST>,
		CStringFeatures<ST>::features, capacity, CStringFeatures<ST>::num_vectors);
}
/**
 * Build a transposed copy of the feature matrix in a freshly allocated
 * buffer.
 *
 * @param num_feat out: set to the current number of vectors
 * @param num_vec out: set to the current number of features
 * @return newly allocated (SG_MALLOC) buffer in which entry j of vector i
 *         is stored at offset j*number_of_vectors + i
 */
template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
{
	num_feat = get_num_vectors();
	num_vec = num_features;

	int32_t old_num_vec=get_num_vectors();

	ST* transposed = SG_MALLOC(ST, int64_t(num_feat) * num_vec);

	int32_t vec_idx=0;
	while (vec_idx<old_num_vec)
	{
		SGVector<ST> current=get_feature_vector(vec_idx);

		// scatter this vector with a stride of old_num_vec elements
		for (int32_t entry=0; entry<current.vlen; ++entry)
			transposed[entry*int64_t(old_num_vec)+vec_idx]=current.vector[entry];

		free_feature_vector(current, vec_idx);
		++vec_idx;
	}

	return transposed;
}
/**
 * Compute a pseudo-inverse from the singular value decomposition of the
 * input: wrap_dgesvd produces s, u and vt, the entries of vt are divided
 * by the singular values, and cblas_dgemm multiplies the transposed
 * factors into target.
 *
 * NOTE(review): singular values are divided by without a zero/tolerance
 * check, and the input buffer is handed straight to wrap_dgesvd.
 *
 * @param matrix input matrix, rows x cols
 * @param rows number of rows
 * @param cols number of columns
 * @param target output buffer; when NULL a buffer of rows*cols elements is
 *        allocated with SG_MALLOC
 * @return target (either the one passed in or the newly allocated one)
 */
float64_t* SGMatrix<T>::pinv(
	float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
{
	if (!target)
		target=SG_MALLOC(float64_t, rows*cols);

	char jobu='A';
	char jobvt='A';
	int m=rows; /* for calling external lib */
	int n=cols; /* for calling external lib */
	int lda=m; /* for calling external lib */
	int ldu=m; /* for calling external lib */
	int ldvt=n; /* for calling external lib */
	int info=-1; /* for calling external lib */
	int32_t lsize=CMath::min((int32_t) m, (int32_t) n);

	// work buffers for the decomposition
	double* s=SG_MALLOC(double, lsize);
	double* u=SG_MALLOC(double, m*m);
	double* vt=SG_MALLOC(double, n*n);

	wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
	ASSERT(info==0);

	// scale the first lsize entries of every length-n chunk of vt by 1/s
	for (int32_t chunk=0; chunk<n; ++chunk)
	{
		double* entries=vt+chunk*n;
		for (int32_t k=0; k<lsize; ++k)
			entries[k]=entries[k]/s[k];
	}

	cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt,
			ldvt, u, ldu, 0, target, m);

	SG_FREE(u);
	SG_FREE(vt);
	SG_FREE(s);

	return target;
}
/**
 * Construct a sparse matrix with storage for num_vec sparse vectors.
 *
 * @param num_feat number of features (stored in num_features)
 * @param num_vec number of vectors (stored in num_vectors)
 * @param ref_counting forwarded to SGReferencedData
 */
SGSparseMatrix<T>::SGSparseMatrix(index_t num_feat, index_t num_vec, bool ref_counting)
	: SGReferencedData(ref_counting),
	  num_vectors(num_vec),
	  num_features(num_feat)
{
	// one SGSparseVector slot per vector
	SGSparseVector<T>* storage=SG_MALLOC(SGSparseVector<T>, num_vectors);
	sparse_matrix=storage;
}
/**
 * Construct a matrix with freshly allocated storage for nrows*ncols
 * elements.
 *
 * @param nrows number of rows
 * @param ncols number of columns
 * @param ref_counting forwarded to SGReferencedData
 */
SGMatrix<T>::SGMatrix(index_t nrows, index_t ncols, bool ref_counting)
	: SGReferencedData(ref_counting),
	  num_rows(nrows),
	  num_cols(ncols)
{
	// widen before multiplying so the element count is computed in 64 bits
	const int64_t num_elements=static_cast<int64_t>(nrows)*ncols;
	matrix=SG_MALLOC(T, num_elements);
}
/**
 * Construct a vector with freshly allocated storage for len elements,
 * with no GPU pointer and the on-GPU flag cleared.
 *
 * @param len number of elements
 * @param ref_counting forwarded to SGReferencedData
 */
SGVector<T>::SGVector(index_t len, bool ref_counting)
	: SGReferencedData(ref_counting),
	  vlen(len),
	  gpu_ptr(NULL)
{
	T* storage=SG_MALLOC(T, len);
	vector=storage;

	m_on_gpu.store(false, std::memory_order_release);
}
/**
 * Compute one weight per training index by solving a quadratic program
 * with libqp_gsmo_solver.
 *
 * The training kernel matrix is stored in kmm_K (read back by the solver
 * through kmm_get_col), the linear term is the negated, rescaled sum of
 * kernel values between each training point and all test points, every
 * weight is bounded to [0, 2] and starts at 1/n_tr.
 *
 * @return vector of n_tr weights as left by the solver
 */
SGVector<float64_t> CKernelMeanMatching::compute_weights()
{
	ASSERT(m_kernel)
	ASSERT(m_training_indices.vlen)
	ASSERT(m_test_indices.vlen)

	int32_t n_tr = m_training_indices.vlen;
	int32_t n_te = m_test_indices.vlen;

	SGVector<float64_t> weights(n_tr);
	weights.zero();

	// symmetric training kernel matrix plus a separate copy of its diagonal
	kmm_K = SG_MALLOC(float64_t, n_tr*n_tr);
	kmm_K_ld = n_tr;
	float64_t* diag_K = SG_MALLOC(float64_t, n_tr);
	for (int32_t row=0; row<n_tr; row++)
	{
		float64_t value = m_kernel->kernel(m_training_indices[row], m_training_indices[row]);
		diag_K[row] = value;
		kmm_K[row*n_tr+row] = value;

		for (int32_t col=row+1; col<n_tr; col++)
		{
			value = m_kernel->kernel(m_training_indices[row],m_training_indices[col]);
			kmm_K[row*n_tr+col] = value;
			kmm_K[col*n_tr+row] = value;
		}
	}

	// linear term: negated, rescaled sum of train/test kernel values
	float64_t* kappa = SG_MALLOC(float64_t, n_tr);
	for (int32_t tr=0; tr<n_tr; tr++)
	{
		float64_t accum = 0.0;
		for (int32_t te=0; te<n_te; te++)
			accum+= m_kernel->kernel(m_training_indices[tr],m_test_indices[te]);

		accum *= float64_t(n_tr)/n_te;
		kappa[tr] = -accum;
	}

	// constraint vector, box bounds and starting point
	float64_t* a = SG_MALLOC(float64_t, n_tr);
	float64_t* LB = SG_MALLOC(float64_t, n_tr);
	float64_t* UB = SG_MALLOC(float64_t, n_tr);
	float64_t B = 2.0;
	for (int32_t k=0; k<n_tr; k++)
	{
		a[k] = 1.0;
		LB[k] = 0.0;
		UB[k] = B;
		weights[k] = 1.0/float64_t(n_tr);
	}

	libqp_state_T result =
		libqp_gsmo_solver(&kmm_get_col,diag_K,kappa,a,1.0,LB,UB,weights,n_tr,1000,1e-9,NULL);

	SG_DEBUG("libqp exitflag=%d, %d iterations passed, primal objective=%f\n",
			result.exitflag,result.nIter,result.QP);

	SG_FREE(kappa);
	SG_FREE(a);
	SG_FREE(LB);
	SG_FREE(UB);
	SG_FREE(diag_K);
	SG_FREE(kmm_K);

	return weights;
}
/**
 * Construct a string list with storage for num_s strings.
 *
 * @param num_s number of strings (stored in num_strings)
 * @param max_length maximum string length (stored in max_string_length)
 * @param ref_counting forwarded to SGReferencedData
 */
SGStringList<T>::SGStringList(index_t num_s, index_t max_length, bool ref_counting)
	: SGReferencedData(ref_counting),
	  num_strings(num_s),
	  max_string_length(max_length)
{
	// one SGString slot per string
	SGString<T>* storage=SG_MALLOC(SGString<T>, num_strings);
	strings=storage;
}
/**
 * Load a sparse matrix and its regression labels from a text file
 * (SVMlight-style, going by the function name).
 *
 * Visible behaviour:
 *  - Pass 1 reads the file in 1 MiB blocks, counts lines into num_vectors
 *    (a trailing line without '\n' is counted too) and tracks the longest
 *    line so the read buffer can be enlarged to hold any single line.
 *  - Pass 2 re-reads the file. When a block ends in the middle of a line,
 *    the partial line is moved to the front of the buffer and the buffer
 *    is topped up from the file.
 *  - For every line the number of ':' characters gives the number of
 *    entries. The text before the first space is parsed with atof as the
 *    label. Each following "index:value" pair is stored with the index
 *    reduced by one, and num_features is raised to the largest index seen.
 *
 * @param fname path of the file to read
 * @param do_sort_features not referenced in the visible part of the body
 * @return labels object (lab); NULL-initialised, created once the file
 *         has been opened and counted
 *
 * NOTE(review): this definition is truncated in this view. It ends right
 * after fclose(f) closes the `if (f)` branch, so the failure path, the
 * release of `dummy` and the return statement are not visible.
 * NOTE(review): the definition is collapsed onto two physical lines, so
 * the `//allow setting of '\0' at EOL` comment swallows the rest of the
 * first line as written; the original layout needs restoring.
 * NOTE(review): fopen is called with mode "ro"; 'o' is not a standard mode
 * character, "r" is presumably intended.
 * NOTE(review): the SG_SERROR call passes the size_t `len` for both a %d
 * and a %.*s precision argument, which expect int.
 */
template<class T> CRegressionLabels* SGSparseMatrix<T>::load_svmlight_file(char* fname, bool do_sort_features) { CRegressionLabels* lab=NULL; size_t blocksize=1024*1024; size_t required_blocksize=blocksize; uint8_t* dummy=SG_MALLOC(uint8_t, blocksize); FILE* f=fopen(fname, "ro"); if (f) { free_data(); SG_SINFO("counting line numbers in file %s\n", fname) size_t sz=blocksize; size_t block_offs=0; size_t old_block_offs=0; fseek(f, 0, SEEK_END); size_t fsize=ftell(f); rewind(f); while (sz == blocksize) { sz=fread(dummy, sizeof(uint8_t), blocksize, f); for (size_t i=0; i<sz; i++) { block_offs++; if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) { num_vectors++; required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); old_block_offs=block_offs; } } SG_SPROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t") } SG_SINFO("found %d feature vectors\n", num_vectors) SG_FREE(dummy); blocksize=required_blocksize; dummy = SG_MALLOC(uint8_t, blocksize+1); //allow setting of '\0' at EOL lab=new CRegressionLabels(num_vectors); sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors); rewind(f); sz=blocksize; int32_t lines=0; while (sz == blocksize) { sz=fread(dummy, sizeof(uint8_t), blocksize, f); size_t old_sz=0; for (size_t i=0; i<sz; i++) { if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) { size_t len=i-old_sz+1; uint8_t* data=&dummy[old_sz]; for (size_t j=0; j<len; j++) dummy[j]=data[j]; sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, f); i=0; old_sz=0; sz+=len; } if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) { size_t len=i-old_sz; uint8_t* data=&dummy[old_sz]; int32_t dims=0; for (size_t j=0; j<len; j++) { if (data[j]==':') dims++; } if (dims<=0) { SG_SERROR("Error in line %d - number of" " dimensions is %d line is %d characters" " long\n line_content:'%.*s'\n", lines, dims, len, len, (const char*) data); } SGSparseVectorEntry<T>* feat=SG_MALLOC(SGSparseVectorEntry<T>, dims); size_t j=0; for (; j<len; j++) { if (data[j]==' ') { data[j]='\0'; 
lab->set_label(lines, atof((const char*) data)); break; } } int32_t d=0; j++; uint8_t* start=&data[j]; for (; j<len; j++) { if (data[j]==':') { data[j]='\0'; feat[d].feat_index=(int32_t) atoi((const char*) start)-1; num_features=CMath::max(num_features, feat[d].feat_index+1); j++; start=&data[j]; for (; j<len; j++) { if (data[j]==' ' || data[j]=='\n') { data[j]='\0'; feat[d].entry=(T) atof((const char*) start); d++; break; } } if (j==len) { data[j]='\0'; feat[dims-1].entry=(T) atof((const char*) start); } j++; start=&data[j]; } } sparse_matrix[lines].num_feat_entries=dims; sparse_matrix[lines].features=feat; old_sz=i+1; lines++; SG_SPROGRESS(lines, 0, num_vectors, 1, "LOADING:\t") } } } SG_SINFO("file successfully read\n") fclose(f); }
/**
 * Allocate a new buffer and byte-copy len elements of a vector into it.
 *
 * @param vec source elements
 * @param len number of elements to copy
 * @return newly allocated (SG_MALLOC) copy of the elements
 */
T* SGVector<T>::clone_vector(const T* vec, int32_t len)
{
	T* duplicate = SG_MALLOC(T, len);

	// raw byte copy of len elements
	memcpy(duplicate, vec, sizeof(T)*len);

	return duplicate;
}
/**
 * Construct a vector with freshly allocated storage for len elements.
 *
 * @param len number of elements
 * @param ref_counting forwarded to SGReferencedData
 */
SGVector<T>::SGVector(index_t len, bool ref_counting)
	: SGReferencedData(ref_counting),
	  vlen(len)
{
	T* storage=SG_MALLOC(T, len);
	vector=storage;
}