/** Scans the attached file line by line and records, for every line found,
 * a pointer to its start and its length in the string feature table.
 *
 * The feature table is grown in steps of @p granularity entries while
 * reading and trimmed to the number of lines actually found at the end.
 * The maximum line length is tracked along the way.
 *
 * If the alphabet checks fail after reading, the object is cleaned up and
 * the function returns immediately; the feature table is not touched again.
 *
 * @param granularity number of table entries to allocate per growth step,
 *        must be >= 1
 */
template <class ST> void CStringFileFeatures<ST>::fetch_meta_info_from_file(int32_t granularity)
{
	CStringFileFeatures<ST>::cleanup();
	uint64_t file_size=file->get_size();
	ASSERT(granularity>=1);
	ASSERT(CStringFeatures<ST>::alphabet);

	int64_t buffer_size=granularity;
	CStringFeatures<ST>::features=SG_MALLOC(SGString<ST>, buffer_size);

	uint64_t offs=0;
	uint64_t len=0;
	CStringFeatures<ST>::max_string_length=0;
	CStringFeatures<ST>::num_vectors=0;

	while (true)
	{
		// get_line() receives num_vectors and is expected to advance it for
		// every line it hands back (the new entry is stored at num_vectors-1).
		ST* line=get_line(len, offs, CStringFeatures<ST>::num_vectors, file_size);

		// a null pointer ends the scan
		if (!line)
			break;

		// grow the table by one granularity step once it is full
		if (CStringFeatures<ST>::num_vectors > buffer_size)
		{
			CStringFeatures<ST>::features = SG_REALLOC(SGString<ST>,
					CStringFeatures<ST>::features, buffer_size, buffer_size+granularity);
			buffer_size+=granularity;
		}

		CStringFeatures<ST>::features[CStringFeatures<ST>::num_vectors-1].string=line;
		CStringFeatures<ST>::features[CStringFeatures<ST>::num_vectors-1].slen=len;
		CStringFeatures<ST>::max_string_length=CMath::max(
				CStringFeatures<ST>::max_string_length, static_cast<int32_t>(len));
	}

	CStringFeatures<ST>::SG_INFO("number of strings:%d\n", CStringFeatures<ST>::num_vectors);
	CStringFeatures<ST>::SG_INFO("maximum string length:%d\n", CStringFeatures<ST>::max_string_length);
	CStringFeatures<ST>::SG_INFO("max_value_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_max_value_in_histogram());
	CStringFeatures<ST>::SG_INFO("num_symbols_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_num_symbols_in_histogram());

	if (!CStringFeatures<ST>::alphabet->check_alphabet_size() || !CStringFeatures<ST>::alphabet->check_alphabet())
	{
		// The data does not fit the alphabet: discard everything and stop.
		// Falling through to the trimming realloc below would operate on
		// the table with a stale buffer_size after cleanup() has run.
		CStringFileFeatures<ST>::cleanup();
		return;
	}

	// shrink the table to the number of lines actually read
	CStringFeatures<ST>::features=SG_REALLOC(SGString<ST>,
			CStringFeatures<ST>::features, buffer_size, CStringFeatures<ST>::num_vectors);
}
/** Makes sure a dense buffer is at least as long as the feature space
 * dimension reported by get_dim_feature_space().
 *
 * When the buffer is too short it is reallocated to that dimension, the
 * newly added tail is set to zero bytes, and @p len is updated. Otherwise
 * nothing is changed.
 *
 * @param vec buffer to grow; replaced by the reallocated pointer
 * @param len current number of elements in vec; set to the new length
 */
void CStreamingSparseFeatures<T>::expand_if_required(float64_t*& vec, int32_t &len)
{
	const int32_t required_len = get_dim_feature_space();

	// buffer already covers the whole feature space
	if (required_len <= len)
		return;

	const int32_t old_len = len;
	vec = SG_REALLOC(float64_t, vec, old_len, required_len);

	// clear only the freshly appended part, existing values stay untouched
	memset(vec + old_len, 0, (required_len - old_len) * sizeof(float64_t));
	len = required_len;
}
/** Overwrites the diagonal of the sparse operator with the given values.
 *
 * For every row up to min(num_vectors, num_features) an existing entry
 * with feat_index equal to the row index is updated in place. If the row
 * has no such entry, one is appended to the row, and the features are
 * re-sorted once after all rows are processed.
 *
 * @param diag the new diagonal; its length must match the diagonal size
 */
void CSparseMatrixOperator<T>::set_diagonal(SGVector<T> diag)
{
	REQUIRE(m_operator.sparse_matrix, "Operator not initialized!\n");
	REQUIRE(diag.vector, "Diagonal not initialized!\n");

	// the diagonal is as long as the smaller of the two matrix dimensions
	const int32_t diag_size=m_operator.num_features<m_operator.num_vectors
		? m_operator.num_features : m_operator.num_vectors;

	REQUIRE(diag_size==diag.vlen, "Dimension mismatch!\n");

	bool appended_any=false;
	for (index_t row=0; row<diag_size; ++row)
	{
		SGSparseVectorEntry<T>* entries=m_operator[row].features;
		const index_t num_entries=m_operator[row].num_feat_entries;

		// scan the row for an already stored diagonal element
		index_t pos=0;
		while (pos<num_entries && !(row==entries[pos].feat_index))
			++pos;

		if (pos<num_entries)
		{
			// the diagonal element exists, so only its value changes
			entries[pos].entry=diag[row];
			continue;
		}

		// no diagonal element in this row yet: append one at the end
		m_operator[row].num_feat_entries=num_entries+1;
		m_operator[row].features=SG_REALLOC(SGSparseVectorEntry<T>,
				m_operator[row].features, num_entries, num_entries+1);
		m_operator[row].features[num_entries].feat_index=row;
		m_operator[row].features[num_entries].entry=diag[row];
		appended_any=true;
	}

	// appended entries sit at the end of their rows, so sort the features
	if (appended_any)
		m_operator.sort_features();
}