/* Return a lightweight view (alias) of one list entry of a multi-list matrix.
   The returned header shares the parent's storage; no vector data is copied. */
nv_matrix_t *nv_matrix_list_get(const nv_matrix_t *parent, int list)
{
	nv_matrix_t *matrix = (nv_matrix_t *)malloc(sizeof(nv_matrix_t));
	matrix->list = 1;
	matrix->n = parent->n;
	matrix->m = parent->m;
	matrix->rows = parent->rows;
	matrix->cols = parent->cols;
	matrix->v = &NV_MAT_LIST_V(parent, list, 0, 0); /* points into parent's data */
	matrix->step = parent->step;
	matrix->list_step = parent->list_step;
	matrix->alias = 1; /* mark as alias so the shared data is not freed with this header */

	return matrix;
}
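The view can be passed anywhere a plain matrix is expected. A minimal usage sketch, assuming the allocation API shown in the examples below and that nv_matrix_free honors the alias flag (freeing only the header, not the shared data):

nv_matrix_t *lists = nv_matrix_list_alloc(64, 100, 8); /* 8 lists of 100 64-dim vectors */
nv_matrix_t *view = nv_matrix_list_get(lists, 3);      /* alias of list 3 */
NV_MAT_V(view, 0, 0) = 1.0f;  /* writes through the view are visible in lists */
nv_matrix_free(&view);        /* releases only the alias header */
nv_matrix_free(&lists);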
Example #2
/* Accumulate the gradient of one sample into gradient list el of dw:
   for each class j, dw[el][j] += w * (y_j - t_j) * x. */
static void
nv_lr_dw(const nv_lr_t *lr, float w,
	 nv_matrix_t *dw, int el,
	 const nv_matrix_t *data, int dj,
	 const nv_matrix_t *t, int tj,
	 const nv_matrix_t *y, int yj)
{
	int j;

	for (j = 0; j < lr->k; ++j) {
		int i;
		float y_t = NV_MAT_V(y, yj, j) - NV_MAT_V(t, tj, j);
		for (i = 0; i < lr->n; ++i) {
			NV_MAT_LIST_V(dw, el, j, i) += w * y_t * NV_MAT_V(data, dj, i);
		}
	}
}
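For softmax outputs y_j = exp(w_j · x) / Σ_k exp(w_k · x) and a one-hot target vector t, the gradient of the cross-entropy loss with respect to the class-j weights is (y_j − t_j) · x, which is exactly the inner accumulation above. The extra factor w lets callers reweight individual samples; nv_lr_train (Example #4) passes a per-class weight through it to compensate for unbalanced classes.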
Example #3
/* EM-style clustering with a logistic-regression model: the E-step assigns
   each sample its predicted label, the M-step retrains the LR on those
   labels. Returns 1 if the labeling converged, 0 otherwise. */
int
nv_klr_em(nv_lr_t *lr,         // k
		  nv_matrix_t *count,  // k
		  nv_matrix_t *labels, // data->m
		  const nv_matrix_t *data,
		  const nv_lr_param_t param,
		  const int max_epoch)
{
	int j, l;
	int processing = 1, last_processing = 0;
	int converge, epoch;
	long t;
	int relabel_count;
	int empty_class;
	float relabel_per;
	int num_threads = nv_omp_procs();
	nv_matrix_t *old_labels = nv_matrix_alloc(1, data->m);
	nv_matrix_t *count_tmp = nv_matrix_list_alloc(1, lr->k, num_threads);

	NV_ASSERT(labels->m >= data->m);
	NV_ASSERT(count->m >= lr->k);

	nv_matrix_copy(old_labels, 0, labels, 0, old_labels->m);

	epoch = 0;
	do {
		if (last_processing) {
			processing = 0;
		}
		t = nv_clock();
		nv_matrix_zero(count);
		nv_matrix_zero(count_tmp);
#ifdef _OPENMP
#pragma omp parallel for num_threads(num_threads)
#endif
		for (j = 0; j < data->m; ++j) {
			int label = nv_lr_predict_label(lr, data, j);
			int thread_idx = nv_omp_thread_id();

			NV_ASSERT(label < lr->k);

			NV_MAT_V(labels, j, 0) = (float)label;
			NV_MAT_LIST_V(count_tmp, thread_idx, label, 0) += 1.0f;
		}

		/* reduce the per-thread histograms into count */
		for (l = 0; l < num_threads; ++l) {
			for (j = 0; j < count->m; ++j) {
				NV_MAT_V(count, j, 0) += NV_MAT_LIST_V(count_tmp, l, j, 0);
			}
		}
		++epoch;

		/* convergence check */
		relabel_count = 0;
		for (j = 0; j < data->m; ++j) {
			if (NV_MAT_V(labels, j, 0) != NV_MAT_V(old_labels, j, 0)) {
				++relabel_count;
			}
		}
		empty_class = 0;
		for (j = 0; j < lr->k; ++j) {
			empty_class += (NV_MAT_V(count, j, 0) > 0.0f ? 0:1);
		}
		relabel_per = (float)relabel_count / data->m;
		if (epoch > 1) {
			converge = (relabel_per < 0.001f) ? 1 : 0;
		} else {
			converge = 0;
		}

		if (nv_klr_progress_flag) {
			printf("nv_klr: %d: relabel: %f, empty_class: %d, %ldms\n",
			       epoch, relabel_per, empty_class, nv_clock() - t);
			fflush(stdout);
		}
		t = nv_clock();

		if (converge) {
			/* converged: stop */
			if (nv_klr_progress_flag) {
				printf("nv_klr: %d: finish:\n", epoch);
				fflush(stdout);
			}
			processing = 0;
		} else {
			/* update the labels */
			nv_matrix_copy(old_labels, 0, labels, 0, old_labels->m);

			/* retrain the logistic regression on the new labels */
			nv_lr_train(lr, data, labels, param);

			/* check the maximum number of epochs */
			if (max_epoch != 0 && epoch >= max_epoch) {
				/* stop */
				processing = 0;
			}
			if (nv_klr_progress_flag) {
				printf("nv_klr: %d: train: %ldms\n", epoch, nv_clock() - t);
				fflush(stdout);
			}
		}
	} while (processing);

	nv_matrix_free(&old_labels);
	nv_matrix_free(&count_tmp);

	return converge;
}
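nv_klr_em alternates an E-step (assign each sample the label the current model predicts) with an M-step (retrain the LR on those labels), stopping once fewer than 0.1% of samples change label or max_epoch is reached. A minimal driver sketch for a dataset matrix data with one sample per row; nv_lr_alloc is an assumed allocator not shown in this listing, and lr must already hold an initial model (for example, trained on random labels) because the first E-step calls nv_lr_predict_label:

nv_lr_t *lr = nv_lr_alloc(data->n, 10);        /* assumed allocator: 10 clusters */
nv_matrix_t *labels = nv_matrix_alloc(1, data->m);
nv_matrix_t *count = nv_matrix_alloc(1, lr->k);
nv_lr_param_t param;                           /* field names as used in Example #4 */
param.max_epoch = 50;
param.grad_w = 0.1f;
param.reg_type = NV_LR_REG_L2;
param.reg_w = 0.0001f;
param.auto_balance = 0;
nv_klr_em(lr, count, labels, data, param, 100);
nv_matrix_free(&labels);
nv_matrix_free(&count);
/* freeing lr is omitted; its API is not shown in this listing */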
Example #4
/* Train multinomial logistic regression with mini-batch SGD and
   optional L1/L2 regularization and class balancing. */
void
nv_lr_train(nv_lr_t *lr,
			const nv_matrix_t *data, const nv_matrix_t *label,
			nv_lr_param_t param)
{
	int m, n, i, j, k, l;
	long tm, tm_all = nv_clock();
	float oe = FLT_MAX, er = 1.0f, we;
	float sum_e = 0.0f;
	int epoch = 0;
	int pn = (data->m > 256) ? 128 : 1; /* samples per mini-batch */
	int step = data->m / pn;            /* mini-batches per epoch */
	int threads = nv_omp_procs();
	nv_matrix_t *y = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *t = nv_matrix_alloc(lr->k, threads);
	nv_matrix_t *dw = nv_matrix_list_alloc(lr->n, lr->k, threads);
	nv_matrix_t *count = nv_matrix_alloc(lr->k, 1);
	nv_matrix_t *label_weight = nv_matrix_alloc(lr->k, 1);
	float count_max_log;
	
	nv_matrix_zero(count);
	nv_matrix_fill(label_weight, 1.0f);
	if (param.auto_balance) {
		/* scale the update weights when the class counts are unbalanced */
		for (m = 0; m < data->m; ++m) {
			NV_MAT_V(count, 0, (int)NV_MAT_V(label, m, 0)) += 1.0f;
		}
		count_max_log = logf(3.0f + NV_MAT_V(count, 0, nv_vector_max_n(count, 0)));
		for (n = 0; n < count->n; ++n) {
			if (NV_MAT_V(count, 0, n) > 0.0f) {
				float count_log = logf(3.0f + NV_MAT_V(count, 0, n));
				NV_MAT_V(label_weight, 0, n) = 
					powf(count_max_log, NV_LR_CLASS_COUNT_PENALTY_EXP) 
					/ powf(count_log, NV_LR_CLASS_COUNT_PENALTY_EXP);
			} else {
				NV_MAT_V(label_weight, 0, n) = 1.0f;
			}
		}
	}
	do {
		we = 1.0f / er;
		tm = nv_clock();
		sum_e = 0.0f;

		for (m = 0; m < step; ++m) {
			nv_matrix_zero(dw);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 4) reduction(+:sum_e) num_threads(threads) 
#endif
			for (i = 0; i < pn; ++i) {
				int rand_m = NV_ROUND_INT((data->m - 1) * nv_rand());
				int thread_num = nv_omp_thread_id();
				int label_i = (int)NV_MAT_V(label, rand_m, 0);
				float weight = NV_MAT_V(label_weight, 0, label_i);
				float yp;

				nv_vector_zero(t, thread_num);
				NV_MAT_V(t, thread_num, label_i) = 1.0f;
				nv_lr_predict_vector(lr, y, thread_num, data, rand_m);
				yp = NV_MAT_V(y, thread_num, (int)NV_MAT_V(label, rand_m, 0));
				
				if (yp < 1.0f - NV_LR_MARGIN) {
					/* update only for samples not yet predicted
					   with a sufficient margin */
					nv_lr_dw(lr, weight, dw, thread_num, data, rand_m, t, thread_num, y, thread_num);
					sum_e += nv_lr_error(t, thread_num, y, thread_num);
				}
			}

			/* reduce the per-thread gradients into list 0 */
			for (l = 1; l < threads; ++l) {
				for (j = 0; j < dw->m; ++j) {
					for (i = 0; i < dw->n; ++i) {
						NV_MAT_LIST_V(dw, 0, j, i) += NV_MAT_LIST_V(dw, l, j, i);
					}
				}
			}
#ifdef _OPENMP
#pragma omp parallel for private(n)  num_threads(threads) if (lr->k > 32)
#endif
			for (k = 0; k < lr->k; ++k) {
				switch (param.reg_type) {
				case NV_LR_REG_NONE:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					break;
				case NV_LR_REG_L1:
					/* FOBOS-style L1: gradient step followed by soft-thresholding */
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * param.grad_w * NV_MAT_LIST_V(dw, 0, k, n);
					}
					for (n = 0; n < lr->n; ++n) {
						float w_i = NV_MAT_V(lr->w, k, n);
						float lambda = we * param.reg_w * (1.0f / (1.0f + epoch));
						NV_MAT_V(lr->w, k, n) = nv_sign(w_i) * NV_MAX(0.0f, (fabsf(w_i) - lambda));
					}
					break;
				case NV_LR_REG_L2:
					for (n = 0; n < lr->n; ++n) {
						NV_MAT_V(lr->w, k, n) -= 
							we * (param.grad_w * (NV_MAT_LIST_V(dw, 0, k, n)
												  + param.reg_w * NV_MAT_V(lr->w, k, n)));
					}
					break;
				}
			}
		}
		if (nv_lr_progress_flag) {
			printf("nv_lr:%d: E: %E, %ldms\n",
				epoch, sum_e / (pn * step), nv_clock() - tm);
		}
		if (nv_lr_progress_flag > 1) {
			int *ok = nv_alloc_type(int, lr->k);
			int *ng = nv_alloc_type(int, lr->k);

			memset(ok, 0, sizeof(int) * lr->k);
			memset(ng, 0, sizeof(int) * lr->k);
			for (i = 0; i < data->m; ++i) {
				int predict = nv_lr_predict_label(lr, data, i);
				int teach = (int)NV_MAT_V(label, i, 0);
				if (predict == teach) {
					++ok[teach];
				} else {
					++ng[teach];
				}
			}
			for (i = 0; i < lr->k; ++i) {
				printf("%d: ok: %d, ng: %d, %f\n", i, ok[i], ng[i], (float)ok[i] / (float)(ok[i] + ng[i]));
			}
			nv_free(ok);
			nv_free(ng);
		}
		if (nv_lr_progress_flag) {
			fflush(stdout);
		}

		if (sum_e > oe) {
			er += 1.0f; /* error increased: decay the learning rate (we = 1/er) */
		}
		if (er >= 20.0f) {
			break; /* learning rate has decayed too far */
		}

		if (sum_e < FLT_EPSILON) {
			break; /* error is effectively zero */
		}
		oe = sum_e;
	} while (param.max_epoch > ++epoch);

	if (nv_lr_progress_flag) {
		/* total training time (the tail of this listing was truncated;
		   this report is reconstructed from the otherwise unused tm_all) */
		printf("nv_lr: training: %ldms\n", nv_clock() - tm_all);
		fflush(stdout);
	}

	nv_matrix_free(&y);
	nv_matrix_free(&t);
	nv_matrix_free(&dw);
	nv_matrix_free(&count);
	nv_matrix_free(&label_weight);
}
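Two details of the optimizer above are worth noting. The learning rate is error-driven: the step size is we = 1/er, and er is incremented whenever an epoch's summed error rises, so the rate follows a 1/k decay paced by the optimization itself, and training aborts once er reaches 20. The NV_LR_REG_L1 branch is a FOBOS-style proximal step: after the plain gradient update, each weight is soft-thresholded,

	w <- sign(w) * max(0, |w| - lambda),  lambda = we * reg_w / (1 + epoch),

which drives small weights exactly to zero; lambda shrinks as the epoch count grows, so the sparsifying pressure is strongest early in training.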