/*
 * Serial (host-only) two-pass element-wise product.
 *
 * Pass 1 stores A[k] * B[k] into C[k] for every k in [0, N).
 * init_again(A, B) is then called, and pass 2 adds the new A[k] * B[k]
 * on top of the value already stored in C[k].
 *
 * N is not a parameter of this function; it is taken from the enclosing
 * scope (presumably a macro or file-scope constant -- confirm).
 * init_again() is defined elsewhere; presumably it refills A and B
 * between the passes -- confirm against its definition.
 *
 * NOTE(review): another function named vec_mult with a different
 * parameter list also appears in this source; the two definitions cannot
 * be compiled in the same translation unit as-is.
 */
void vec_mult(DATA_TYPE* A, DATA_TYPE* B, DATA_TYPE* C) {
  /* Pass 1: overwrite C with the element-wise product. */
  for (int k = 0; k < N; ++k) {
    C[k] = A[k] * B[k];
  }

  init_again(A, B);

  /* Pass 2: accumulate the product of the inputs as they are now. */
  for (int k = 0; k < N; ++k) {
    C[k] = C[k] + A[k] * B[k];
  }
}
/*
 * Two-pass element-wise product executed inside OpenMP target regions.
 *
 * Sequence, as written:
 *   1. init(v1, v2, N) is called on the host.
 *   2. A `target data` region maps p[0:N] with map(from:), which requests
 *      that p be copied back to the host when the region ends. Both
 *      target regions below run inside it and use that same mapping of p.
 *   3. First target region: p[k] = v1[k] * v2[k] for k in [0, N).
 *   4. init_again(v1, v2, N) is called on the host, between the two
 *      target regions.
 *   5. Second target region: p[k] += v1[k] * v2[k] for k in [0, N).
 *   6. output(p, N) is called on the host after the data region closes.
 *
 * v1 and v2 are mapped with map(to:) on each `target` construct rather
 * than on the enclosing `target data`, so the second region maps them
 * again after init_again() has run (assuming the caller has not already
 * mapped them in an outer data region -- confirm).
 *
 * init(), init_again() and output() are defined elsewhere; their exact
 * effects are not visible here.
 */
void vec_mult(float *p, float *v1, float *v2, int N)
{
   init(v1, v2, N);

   #pragma omp target data map(from: p[0:N])
   {
      /* Pass 1: write the element-wise product into p. */
      #pragma omp target map(to: v1[:N], v2[:N])
      #pragma omp parallel for
      for (int k = 0; k < N; ++k) {
         p[k] = v1[k] * v2[k];
      }

      /* Host-side call between the two offloaded loops. */
      init_again(v1, v2, N);

      /* Pass 2: accumulate onto the p values produced by pass 1. */
      #pragma omp target map(to: v1[:N], v2[:N])
      #pragma omp parallel for
      for (int k = 0; k < N; ++k) {
         p[k] += v1[k] * v2[k];
      }
   }

   output(p, N);
}
/*
 * OpenMP target version of the two-pass element-wise product.
 *
 * C[:N] is mapped with map(from:) on an enclosing `target data` region,
 * which requests that C be copied back to the host when that region
 * ends; both target regions inside it use the same mapping of C.
 * A[:N] and B[:N] are mapped with map(to:) on each `target` construct
 * separately, so the second region maps them again after the host call
 * to init_again(A, B) (assuming the caller has not already mapped them
 * in an outer data region -- confirm).
 *
 * Pass 1 stores A[k] * B[k] into C[k]; pass 2 adds the new A[k] * B[k]
 * to C[k], for k in [0, N).
 *
 * N is not a parameter; it comes from the enclosing scope (presumably a
 * macro or file-scope constant -- confirm). init_again() is defined
 * elsewhere.
 */
void vec_mult_OMP(DATA_TYPE* A, DATA_TYPE* B, DATA_TYPE* C) {
  #pragma omp target data map(from: C[:N])
  {
    /* Pass 1: overwrite C with the element-wise product. */
    #pragma omp target map(to: A[:N], B[:N])
    #pragma omp parallel for
    for (int k = 0; k < N; ++k) {
      C[k] = A[k] * B[k];
    }

    /* Host-side call between the two offloaded loops. */
    init_again(A, B);

    /* Pass 2: accumulate onto the C values produced by pass 1. */
    #pragma omp target map(to: A[:N], B[:N])
    #pragma omp parallel for
    for (int k = 0; k < N; ++k) {
      C[k] = C[k] + A[k] * B[k];
    }
  }
}