/*
 * Forward a Hubbard-U buffer to the Python object *self*.
 *
 * The buffer U is wrapped in a one-dimensional NumPy array of doubles
 * (PyArray_SimpleNewFromData references the memory, it does not copy it)
 * whose length is the value returned by f_particles_get_nel. The wrapper
 * object attached as tag to *p* and the array are then passed to
 * self.set_Hubbard_U(particles, U).
 *
 *   self   Python object on which "set_Hubbard_U" is called
 *   p      particles handle; its tag is expected to be the particles_t
 *          wrapper whose f90obj member equals p (checked by assert)
 *   U      buffer exposed to Python as the array data
 *   error  error status, set up by INIT_ERROR and handed to
 *          PASS_PYTHON_ERROR for every Python call that may fail
 */
void py_coulomb_set_hubbard_u(PyObject *self, void *p, double *U, int *error)
{
  particles_t *py_p;
  PyObject *py_U, *r;
  npy_intp dims[1];

  INIT_ERROR(error);

#ifdef DEBUG
  {
    /* PyObject_Str returns a new reference; it must be released again. */
    PyObject *py_str = PyObject_Str(self);
    if (py_str) {
      printf("[py_coulomb_set_Hubbard_U] %s %p %p\n",
             PyString_AsString(py_str), (void *) U, (void *) error);
      Py_DECREF(py_str);
    } else {
      /* Debug output only: do not leave a stale exception behind. */
      PyErr_Clear();
    }
  }
#endif

  f_particles_get_tag(p, (void**) &py_p);
  assert(py_p->f90obj == p);

  dims[0] = f_particles_get_nel(py_p->f90obj);
  py_U = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE, U);
  /*
   * Array creation can fail and return NULL. Stop here in that case;
   * otherwise the Py_DECREF(py_U) below would dereference a NULL pointer.
   * NOTE(review): relies on PASS_PYTHON_ERROR leaving the function when its
   * second argument is NULL, as the existing use on `r` already assumes.
   */
  PASS_PYTHON_ERROR(error, py_U);

  r = PyObject_CallMethod(self, "set_Hubbard_U", "(OO)", py_p, py_U);

  /* Drop our array reference before the error check so it is not leaked. */
  Py_DECREF(py_U);
  PASS_PYTHON_ERROR(error, r);
  Py_DECREF(r);
}
void iterative_matrix_inverse(double *matptr, double *invmatptr, int n,
			      _Bool prev, double epsilon, double *work1,
			      double *work2, int *error,
			      cublasHandle_t cublas_handle, int *nit_out)
{
  INIT_ERROR(error);

  mat<double> matr(n, matptr, cublas_handle);
  mat<double> invmat(n, invmatptr, cublas_handle);
  /* Will allocate and release upon destruction if work1, work2 == NULL */
  mat<double> help1(n, work1, cublas_handle);
  mat<double> help2(n, work2, cublas_handle);

  /*
   * - Initialize inverse matrix if previous not used
   *   The starting invmat has to be small enough so that the iteration
   *   won't start running to infinity
   */

#if 0
  mat<double> dummy(n);
  dummy = matr;

  printf("dummy.data() = %p\n", dummy.data());
  printf("matr.data() = %p\n", matr.data());
  printf("dummy.on_host() = %i\n", dummy.on_host(error));
  PASS_ERROR(error);
  printf("matr.on_host() = %i\n", matr.on_host(error));
  PASS_ERROR(error);
  printf("sum = %f %f\n", dummy.sum(), matr.sum());
  printf("max = %f %f\n", dummy.max(), matr.max());
  printf("min = %f %f\n", dummy.min(), matr.min());
  printf("amax = %f %f\n", dummy.amax(), matr.amax());
  printf("amin = %f %f\n", dummy.amin(), matr.amin());
#endif

  if (!prev) {
    double smin, smax;
    ev_bounds(n, matptr, &smin, &smax, error);
    PASS_ERROR(error);
    mat_mul_sca(1.0/(n*MAX(fabs(smin), fabs(smax))), matr, invmat, error);
    PASS_ERROR(error);
  }

  /*
   * Find inverse via S^-1 = 2 S^1 - S^-1 S S^-1
   */

  double sigma = epsilon + 1.0;
  int i = 0;
  while (sigma > epsilon) {

    /*
     * help1 = matr.invmat
     */

    gemm(OP_N, OP_N, 1.0, matr, invmat, 0.0, help1, error);
    PASS_ERROR(error);

    help2 = invmat;

    /*
     * invmat = -help2.help1 + 2*invmat
     */

    gemm(OP_N, OP_N, -1.0, help2, help1, 2.0, invmat, error);
    PASS_ERROR(error);

    mat_mul_sca(1.0, help2, -1.0, invmat, help1, error);
    PASS_ERROR(error);

    sigma = help1.amax(error);
    PASS_ERROR(error);
    i = i+1;

    if (i % 100 == 0) {
      prscrlog("iterative_matrix_inverse: No convergence after %i iterations.",
	       i);
    }

  }

  if (nit_out) {
    *nit_out = i;
  }
}